//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
using namespace SwitchCG;

#define DEBUG_TYPE "isel"

/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;
static cl::opt<bool>
    InsertAssertAlign("insert-assert-align", cl::init(true),
                      cl::desc("Insert the experimental `assertalign` node."),
                      cl::ReallyHidden);

static cl::opt<unsigned, true>
    LimitFPPrecision("limit-float-precision",
                     cl::desc("Generate low-precision inline sequences "
                              "for some float libcalls"),
                     cl::location(LimitFloatPrecision), cl::Hidden,
                     cl::init(0));

static cl::opt<unsigned> SwitchPeelThreshold(
    "switch-peel-threshold", cl::Hidden, cl::init(66),
    cl::desc("Set the case probability threshold for peeling the case from a "
             "switch statement. A value greater than 100 will void this "
             "optimization"));
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      Optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent.  If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
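///
/// Illustrative example (not part of the original comment): an i64 value that
/// arrived in two i32 parts on a little-endian 32-bit target is typically
/// reassembled below as BUILD_PAIR(Parts[0], Parts[1]) : i64, with the two
/// parts swapped first on big-endian targets.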
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT, const Value *V,
                                Optional<CallingConv::ID> CC = None,
                                Optional<ISD::NodeType> AssertOp = None) {
  // Let the target assemble the parts if it wants to
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
                                                   PartVT, ValueVT, CC))
    return Val;

  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
                                  CC);

  assert(NumParts > 0 && "No parts to assemble!");
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.
      unsigned RoundParts =
          (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
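      // Worked example (illustrative, not from the original source): for
      // NumParts == 3, NumParts is not a power of two, so RoundParts becomes
      // 1 << Log2_32(3) == 2; the first two parts are assembled as the round
      // half below and the third is folded in as the trailing odd part.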
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
                              OddVT, V, CC);

        // Combine the round and odd parts.
        Lo = Val;
        if (DAG.getDataLayout().isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                         DAG.getConstant(Lo.getValueSizeInBits(), DL,
                                         TLI.getShiftAmountTy(
                                             TotalVT, DAG.getDataLayout())));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp)
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
    }
  }
  // There is now one part, held in Val.  Correct it to match ValueVT.
  // PartEVT is the type of the register class that holds the value.
  // ValueVT is the type of the inline asm operation.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
      ValueVT.bitsLT(PartEVT)) {
    // For an FP value in an integer part, we need to truncate to the right
    // width first.
    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
  }

  // Handle types that have the same size.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle types with different sizes.
  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp)
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(
          ISD::FP_ROUND, DL, ValueVT, Val,
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  // Handle MMX to a narrower integer type by bitcasting MMX to integer and
  // then truncating.
  if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
      ValueVT.bitsLT(PartEVT)) {
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
    return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  report_fatal_error("Unknown mismatch in getCopyFromParts!");
}
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
                                              const Twine &ErrMsg) {
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
  if (!I)
    return Ctx.emitError(ErrMsg);

  const char *AsmError = ", possible invalid constraint for vector type";
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (CI->isInlineAsm())
      return Ctx.emitError(I, ErrMsg + AsmError);

  return Ctx.emitError(I, ErrMsg);
}
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent.  If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      Optional<CallingConv::ID> CallConv) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const bool IsABIRegCopy = CallConv.has_value();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs;

    if (IsABIRegCopy) {
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
          *DAG.getContext(), *CallConv, ValueVT, IntermediateVT,
          NumIntermediates, RegisterVT);
    } else {
      NumRegs =
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
    }

    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT.getSizeInBits() ==
           Parts[0].getSimpleValueType().getSizeInBits() &&
           "Part type sizes don't match!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                  PartVT, IntermediateVT, V, CallConv);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                  PartVT, IntermediateVT, V, CallConv);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
    EVT BuiltVectorTy =
        IntermediateVT.isVector()
            ? EVT::getVectorVT(
                  *DAG.getContext(), IntermediateVT.getScalarType(),
                  IntermediateVT.getVectorElementCount() * NumParts)
            : EVT::getVectorVT(*DAG.getContext(),
                               IntermediateVT.getScalarType(),
                               NumIntermediates);
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, BuiltVectorTy, Ops);
  }
  // There is now one part, held in Val.  Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
    // elements we want.
    if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
      assert((PartEVT.getVectorElementCount().getKnownMinValue() >
              ValueVT.getVectorElementCount().getKnownMinValue()) &&
             (PartEVT.getVectorElementCount().isScalable() ==
              ValueVT.getVectorElementCount().isScalable()) &&
             "Cannot narrow, it would be a lossy transformation");
      PartEVT =
          EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
                           ValueVT.getVectorElementCount());
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      if (PartEVT == ValueVT)
        return Val;
    }

    // Promoted vector extract
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. For vectors
    // are the same size, this is an obvious bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    } else if (ValueVT.bitsLT(PartEVT)) {
      const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
      EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
      // Drop the extra bits.
      Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
      return DAG.getBitcast(ValueVT, Val);
    }

    diagnosePossiblyInvalidConstraint(
        *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
    return DAG.getUNDEF(ValueVT);
  }

  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
    if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
      Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
    else
      Val = ValueVT.isFloatingPoint()
                ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
                : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
  }

  return DAG.getBuildVector(ValueVT, DL, Val);
}
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 Optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts.  If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
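///
/// Illustrative example (not part of the original comment): splitting an i64
/// value into two i32 parts on a 32-bit target typically produces
/// Parts[0] = lo32 and Parts[1] = hi32 via ISD::EXTRACT_ELEMENT, with the
/// order reversed for big-endian data layouts.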
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
                           const Value *V,
                           Optional<CallingConv::ID> CallConv = None,
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
  // Let the target split the parts if it wants to
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
                                      CallConv))
    return;
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                CallConv);

  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
         "Copying to an illegal type!");

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      if (ValueVT.isFloatingPoint()) {
        // FP values need to be bitcast, then extended if they are being put
        // into a larger container.
        ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      }
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover less bits than value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");

  if (NumParts == 1) {
    if (PartEVT != ValueVT) {
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }
  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2.  Split off and copy the tail.
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = 1 << Log2_32(NumParts);
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
      DAG.getShiftAmountConstant(RoundBits, ValueVT, DL));

    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
                   CallConv);

    if (DAG.getDataLayout().isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2.  Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);

  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  if (DAG.getDataLayout().isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}
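// Illustrative note (not in the original source): widenVectorToPartType below
// turns e.g. a <2 x float> value whose part type is <4 x float> into a
// BUILD_VECTOR of the two original elements followed by two undef elements;
// for scalable vectors it instead uses INSERT_SUBVECTOR into an undef vector.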
static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
                                     const SDLoc &DL, EVT PartVT) {
  if (!PartVT.isVector())
    return SDValue();

  EVT ValueVT = Val.getValueType();
  ElementCount PartNumElts = PartVT.getVectorElementCount();
  ElementCount ValueNumElts = ValueVT.getVectorElementCount();

  // We only support widening vectors with equivalent element types and
  // fixed/scalable properties. If a target needs to widen a fixed-length type
  // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
  if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
      PartNumElts.isScalable() != ValueNumElts.isScalable() ||
      PartVT.getVectorElementType() != ValueVT.getVectorElementType())
    return SDValue();

  // Widening a scalable vector to another scalable vector is done by inserting
  // the vector into a larger undef one.
  if (PartNumElts.isScalable())
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                       Val, DAG.getVectorIdxConstant(0, DL));

  EVT ElementVT = PartVT.getVectorElementType();
  // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
  // undef elements.
  SmallVector<SDValue, 16> Ops;
  DAG.ExtractVectorElements(Val, Ops);
  SDValue EltUndef = DAG.getUNDEF(ElementVT);
  Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);

  // FIXME: Use CONCAT for 2x -> 4x.
  return DAG.getBuildVector(PartVT, DL, Ops);
}
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
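///
/// Illustrative example (not part of the original comment): a <2 x float>
/// value whose legal register type is <4 x float> is first widened with undef
/// elements (see widenVectorToPartType) and then emitted as a single part.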
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 Optional<CallingConv::ID> CallConv) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const bool IsABIRegCopy = CallConv.has_value();

  if (NumParts == 1) {
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
      Val = Widened;
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                   ValueVT.getVectorElementType()) &&
               PartEVT.getVectorElementCount() ==
                   ValueVT.getVectorElementCount()) {

      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
    } else if (PartEVT.isVector() &&
               PartEVT.getVectorElementType() !=
                   ValueVT.getVectorElementType() &&
               TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
                   TargetLowering::TypeWidenVector) {
      // Combination of widening and promotion.
      EVT WidenVT =
          EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
                           PartVT.getVectorElementCount());
      SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
      Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
    } else {
      if (ValueVT.getVectorElementCount().isScalar()) {
        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
                          DAG.getVectorIdxConstant(0, DL));
      } else {
        uint64_t ValueSize = ValueVT.getFixedSizeInBits();
        assert(PartVT.getFixedSizeInBits() > ValueSize &&
               "lossy conversion of vector to scalar type");
        EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
        Val = DAG.getBitcast(IntermediateType, Val);
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
    Parts[0] = Val;
    return;
  }
  // Handle a multi-element vector.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs;
  if (IsABIRegCopy) {
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
        NumIntermediates, RegisterVT);
  } else {
    NumRegs =
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                   NumIntermediates, RegisterVT);
  }

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
         "Mixing scalable and fixed vectors when copying in parts");

  Optional<ElementCount> DestEltCnt;

  if (IntermediateVT.isVector())
    DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
  else
    DestEltCnt = ElementCount::getFixed(NumIntermediates);

  EVT BuiltVectorTy = EVT::getVectorVT(
      *DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt);

  if (ValueVT == BuiltVectorTy) {
    // Nothing to do.
  } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
    // Bitconvert vector->vector case.
    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
  } else {
    if (BuiltVectorTy.getVectorElementType().bitsGT(
            ValueVT.getVectorElementType())) {
      // Integer promotion.
      ValueVT = EVT::getVectorVT(*DAG.getContext(),
                                 BuiltVectorTy.getVectorElementType(),
                                 ValueVT.getVectorElementCount());
      Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
    }

    if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
      Val = Widened;
    }
  }

  assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");

  // Split the vector into intermediate operands.
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector()) {
      // This does something sensible for scalable vectors - see the
      // definition of EXTRACT_SUBVECTOR for further details.
      unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
      Ops[i] =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
                      DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
    } else {
      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
                           DAG.getVectorIdxConstant(i, DL));
    }
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each the value into
    // legal parts.
    assert(NumIntermediates != 0 && "division by zero");
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
                     CallConv);
  }
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                           EVT valuevt, Optional<CallingConv::ID> CC)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
      RegCount(1, regs.size()), CallConv(CC) {}

RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL, unsigned Reg, Type *Ty,
                           Optional<CallingConv::ID> CC) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

  CallConv = CC;

  for (EVT ValueVT : ValueVTs) {
    unsigned NumRegs =
        isABIMangled()
            ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
            : TLI.getNumRegisters(Context, ValueVT);
    MVT RegisterVT =
        isABIMangled()
            ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
            : TLI.getRegisterType(Context, ValueVT);
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i);
    RegVTs.push_back(RegisterVT);
    RegCount.push_back(NumRegs);
    Reg += NumRegs;
  }
}
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      const SDLoc &dl, SDValue &Chain,
                                      SDValue *Flag, const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = RegCount[Value];
    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
                                          *DAG.getContext(),
                                          CallConv.getValue(), RegVTs[Value])
                                    : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Flag) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
        *Flag = P.getValue(2);
      }

      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!Register::isVirtualRegister(Regs[Part + i]) ||
          !RegisterVT.isInteger())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getScalarSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent.  For
      // now, just use the tightest assertzext/assertsext possible.
      bool isSExt;
      EVT FromVT(MVT::Other);
      if (NumZeroBits) {
        FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
        isSExt = false;
      } else if (NumSignBits > 1) {
        FromVT =
            EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
        isSExt = true;
      } else {
        continue;
      }
      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
                                     RegisterVT, ValueVT, V, CallConv);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
                                 const Value *V,
                                 ISD::NodeType PreferredExtendType) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ISD::NodeType ExtendKind = PreferredExtendType;

  // Get the list of the values's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumParts = RegCount[Value];

    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
                                          *DAG.getContext(),
                                          CallConv.getValue(), RegVTs[Value])
                                    : RegVTs[Value];

    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
                   NumParts, RegisterVT, V, CallConv, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Flag) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
      *Flag = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Flag)
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    // flagged to it. That is the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    // c1, f1 = CopyToReg
    // c2, f2 = CopyToReg
    // c3     = TokenFactor c1, c2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
  if (HasMatching)
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
  else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word.  That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
  }

  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  if (Code == InlineAsm::Kind_Clobber) {
    // Clobbers should always have a 1:1 mapping with registers, and may
    // reference registers that have illegal (e.g. vector) types. Hence, we
    // shouldn't try to apply any sort of splitting logic to them.
    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
           "No 1:1 mapping from clobbers to regs?");
    Register SP = TLI.getStackPointerRegisterToSaveRestore();
    (void)SP;
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
      assert(
          (Regs[I] != SP ||
           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
          "If we clobbered the stack pointer, MFI should know about it.");
    }
    return;
  }

  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    MVT RegisterVT = RegVTs[Value];
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
                                           RegisterVT);
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
    }
  }
}
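// Illustrative note (not in the original source): for a value that was
// assigned two 32-bit registers, getRegsAndSizes() below returns two
// (register, 32-bit size) pairs, one per register, in register order.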
SmallVector<std::pair<unsigned, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
  SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
  unsigned I = 0;
  for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
    unsigned RegCount = std::get<0>(CountAndVT);
    MVT RegisterVT = std::get<1>(CountAndVT);
    TypeSize RegisterSize = RegisterVT.getSizeInBits();
    for (unsigned E = I + RegCount; I != E; ++I)
      OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
  }
  return OutVec;
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
                               const TargetLibraryInfo *li) {
  AA = aa;
  GFI = gfi;
  LibInfo = li;
  Context = DAG.getContext();
  LPadToCallSiteMap.clear();
  SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
}

void SelectionDAGBuilder::clear() {
  NodeMap.clear();
  UnusedArgNodeMap.clear();
  PendingLoads.clear();
  PendingExports.clear();
  PendingConstrainedFP.clear();
  PendingConstrainedFPStrict.clear();
  CurInst = nullptr;
  HasTailCall = false;
  SDNodeOrder = LowestSDNodeOrder;
  StatepointLowering.clear();
}
void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}

// Update DAG root to include dependencies on Pending chains.
SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
  SDValue Root = DAG.getRoot();

  if (Pending.empty())
    return Root;

  // Add current root to PendingChains, unless we already indirectly
  // depend on it.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = Pending.size();
    for (; i != e; ++i) {
      assert(Pending[i].getNode()->getNumOperands() > 1);
      if (Pending[i].getNode()->getOperand(0) == Root)
        break;  // Don't add the root if we already indirectly depend on it.
    }

    if (i == e)
      Pending.push_back(Root);
  }

  if (Pending.size() == 1)
    Root = Pending[0];
  else
    Root = DAG.getTokenFactor(getCurSDLoc(), Pending);

  DAG.setRoot(Root);
  Pending.clear();
  return Root;
}

SDValue SelectionDAGBuilder::getMemoryRoot() {
  return updateRoot(PendingLoads);
}

SDValue SelectionDAGBuilder::getRoot() {
  // Chain up all pending constrained intrinsics together with all
  // pending loads, by simply appending them to PendingLoads and
  // then calling getMemoryRoot().
  PendingLoads.reserve(PendingLoads.size() +
                       PendingConstrainedFP.size() +
                       PendingConstrainedFPStrict.size());
  PendingLoads.append(PendingConstrainedFP.begin(),
                      PendingConstrainedFP.end());
  PendingLoads.append(PendingConstrainedFPStrict.begin(),
                      PendingConstrainedFPStrict.end());
  PendingConstrainedFP.clear();
  PendingConstrainedFPStrict.clear();
  return getMemoryRoot();
}

SDValue SelectionDAGBuilder::getControlRoot() {
  // We need to emit pending fpexcept.strict constrained intrinsics,
  // so append them to the PendingExports list.
  PendingExports.append(PendingConstrainedFPStrict.begin(),
                        PendingConstrainedFPStrict.end());
  PendingConstrainedFPStrict.clear();
  return updateRoot(PendingExports);
}
void SelectionDAGBuilder::visit(const Instruction &I) {
  // Set up outgoing PHI node register values before emitting the terminator.
  if (I.isTerminator()) {
    HandlePHINodesInSuccessorBlocks(I.getParent());
  }

  // Increase the SDNodeOrder if dealing with a non-debug instruction.
  if (!isa<DbgInfoIntrinsic>(I))
    ++SDNodeOrder;

  CurInst = &I;

  visit(I.getOpcode(), I);

  if (!I.isTerminator() && !HasTailCall &&
      !isa<GCStatepointInst>(I)) // statepoints handle their exports internally
    CopyToExportRegsIfNeeded(&I);

  CurInst = nullptr;
}

void SelectionDAGBuilder::visitPHI(const PHINode &) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}

void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
  // Note: this doesn't use InstVisitor, because it has to work with
  // ConstantExpr's in addition to instructions.
  switch (Opcode) {
  default: llvm_unreachable("Unknown instruction type encountered!");
  // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
  case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
  }
}
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
                                               DebugLoc DL, unsigned Order) {
  // We treat variadic dbg_values differently at this stage.
  if (DI->hasArgList()) {
    // For variadic dbg_values we will now insert an undef.
    // FIXME: We can potentially recover these!
    SmallVector<SDDbgOperand, 2> Locs;
    for (const Value *V : DI->getValues()) {
      auto Undef = UndefValue::get(V->getType());
      Locs.push_back(SDDbgOperand::fromConst(Undef));
    }
    SDDbgValue *SDV = DAG.getDbgValueList(
        DI->getVariable(), DI->getExpression(), Locs, {},
        /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
    DAG.AddDbgValue(SDV, /*isParameter=*/false);
  } else {
    // TODO: Dangling debug info will eventually either be resolved or produce
    // an Undef DBG_VALUE. However in the resolution case, a gap may appear
    // between the original dbg.value location and its resolved DBG_VALUE,
    // which we should ideally fill with an extra Undef DBG_VALUE.
    assert(DI->getNumVariableLocationOps() == 1 &&
           "DbgValueInst without an ArgList should have a single location "
           "operand.");
    DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
  }
}

void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
                                                const DIExpression *Expr) {
  auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
    const DbgValueInst *DI = DDI.getDI();
    DIVariable *DanglingVariable = DI->getVariable();
    DIExpression *DanglingExpr = DI->getExpression();
    if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
      LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
      return true;
    }
    return false;
  };

  for (auto &DDIMI : DanglingDebugInfoMap) {
    DanglingDebugInfoVector &DDIV = DDIMI.second;

    // If debug info is to be dropped, run it through final checks to see
    // whether it can be salvaged.
    for (auto &DDI : DDIV)
      if (isMatchingDbgValue(DDI))
        salvageUnresolvedDbgValue(DDI);

    erase_if(DDIV, isMatchingDbgValue);
  }
}
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                                   SDValue Val) {
  auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
  if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
    return;

  DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
  for (auto &DDI : DDIV) {
    const DbgValueInst *DI = DDI.getDI();
    assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
    assert(DI && "Ill-formed DanglingDebugInfo");
    DebugLoc dl = DDI.getdl();
    unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    DILocalVariable *Variable = DI->getVariable();
    DIExpression *Expr = DI->getExpression();
    assert(Variable->isValidLocationForIntrinsic(dl) &&
           "Expected inlined-at fields to agree");
    SDDbgValue *SDV;
    if (Val.getNode()) {
      // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
      // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
      // we couldn't resolve it directly when examining the DbgValue intrinsic
      // in the first place we should not be more successful here). Unless we
      // have some test case that prove this to be correct we should avoid
      // calling EmitFuncArgumentDbgValue here.
      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl,
                                    FuncArgumentDbgValueKind::Value, Val)) {
        LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                          << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
        LLVM_DEBUG(dbgs() << "  By mapping to:\n    "; Val.dump());
        // Increase the SDNodeOrder for the DbgValue here to make sure it is
        // inserted after the definition of Val when emitting the instructions
        // after ISel. An alternative could be to teach
        // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
        LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
                   << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
                   << ValSDNodeOrder << "\n");
        SDV = getDbgValue(Val, Variable, Expr, dl,
                          std::max(DbgSDNodeOrder, ValSDNodeOrder));
        DAG.AddDbgValue(SDV, false);
      } else
        LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                          << "in EmitFuncArgumentDbgValue\n");
    } else {
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
      auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
      auto SDV =
          DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
      DAG.AddDbgValue(SDV, false);
    }
  }
  DDIV.clear();
}
void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
  // TODO: For the variadic implementation, instead of only checking the fail
  // state of `handleDebugValue`, we need know specifically which values were
  // invalid, so that we attempt to salvage only those values when processing
  // a DIArgList.
  assert(!DDI.getDI()->hasArgList() &&
         "Not implemented for variadic dbg_values");
  Value *V = DDI.getDI()->getValue(0);
  DILocalVariable *Var = DDI.getDI()->getVariable();
  DIExpression *Expr = DDI.getDI()->getExpression();
  DebugLoc DL = DDI.getdl();
  DebugLoc InstDL = DDI.getDI()->getDebugLoc();
  unsigned SDOrder = DDI.getSDNodeOrder();
  // Currently we consider only dbg.value intrinsics -- we tell the salvager
  // that DW_OP_stack_value is desired.
  assert(isa<DbgValueInst>(DDI.getDI()));
  bool StackValue = true;

  // Can this Value can be encoded without any further work?
  if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
    return;

  // Attempt to salvage back through as many instructions as possible. Bail if
  // a non-instruction is seen, such as a constant expression or global
  // variable. FIXME: Further work could recover those too.
  while (isa<Instruction>(V)) {
    Instruction &VAsInst = *cast<Instruction>(V);
    // Temporary "0", awaiting real implementation.
    SmallVector<uint64_t, 16> Ops;
    SmallVector<Value *, 4> AdditionalValues;
    V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
                             AdditionalValues);
    // If we cannot salvage any further, and haven't yet found a suitable debug
    // expression, bail out.
    if (!V)
      break;

    // TODO: If AdditionalValues isn't empty, then the salvage can only be
    // represented with a DBG_VALUE_LIST, so we give up. When we have support
    // here for variadic dbg_values, remove that condition.
    if (!AdditionalValues.empty())
      break;

    // New value and expr now represent this debuginfo.
    Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);

    // Some kind of simplification occurred: check whether the operand of the
    // salvaged debug expression can be encoded in this DAG.
    if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
                         /*IsVariadic=*/false)) {
      LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n  "
                        << DDI.getDI() << "\nBy stripping back to:\n  " << V);
      return;
    }
  }

  // This was the final opportunity to salvage this debug information, and it
  // couldn't be done. Place an undef DBG_VALUE at this location to terminate
  // any earlier variable location.
  auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
  auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
  DAG.AddDbgValue(SDV, false);

  LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n  " << DDI.getDI()
                    << "\n");
  LLVM_DEBUG(dbgs() << "  Last seen at:\n    " << *DDI.getDI()->getOperand(0)
                    << "\n");
}
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
                                           DILocalVariable *Var,
                                           DIExpression *Expr, DebugLoc dl,
                                           DebugLoc InstDL, unsigned Order,
                                           bool IsVariadic) {
  if (Values.empty())
    return true;
  SmallVector<SDDbgOperand> LocationOps;
  SmallVector<SDNode *> Dependencies;
  for (const Value *V : Values) {
    if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
        isa<ConstantPointerNull>(V)) {
      LocationOps.emplace_back(SDDbgOperand::fromConst(V));
      continue;
    }

    // If the Value is a frame index, we can create a FrameIndex debug value
    // without relying on the DAG at all.
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
      auto SI = FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second));
        continue;
      }
    }

    // Do not use getValue() in here; we don't want to generate code at
    // this point if it hasn't been done yet.
    SDValue N = NodeMap[V];
    if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
      N = UnusedArgNodeMap[V];
    if (N.getNode()) {
      // Only emit func arg dbg value for non-variadic dbg.values for now.
      if (!IsVariadic &&
          EmitFuncArgumentDbgValue(V, Var, Expr, dl,
                                   FuncArgumentDbgValueKind::Value, N))
        return true;
      if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
        // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
        // describe stack slot locations.
        //
        // Consider "int x = 0; int *px = &x;". There are two kinds of
        // interesting debug values here after optimization:
        //
        //   dbg.value(i32* %px, !"int *px", !DIExpression()), and
        //   dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
        //
        // Both describe the direct values of their associated variables.
        Dependencies.push_back(N.getNode());
        LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex()));
        continue;
      }
      LocationOps.emplace_back(
          SDDbgOperand::fromNode(N.getNode(), N.getResNo()));
      continue;
    }

    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    // Special rules apply for the first dbg.values of parameter variables in a
    // function. Identify them by the fact they reference Argument Values, that
    // they're parameters, and they are parameters of the current function. We
    // need to let them dangle until they get an SDNode.
    bool IsParamOfFunc =
        isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
    if (IsParamOfFunc)
      return false;

    // The value is not used in this block yet (or it would have an SDNode).
    // We still want the value to appear for the user if possible -- if it has
    // an associated VReg, we can refer to that instead.
    auto VMI = FuncInfo.ValueMap.find(V);
    if (VMI != FuncInfo.ValueMap.end()) {
      unsigned Reg = VMI->second;
      // If this is a PHI node, it may be split up into several MI PHI nodes
      // (in FunctionLoweringInfo::set).
      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
                       V->getType(), None);
      if (RFV.occupiesMultipleRegs()) {
        // FIXME: We could potentially support variadic dbg_values here.
        if (IsVariadic)
          return false;
        unsigned Offset = 0;
        unsigned BitsToDescribe = 0;
        if (auto VarSize = Var->getSizeInBits())
          BitsToDescribe = *VarSize;
        if (auto Fragment = Expr->getFragmentInfo())
          BitsToDescribe = Fragment->SizeInBits;
        for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
          // Bail out if all bits are described already.
          if (Offset >= BitsToDescribe)
            break;
          // TODO: handle scalable vectors.
          unsigned RegisterSize = RegAndSize.second;
          unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
                                      ? BitsToDescribe - Offset
                                      : RegisterSize;
          auto FragmentExpr = DIExpression::createFragmentExpression(
              Expr, Offset, FragmentSize);
          if (!FragmentExpr)
            continue;
          SDDbgValue *SDV = DAG.getVRegDbgValue(
              Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
          DAG.AddDbgValue(SDV, false);
          Offset += RegisterSize;
        }
        return true;
      }
      // We can use simple vreg locations for variadic dbg_values as well.
      LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg));
      continue;
    }
    // We failed to create a SDDbgOperand for V.
    return false;
  }

  // We have created a SDDbgOperand for each Value in Values.
  // Should use Order instead of SDNodeOrder?
  assert(!LocationOps.empty());
  SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
                                        /*IsIndirect=*/false, dl, SDNodeOrder,
                                        IsVariadic);
  DAG.AddDbgValue(SDV, /*isParameter=*/false);
  return true;
}
void SelectionDAGBuilder::resolveOrClearDbgInfo() {
  // Try to fixup any remaining dangling debug info -- and drop it if we can't.
  for (auto &Pair : DanglingDebugInfoMap)
    for (auto &DDI : Pair.second)
      salvageUnresolvedDbgValue(DDI);
  clearDanglingDebugInfo();
}
/// getCopyFromRegs - If there was virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
  DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
  SDValue Result;

  if (It != FuncInfo.ValueMap.end()) {
    Register InReg = It->second;

    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                     DAG.getDataLayout(), InReg, Ty,
                     None); // This is not an ABI copy.
    SDValue Chain = DAG.getEntryNode();
    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
                                 V);
    resolveDanglingDebugInfo(V, Result);
  }

  return Result;
}
/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
  // If we already have an SDValue for this value, use it. It's important
  // to do this first, so that we don't create a CopyFromReg if we already
  // have a regular SDValue.
  SDValue &N = NodeMap[V];
  if (N.getNode()) return N;

  // If there's a virtual register allocated and initialized for this
  // value, use it.
  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
    return copyFromReg;

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
  // If we already have an SDValue for this value, use it.
  SDValue &N = NodeMap[V];
  if (N.getNode()) {
    if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
      // Remove the debug location from the node as the node is about to be used
      // in a location which may differ from the original debug location.  This
      // is relevant to Constant and ConstantFP nodes because they can appear
      // as constant expressions inside PHI nodes.
      N->setDebugLoc(DebugLoc());
    }
    return N;
  }

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (const Constant *C = dyn_cast<Constant>(V)) {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
      return DAG.getConstant(*CI, getCurSDLoc(), VT);

    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
      return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);

    if (isa<ConstantPointerNull>(C)) {
      unsigned AS = V->getType()->getPointerAddressSpace();
      return DAG.getConstant(0, getCurSDLoc(),
                             TLI.getPointerTy(DAG.getDataLayout(), AS));
    }

    if (match(C, m_VScale(DAG.getDataLayout())))
      return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));

    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
      return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);

    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
      return DAG.getUNDEF(VT);

    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      visit(CE->getOpcode(), *CE);
      SDValue N1 = NodeMap[V];
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
      return N1;
    }

    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      SmallVector<SDValue, 4> Constants;
      for (const Use &U : C->operands()) {
        SDNode *Val = getValue(U).getNode();
        // If the operand is an empty aggregate, there are no values.
        if (!Val) continue;
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Constants.push_back(SDValue(Val, i));
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }
    if (const ConstantDataSequential *CDS =
            dyn_cast<ConstantDataSequential>(C)) {
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Ops.push_back(SDValue(Val, i));
      }

      if (isa<ArrayType>(CDS->getType()))
        return DAG.getMergeValues(Ops, getCurSDLoc());
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
             "Unknown struct or array constant!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
      unsigned NumElts = ValueVTs.size();
      if (NumElts == 0)
        return SDValue(); // empty struct
      SmallVector<SDValue, 4> Constants(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        EVT EltVT = ValueVTs[i];
        if (isa<UndefValue>(C))
          Constants[i] = DAG.getUNDEF(EltVT);
        else if (EltVT.isFloatingPoint())
          Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
        else
          Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
      return DAG.getBlockAddress(BA, VT);

    if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
      return getValue(Equiv->getGlobalValue());

    if (const auto *NC = dyn_cast<NoCFIValue>(C))
      return getValue(NC->getGlobalValue());

    VectorType *VecTy = cast<VectorType>(V->getType());

    // Now that we know the number and type of the elements, get that number of
    // elements into the Ops array based on what kind of constant it is.
    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
      SmallVector<SDValue, 16> Ops;
      unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
      for (unsigned i = 0; i != NumElements; ++i)
        Ops.push_back(getValue(CV->getOperand(i)));

      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    if (isa<ConstantAggregateZero>(C)) {
      EVT EltVT =
          TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());

      SDValue Op;
      if (EltVT.isFloatingPoint())
        Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
      else
        Op = DAG.getConstant(0, getCurSDLoc(), EltVT);

      if (isa<ScalableVectorType>(VecTy))
        return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);

      SmallVector<SDValue, 16> Ops;
      Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    llvm_unreachable("Unknown vector constant");
  }

  // If this is a static alloca, generate it as the frameindex instead of
  // computation.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second,
                               TLI.getFrameIndexTy(DAG.getDataLayout()));
  }
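  // Illustrative example: a function-entry "alloca i32" recorded in
  // StaticAllocaMap is lowered here to a plain FrameIndex node, so its uses
  // read a fixed stack slot rather than the result of dynamic stack
  // adjustment code.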
  // If this is an instruction which fast-isel has deferred, select it now.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);

    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
                     Inst->getType(), None);
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
  }

  if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V))
    return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));

  if (const auto *BB = dyn_cast<BasicBlock>(V))
    return DAG.getBasicBlock(FuncInfo.MBBMap[BB]);

  llvm_unreachable("Can't get register for value!");
}
void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
  if (!IsSEH)
    CatchPadMBB->setIsEHScopeEntry();
  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
  if (IsMSVCCXX || IsCoreCLR)
    CatchPadMBB->setIsEHFuncletEntry();
}

void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
  // Update machine-CFG edge.
  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
  FuncInfo.MBB->addSuccessor(TargetMBB);
  TargetMBB->setIsEHCatchretTarget(true);
  DAG.getMachineFunction().setHasEHCatchret(true);

  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  if (IsSEH) {
    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (TargetMBB != NextBlock(FuncInfo.MBB) ||
        TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(TargetMBB)));
    return;
  }

  // Figure out the funclet membership for the catchret's successor.
  // This will be used by the FuncletLayout pass to determine how to order the
  // BBs.
  // A 'catchret' returns to the outer scope's color.
  Value *ParentPad = I.getCatchSwitchParentPad();
  const BasicBlock *SuccessorColor;
  if (isa<ConstantTokenNone>(ParentPad))
    SuccessorColor = &FuncInfo.Fn->getEntryBlock();
  else
    SuccessorColor = cast<Instruction>(ParentPad)->getParent();
  assert(SuccessorColor && "No parent funclet for catchret!");
  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");

  // Create the terminator node.
  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
                            DAG.getBasicBlock(SuccessorColorMBB));
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
  // Don't emit any special code for the cleanuppad instruction. It just marks
  // the start of an EH scope/funclet.
  FuncInfo.MBB->setIsEHScopeEntry();
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  if (Pers != EHPersonality::Wasm_CXX) {
    FuncInfo.MBB->setIsEHFuncletEntry();
    FuncInfo.MBB->setIsCleanupFuncletEntry();
  }
}
// In wasm EH, even though a catchpad may not catch an exception if a tag does
// not match, it is OK to add only the first unwind destination catchpad to the
// successors, because there will be at least one invoke instruction within the
// catch scope that points to the next unwind destination, if one exists, so
// CFGSort cannot mess up with BB sorting order.
// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
// call within them, and catchpads only consisting of 'catch (...)' have a
// '__cxa_end_catch' call within them, both of which generate invokes in case
// the next unwind destination exists, i.e., the next unwind destination is not
// the caller.)
//
// Having at most one EH pad successor is also simpler and helps later
// transformations.
//
// For example,
// current:
//   invoke void @foo to ... unwind label %catch.dispatch
// catch.dispatch:
//   %0 = catchswitch within ... [label %catch.start] unwind label %next
// catch.start:
//   ... in this BB or some other child BB dominated by this BB there will be an
//   invoke that points to 'next' BB as an unwind destination
//
// next: ; We don't need to add this to 'current' BB's successor
//   ...
static void findWasmUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations. We don't
      // continue to the unwind destination of the catchswitch for wasm.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        UnwindDests.back().first->setIsEHScopeEntry();
      }
      break;
    }
  }
}
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality =
    classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
  bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
  bool IsSEH = isAsynchronousEHPersonality(Personality);

  if (IsWasmCXX) {
    findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
    assert(UnwindDests.size() <= 1 &&
           "There should be at most one unwind destination for wasm");
    return;
  }

  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      break;
    } else if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
        if (!IsSEH)
          UnwindDests.back().first->setIsEHScopeEntry();
      }
      NewEHPadBB = CatchSwitch->getUnwindDest();
    }

    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
}
void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
  // Update successor info.
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  auto UnwindDest = I.getUnwindDest();
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  BranchProbability UnwindDestProb =
      (BPI && UnwindDest)
          ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
  }
  FuncInfo.MBB->normalizeSuccProbs();

  // Create the terminator node.
  SDValue Ret =
      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
  report_fatal_error("visitCatchSwitch not yet implemented!");
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto &DL = DAG.getDataLayout();
  SDValue Chain = getControlRoot();
  SmallVector<ISD::OutputArg, 8> Outs;
  SmallVector<SDValue, 8> OutVals;

  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
  // they are lowered by LowerDeoptimizingReturn as part of the return, e.g.
  //
  //   %val = call <ty> @llvm.experimental.deoptimize()
  //   ret <ty> %val
  //
  if (I.getParent()->getTerminatingDeoptimizeCall()) {
    LowerDeoptimizingReturn();
    return;
  }

  if (!FuncInfo.CanLowerReturn) {
    unsigned DemoteReg = FuncInfo.DemoteRegister;
    const Function *F = I.getParent()->getParent();

    // Emit a store of the return value through the virtual register.
    // Leave Outs empty so that LowerReturn won't try to load return
    // registers the usual way.
    SmallVector<EVT, 1> PtrValueVTs;
    ComputeValueVTs(TLI, DL,
                    F->getReturnType()->getPointerTo(
                        DAG.getDataLayout().getAllocaAddrSpace()),
                    PtrValueVTs);

    SDValue RetPtr =
        DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);
    SDValue RetOp = getValue(I.getOperand(0));

    SmallVector<EVT, 4> ValueVTs, MemVTs;
    SmallVector<uint64_t, 4> Offsets;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
                    &Offsets);
    unsigned NumValues = ValueVTs.size();

    SmallVector<SDValue, 4> Chains(NumValues);
    Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
    for (unsigned i = 0; i != NumValues; ++i) {
      // An aggregate return value cannot wrap around the address space, so
      // offsets to its parts don't wrap either.
      SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
                                           TypeSize::Fixed(Offsets[i]));

      SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
      if (MemVTs[i] != ValueVTs[i])
        Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
      Chains[i] = DAG.getStore(
          Chain, getCurSDLoc(), Val,
          // FIXME: better loc info would be nice.
          Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
          commonAlignment(BaseAlign, Offsets[i]));
    }

    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                        MVT::Other, Chains);
  } else if (I.getNumOperands() != 0) {
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
    unsigned NumValues = ValueVTs.size();
    if (NumValues) {
      SDValue RetOp = getValue(I.getOperand(0));

      const Function *F = I.getParent()->getParent();

      bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
          I.getOperand(0)->getType(), F->getCallingConv(),
          /*IsVarArg*/ false, DL);

      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
      if (F->getAttributes().hasRetAttr(Attribute::SExt))
        ExtendKind = ISD::SIGN_EXTEND;
      else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
        ExtendKind = ISD::ZERO_EXTEND;

      LLVMContext &Context = F->getContext();
      bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);

      for (unsigned j = 0; j != NumValues; ++j) {
        EVT VT = ValueVTs[j];

        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
          VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);

        CallingConv::ID CC = F->getCallingConv();

        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
        SmallVector<SDValue, 4> Parts(NumParts);
        getCopyToParts(DAG, getCurSDLoc(),
                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
                       &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);

        // 'inreg' on function refers to return value
        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
        if (RetInReg)
          Flags.setInReg();

        if (I.getOperand(0)->getType()->isPointerTy()) {
          Flags.setPointer();
          Flags.setPointerAddrSpace(
              cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
        }

        if (NeedsRegBlock) {
          Flags.setInConsecutiveRegs();
          if (j == NumValues - 1)
            Flags.setInConsecutiveRegsLast();
        }

        // Propagate extension type if any
        if (ExtendKind == ISD::SIGN_EXTEND)
          Flags.setSExt();
        else if (ExtendKind == ISD::ZERO_EXTEND)
          Flags.setZExt();

        for (unsigned i = 0; i < NumParts; ++i) {
          Outs.push_back(ISD::OutputArg(Flags,
                                        Parts[i].getValueType().getSimpleVT(),
                                        VT, /*isfixed=*/true, 0, 0));
          OutVals.push_back(Parts[i]);
        }
      }
    }
  }

  // Push in swifterror virtual register as the last element of Outs. This makes
  // sure swifterror virtual register will be returned in the swifterror
  // physical register.
  const Function *F = I.getParent()->getParent();
  if (TLI.supportSwiftError() &&
      F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
    assert(SwiftError.getFunctionArg() && "Need a swift error argument");
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
    Flags.setSwiftError();
    Outs.push_back(ISD::OutputArg(
        Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
        /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
    // Create SDNode for the swifterror virtual register.
    OutVals.push_back(
        DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
                            &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
                        EVT(TLI.getPointerTy(DL))));
  }

  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
  CallingConv::ID CallConv =
    DAG.getMachineFunction().getFunction().getCallingConv();
  Chain = DAG.getTargetLoweringInfo().LowerReturn(
      Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);

  // Verify that the target's LowerReturn behaved as expected.
  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
         "LowerReturn didn't return a valid chain!");

  // Update the DAG with the new chain value resulting from return lowering.
  DAG.setRoot(Chain);
}
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
/// created for it, emit nodes to copy the value into the virtual
/// registers.
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
  // Skip empty types.
  if (V->getType()->isEmptyTy())
    return;

  DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
  if (VMI != FuncInfo.ValueMap.end()) {
    assert(!V->use_empty() && "Unused value assigned virtual registers!");
    CopyValueToVirtualRegister(V, VMI->second);
  }
}

/// ExportFromCurrentBlock - If this condition isn't known to be exported from
/// the current basic block, add it to ValueMap now so that we'll get a
/// CopyToReg.
void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
  // No need to export constants.
  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;

  // Already exported?
  if (FuncInfo.isExportedInst(V)) return;

  unsigned Reg = FuncInfo.InitializeRegForValue(V);
  CopyValueToVirtualRegister(V, Reg);
}

bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
                                                       const BasicBlock *FromBB) {
  // The operands of the setcc have to be in this block. We don't know
  // how to export them from some other block.
  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
    // Can export from current BB.
    if (VI->getParent() == FromBB)
      return true;

    // Is already exported, noop.
    return FuncInfo.isExportedInst(V);
  }

  // If this is an argument, we can export it if the BB is the entry block or
  // if it is already exported.
  if (isa<Argument>(V)) {
    if (FromBB->isEntryBlock())
      return true;

    // Otherwise, can only export this if it is already exported.
    return FuncInfo.isExportedInst(V);
  }

  // Otherwise, constants can always be exported.
  return true;
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
BranchProbability
SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
                                        const MachineBasicBlock *Dst) const {
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  const BasicBlock *SrcBB = Src->getBasicBlock();
  const BasicBlock *DstBB = Dst->getBasicBlock();
  if (!BPI) {
    // If BPI is not available, set the default probability as 1 / N, where N is
    // the number of successors.
    auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
    return BranchProbability(1, SuccSize);
  }
  return BPI->getEdgeProbability(SrcBB, DstBB);
}

void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
                                               MachineBasicBlock *Dst,
                                               BranchProbability Prob) {
  if (!FuncInfo.BPI)
    Src->addSuccessorWithoutProb(Dst);
  else {
    if (Prob.isUnknown())
      Prob = getEdgeProbability(Src, Dst);
    Src->addSuccessor(Dst, Prob);
  }
}

static bool InBlock(const Value *V, const BasicBlock *BB) {
  if (const Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() == BB;
  return true;
}
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
                                                  MachineBasicBlock *TBB,
                                                  MachineBasicBlock *FBB,
                                                  MachineBasicBlock *CurBB,
                                                  MachineBasicBlock *SwitchBB,
                                                  BranchProbability TProb,
                                                  BranchProbability FProb,
                                                  bool InvertCond) {
  const BasicBlock *BB = CurBB->getBasicBlock();

  // If the leaf of the tree is a comparison, merge the condition into
  // the caseblock.
  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
    // The operands of the cmp have to be in this block. We don't know
    // how to export them from some other block. If this is the first block
    // of the sequence, no exporting is needed.
    if (CurBB == SwitchBB ||
        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
      ISD::CondCode Condition;
      if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
        ICmpInst::Predicate Pred =
            InvertCond ? IC->getInversePredicate() : IC->getPredicate();
        Condition = getICmpCondCode(Pred);
      } else {
        const FCmpInst *FC = cast<FCmpInst>(Cond);
        FCmpInst::Predicate Pred =
            InvertCond ? FC->getInversePredicate() : FC->getPredicate();
        Condition = getFCmpCondCode(Pred);
        if (TM.Options.NoNaNsFPMath)
          Condition = getFCmpCodeWithoutNaN(Condition);
      }

      CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
                   TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
      SL->SwitchCases.push_back(CB);
      return;
    }
  }

  // Create a CaseBlock record representing this branch.
  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
               nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
  SL->SwitchCases.push_back(CB);
}
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
                                               MachineBasicBlock *TBB,
                                               MachineBasicBlock *FBB,
                                               MachineBasicBlock *CurBB,
                                               MachineBasicBlock *SwitchBB,
                                               Instruction::BinaryOps Opc,
                                               BranchProbability TProb,
                                               BranchProbability FProb,
                                               bool InvertCond) {
  // Skip over not part of the tree and remember to invert op and operands at
  // next level.
  Value *NotCond;
  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
      InBlock(NotCond, CurBB->getBasicBlock())) {
    FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
                         !InvertCond);
    return;
  }

  const Instruction *BOp = dyn_cast<Instruction>(Cond);
  const Value *BOpOp0, *BOpOp1;
  // Compute the effective opcode for Cond, taking into account whether it needs
  // to be inverted, e.g.
  //   and (not (or A, B)), C
  // gets lowered as
  //   and (and (not A, not B), C)
  Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
  if (BOp) {
    BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
               ? Instruction::And
               : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
                      ? Instruction::Or
                      : (Instruction::BinaryOps)0);
    if (InvertCond) {
      if (BOpc == Instruction::And)
        BOpc = Instruction::Or;
      else if (BOpc == Instruction::Or)
        BOpc = Instruction::And;
    }
  }

  // If this node is not part of the or/and tree, emit it as a branch.
  // Note that all nodes in the tree should have same opcode.
  bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
  if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
      !InBlock(BOpOp0, CurBB->getBasicBlock()) ||
      !InBlock(BOpOp1, CurBB->getBasicBlock())) {
    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
                                 TProb, FProb, InvertCond);
    return;
  }

  // Create TmpBB after CurBB.
  MachineFunction::iterator BBI(CurBB);
  MachineFunction &MF = DAG.getMachineFunction();
  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
  CurBB->getParent()->insert(++BBI, TmpBB);

  if (Opc == Instruction::Or) {
    // Codegen X | Y as:
    // BB1:
    //   jmp_if_X TBB
    //   jmp TmpBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
    //     = TrueProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
    // A/(1+B) and 2B/(1+B). This choice assumes that
    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
    // TmpBB, but the math is more complicated.
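    //
    // Worked example (illustrative): with A = 1/2 and B = 1/2, BB1 gets
    // probabilities {1/4, 3/4} and TmpBB gets {1/3, 2/3}; the total
    // probability of reaching TBB is then 1/4 + 3/4 * 1/3 = 1/2, i.e. the
    // original A, as required.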
    auto NewTrueProb = TProb / 2;
    auto NewFalseProb = TProb / 2 + FProb;
    // Emit the LHS condition.
    FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
                         NewFalseProb, InvertCond);

    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
                         Probs[1], InvertCond);
  } else {
    assert(Opc == Instruction::And && "Unknown merge op!");
    // Codegen X & Y as:
    // BB1:
    //   jmp_if_X TmpBB
    //   jmp FBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //
    // This requires creation of TmpBB after CurBB.

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
    //     = FalseProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
    // TrueProb for BB1 * FalseProb for TmpBB.
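    //
    // Worked example (illustrative): with A = 1/2 and B = 1/2, BB1 gets
    // probabilities {3/4, 1/4} and TmpBB gets {2/3, 1/3}; the total
    // probability of reaching FBB is then 1/4 + 3/4 * 1/3 = 1/2, i.e. the
    // original B, as required.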
    auto NewTrueProb = TProb + FProb / 2;
    auto NewFalseProb = FProb / 2;
    // Emit the LHS condition.
    FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
                         NewFalseProb, InvertCond);

    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
                         Probs[1], InvertCond);
  }
}
/// If the set of cases should be emitted as a series of branches, return true.
/// If we should emit this as a bunch of and/or'd together conditions, return
/// false.
bool
SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
  if (Cases.size() != 2) return true;

  // If this is two comparisons of the same values or'd or and'd together, they
  // will get folded into a single comparison, so don't emit two blocks.
  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
    return false;
  }

  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
      Cases[0].CC == Cases[1].CC &&
      isa<Constant>(Cases[0].CmpRHS) &&
      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
    if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
      return false;
    if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
      return false;
  }

  return true;
}
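// Illustrative example: for "if (X == null && Y == null)", both CaseBlocks are
// SETEQ compares against the same null RHS, so this returns false and the
// condition is later folded into a single (X | Y) == 0 test instead of two
// separate branch blocks.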
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
  MachineBasicBlock *BrMBB = FuncInfo.MBB;

  // Update machine-CFG edges.
  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];

  if (I.isUnconditional()) {
    // Update machine-CFG edges.
    BrMBB->addSuccessor(Succ0MBB);

    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                              MVT::Other, getControlRoot(),
                              DAG.getBasicBlock(Succ0MBB)));
    return;
  }

  // If this condition is one of the special cases we handle, do special stuff
  // now.
  const Value *CondVal = I.getCondition();
  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];

  // If this is a series of conditions that are or'd or and'd together, emit
  // this as a sequence of branches instead of setcc's with and/or operations.
  // As long as jumps are not expensive (exceptions for multi-use logic ops,
  // unpredictable branches, and vector extracts because those jumps are likely
  // expensive for any target), this should improve performance.
  // For example, instead of something like:
  //     cmp A, B
  //     C = seteq
  //     cmp D, E
  //     F = setle
  //     or C, F
  //     jnz foo
  // Emit:
  //     cmp A, B
  //     je foo
  //     cmp D, E
  //     jle foo
  const Instruction *BOp = dyn_cast<Instruction>(CondVal);
  if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
      BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
    Value *Vec;
    const Value *BOp0, *BOp1;
    Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
    if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
      Opcode = Instruction::And;
    else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
      Opcode = Instruction::Or;

    if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
                    match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
                           getEdgeProbability(BrMBB, Succ0MBB),
                           getEdgeProbability(BrMBB, Succ1MBB),
                           /*InvertCond=*/false);
      // If the compares in later blocks need to use values not currently
      // exported from this block, export them now. This block should always
      // be the first entry.
      assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");

      // Allow some cases to be rejected.
      if (ShouldEmitAsBranches(SL->SwitchCases)) {
        for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
          ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
          ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
        }

        // Emit the branch for this block.
        visitSwitchCase(SL->SwitchCases[0], BrMBB);
        SL->SwitchCases.erase(SL->SwitchCases.begin());
        return;
      }

      // Okay, we decided not to do this, remove any inserted MBB's and clear
      // SwitchCases.
      for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
        FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);

      SL->SwitchCases.clear();
    }
  }

  // Create a CaseBlock record representing this branch.
  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
               nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());

  // Use visitSwitchCase to actually insert the fast branch sequence for this
  // branch.
  visitSwitchCase(CB, BrMBB);
}
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                          MachineBasicBlock *SwitchBB) {
  SDValue Cond;
  SDValue CondLHS = getValue(CB.CmpLHS);
  SDLoc dl = getCurSDLoc();

  if (CB.CC == ISD::SETTRUE) {
    // Branch or fall through to TrueBB.
    addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
    SwitchBB->normalizeSuccProbs();
    if (CB.TrueBB != NextBlock(SwitchBB)) {
      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
                              DAG.getBasicBlock(CB.TrueBB)));
    }
    return;
  }

  auto &TLI = DAG.getTargetLoweringInfo();
  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());

  // Build the setcc now.
  if (!CB.CmpMHS) {
    // Fold "(X == true)" to X and "(X == false)" to !X to
    // handle common cases produced by branch lowering.
    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
        CB.CC == ISD::SETEQ)
      Cond = CondLHS;
    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
             CB.CC == ISD::SETEQ) {
      SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
    } else {
      SDValue CondRHS = getValue(CB.CmpRHS);

      // If a pointer's DAG type is larger than its memory type then the DAG
      // values are zero-extended. This breaks signed comparisons so truncate
      // back to the underlying type before doing the compare.
      if (CondLHS.getValueType() != MemVT) {
        CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
        CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
      }
      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
    }
  } else {
    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");

    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();

    SDValue CmpOp = getValue(CB.CmpMHS);
    EVT VT = CmpOp.getValueType();

    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
                          ISD::SETLE);
    } else {
      SDValue SUB = DAG.getNode(ISD::SUB, dl,
                                VT, CmpOp, DAG.getConstant(Low, dl, VT));
      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
                          DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
    }
  }
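  // Illustrative example: a case range [10, 13] becomes (X - 10) <=u 3, which
  // holds exactly for X in {10, 11, 12, 13} -- a single unsigned compare
  // instead of a pair of signed bound checks.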
  // Update successor info
  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
  // TrueBB and FalseBB are always different unless the incoming IR is
  // degenerate. This only happens when running llc on weird IR.
  if (CB.TrueBB != CB.FalseBB)
    addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
  SwitchBB->normalizeSuccProbs();

  // If the lhs block is the next block, invert the condition so that we can
  // fall through to the lhs instead of the rhs block.
  if (CB.TrueBB == NextBlock(SwitchBB)) {
    std::swap(CB.TrueBB, CB.FalseBB);
    SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
  }

  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, getControlRoot(), Cond,
                               DAG.getBasicBlock(CB.TrueBB));

  // Insert the false branch. Do this even if it's a fall through branch,
  // this makes it easier to do DAG optimizations which require inverting
  // the branch condition.
  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                       DAG.getBasicBlock(CB.FalseBB));

  DAG.setRoot(BrCond);
}
/// visitJumpTable - Emit JumpTable node in the current MBB
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
  // Emit the code for the jump table
  assert(JT.Reg != -1U && "Should lower JT Header first!");
  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
                                     JT.Reg, PTy);
  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
                                    MVT::Other, Index.getValue(1),
                                    Table, Index);
  DAG.setRoot(BrJumpTable);
}
/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
                                               JumpTableHeader &JTH,
                                               MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();

  // Subtract the lowest switch case value from the value being switched on.
  SDValue SwitchOp = getValue(JTH.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
                            DAG.getConstant(JTH.First, dl, VT));

  // The SDNode we just created, which holds the value being switched on minus
  // the smallest case value, needs to be copied to a virtual register so it
  // can be used as an index into the jump table in a subsequent basic block.
  // This value may be smaller or larger than the target's pointer type, and
  // therefore require extension or truncating.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));

  unsigned JumpTableReg =
      FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
                                    JumpTableReg, SwitchOp);
  JT.Reg = JumpTableReg;

  if (!JTH.FallthroughUnreachable) {
    // Emit the range check for the jump table, and branch to the default block
    // for the switch statement if the value being switched on exceeds the
    // largest case in the switch.
    SDValue CMP = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                   Sub.getValueType()),
        Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
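    // Illustrative example: for case values 10..13, Sub is SValue - 10 and the
    // check above is "Sub >u 3", so any switched-on value outside [10, 13]
    // branches to the default block before the table is ever indexed.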
    SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                                 MVT::Other, CopyTo, CMP,
                                 DAG.getBasicBlock(JT.Default));

    // Avoid emitting unnecessary branches to the next block.
    if (JT.MBB != NextBlock(SwitchBB))
      BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                           DAG.getBasicBlock(JT.MBB));

    DAG.setRoot(BrCond);
  } else {
    // Avoid emitting unnecessary branches to the next block.
    if (JT.MBB != NextBlock(SwitchBB))
      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
                              DAG.getBasicBlock(JT.MBB)));
    else
      DAG.setRoot(CopyTo);
  }
}
/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
/// variable if there exists one.
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue &Chain) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
  MachineFunction &MF = DAG.getMachineFunction();
  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
  MachineSDNode *Node =
      DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
  if (Global) {
    MachinePointerInfo MPInfo(Global);
    auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                 MachineMemOperand::MODereferenceable;
    MachineMemOperand *MemRef = MF.getMachineMemOperand(
        MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
    DAG.setNodeMemRefs(Node, {MemRef});
  }
  if (PtrTy != PtrMemTy)
    return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
  return SDValue(Node, 0);
}
/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success bb.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
                                                  MachineBasicBlock *ParentBB) {
  // First create the loads to the guard/stack slot for the comparison.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());

  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
  int FI = MFI.getStackProtectorIndex();

  SDValue Guard;
  SDLoc dl = getCurSDLoc();
  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
  const Module &M = *ParentBB->getParent()->getFunction().getParent();
  Align Align =
      DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));

  // Generate code to load the content of the guard slot.
  SDValue GuardVal = DAG.getLoad(
      PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
      MachineMemOperand::MOVolatile);

  if (TLI.useStackGuardXorFP())
    GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);

  // Retrieve guard check function, nullptr if instrumentation is inlined.
  if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
    // The target provides a guard check function to validate the guard value.
    // Generate a call to that function with the content of the guard slot as
    // argument.
    FunctionType *FnTy = GuardCheckFn->getFunctionType();
    assert(FnTy->getNumParams() == 1 && "Invalid function signature");

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = GuardVal;
    Entry.Ty = FnTy->getParamType(0);
    if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
      Entry.IsInReg = true;
    Args.push_back(Entry);

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(getCurSDLoc())
        .setChain(DAG.getEntryNode())
        .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
                   getValue(GuardCheckFn), std::move(Args));

    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    DAG.setRoot(Result.second);
    return;
  }

  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
  // Otherwise, emit a volatile load to retrieve the stack guard value.
  SDValue Chain = DAG.getEntryNode();
  if (TLI.useLoadStackGuardNode()) {
    Guard = getLoadStackGuard(DAG, dl, Chain);
  } else {
    const Value *IRGuard = TLI.getSDagStackGuard(M);
    SDValue GuardPtr = getValue(IRGuard);

    Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
                        MachinePointerInfo(IRGuard, 0), Align,
                        MachineMemOperand::MOVolatile);
  }

  // Perform the comparison via a getsetcc.
  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
                                                        *DAG.getContext(),
                                                        Guard.getValueType()),
                             Guard, GuardVal, ISD::SETNE);

  // If the guard/stackslot do not equal, branch to failure MBB.
  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, GuardVal.getOperand(0),
                               Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
  // Otherwise branch to success MBB.
  SDValue Br = DAG.getNode(ISD::BR, dl,
                           MVT::Other, BrCond,
                           DAG.getBasicBlock(SPD.getSuccessMBB()));

  DAG.setRoot(Br);
}
/// Codegen the failure basic block for a stack protector check.
///
/// A failure stack protector machine basic block consists simply of a call to
/// __stack_chk_fail().
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setDiscardResult(true);
  SDValue Chain =
      TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
                      None, CallOptions, getCurSDLoc()).second;
  // On PS4/PS5, the "return address" must still be within the calling
  // function, even if it's at the very end, so emit an explicit TRAP here.
  // Passing 'true' for doesNotReturn above won't generate the trap for us.
  if (TM.getTargetTriple().isPS())
    Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
  // WebAssembly needs an unreachable instruction after a non-returning call,
  // because the function return type can be different from __stack_chk_fail's
  // return type (void).
  if (TM.getTargetTriple().isWasm())
    Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);

  DAG.setRoot(Chain);
}
/// visitBitTestHeader - This function emits necessary code to produce value
/// suitable for "bit tests"
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
                                             MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();

  // Subtract the minimum value.
  SDValue SwitchOp = getValue(B.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue RangeSub =
      DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));

  // Determine the type of the test operands.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  bool UsePtrType = false;
  if (!TLI.isTypeLegal(VT)) {
    UsePtrType = true;
  } else {
    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
        // Switch table case range are encoded into series of masks.
        // Just use pointer type, it's guaranteed to fit.
        UsePtrType = true;
        break;
      }
  }
  SDValue Sub = RangeSub;
  if (UsePtrType) {
    VT = TLI.getPointerTy(DAG.getDataLayout());
    Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
  }

  B.RegVT = VT.getSimpleVT();
  B.Reg = FuncInfo.CreateReg(B.RegVT);
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);

  MachineBasicBlock* MBB = B.Cases[0].ThisBB;

  if (!B.FallthroughUnreachable)
    addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
  SwitchBB->normalizeSuccProbs();

  SDValue Root = CopyTo;
  if (!B.FallthroughUnreachable) {
    // Conditional branch to the default block.
    SDValue RangeCmp = DAG.getSetCC(dl,
        TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                               RangeSub.getValueType()),
        RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
        ISD::SETUGT);

    Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
                       DAG.getBasicBlock(B.Default));
  }

  // Avoid emitting unnecessary branches to the next block.
  if (MBB != NextBlock(SwitchBB))
    Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));

  DAG.setRoot(Root);
}
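// Illustrative example: a bit-test cluster covering case values {0, 3, 5}
// (with B.First == 0) is encoded as the mask 0b101001; the header above
// computes SValue - First once, and each bit-test block then checks that
// shifted value against its mask.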
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                                           MachineBasicBlock *NextMBB,
                                           BranchProbability BranchProbToNext,
                                           unsigned Reg, BitTestCase &B,
                                           MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();
  MVT VT = BB.RegVT;
  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
  SDValue Cmp;
  unsigned PopCount = countPopulation(B.Mask);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (PopCount == 1) {
    // Testing for a single bit; just compare the shift count with what it
    // would need to be to shift a 1 bit in that position.
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
        ISD::SETEQ);
  } else if (PopCount == BB.Range) {
    // There is only one zero bit in the range, test for it directly.
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
        ISD::SETNE);
  } else {
    // Make desired shift
    SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
                                    DAG.getConstant(1, dl, VT), ShiftOp);

    // Emit bit tests and jumps
    SDValue AndOp = DAG.getNode(ISD::AND, dl,
                                VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
  // one as they are relative probabilities (and thus work more like weights),
  // and hence we need to normalize them to let the sum of them become one.
  SwitchBB->normalizeSuccProbs();

  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
                              MVT::Other, getControlRoot(),
                              Cmp, DAG.getBasicBlock(B.TargetBB));

  // Avoid emitting unnecessary branches to the next block.
  if (NextMBB != NextBlock(SwitchBB))
    BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
                        DAG.getBasicBlock(NextMBB));

  DAG.setRoot(BrAnd);
}
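// Illustrative example: if a bit-test mask is 0b1000 (only case value 3), the
// PopCount == 1 path above reduces the test to "ShiftOp == 3" instead of
// materializing (1 << ShiftOp) and AND'ing it with the mask.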
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;

  // Retrieve successors. Look through artificial IR level blocks like
  // catchswitch for successors.
  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
  const BasicBlock *EHPadBB = I.getSuccessor(1);

  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
  // have to do anything here to lower funclet bundles.
  assert(!I.hasOperandBundlesOtherThan(
             {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
              LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
              LLVMContext::OB_cfguardtarget,
              LLVMContext::OB_clang_arc_attachedcall}) &&
         "Cannot lower invokes with arbitrary operand bundles yet!");

  const Value *Callee(I.getCalledOperand());
  const Function *Fn = dyn_cast<Function>(Callee);
  if (isa<InlineAsm>(Callee))
    visitInlineAsm(I, EHPadBB);
  else if (Fn && Fn->isIntrinsic()) {
    switch (Fn->getIntrinsicID()) {
    default:
      llvm_unreachable("Cannot invoke this intrinsic");
    case Intrinsic::donothing:
      // Ignore invokes to @llvm.donothing: jump directly to the next BB.
    case Intrinsic::seh_try_begin:
    case Intrinsic::seh_scope_begin:
    case Intrinsic::seh_try_end:
    case Intrinsic::seh_scope_end:
      break;
    case Intrinsic::experimental_patchpoint_void:
    case Intrinsic::experimental_patchpoint_i64:
      visitPatchpoint(I, EHPadBB);
      break;
    case Intrinsic::experimental_gc_statepoint:
      LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
      break;
    case Intrinsic::wasm_rethrow: {
      // This is usually done in visitTargetIntrinsic, but this intrinsic is
      // special because it can be invoked, so we manually lower it to a DAG
      // node here.
      SmallVector<SDValue, 8> Ops;
      Ops.push_back(getRoot()); // inchain
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      Ops.push_back(
          DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
                                TLI.getPointerTy(DAG.getDataLayout())));
      SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
      DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
      break;
    }
    }
  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
    // Currently we do not lower any intrinsic calls with deopt operand bundles.
    // Eventually we will support lowering the @llvm.experimental.deoptimize
    // intrinsic, and right now there are no plans to support other intrinsics
    // with deopt state.
    LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
  } else {
    LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
  }

  // If the value of the invoke is used outside of its defining block, make it
  // available as a virtual register.
  // We already took care of the exported value for the statepoint instruction
  // during call to the LowerStatepoint.
  if (!isa<GCStatepointInst>(I)) {
    CopyToExportRegsIfNeeded(&I);
  }

  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  BranchProbability EHPadBBProb =
      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);

  // Update successor info.
  addSuccessorWithProb(InvokeMBB, Return);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
  }
  InvokeMBB->normalizeSuccProbs();

  // Drop into normal successor.
  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
                          DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
  MachineBasicBlock *CallBrMBB = FuncInfo.MBB;

  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
  // have to do anything here to lower funclet bundles.
  assert(!I.hasOperandBundlesOtherThan(
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
         "Cannot lower callbrs with arbitrary operand bundles yet!");

  assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
  visitInlineAsm(I);
  CopyToExportRegsIfNeeded(&I);

  // Retrieve successors.
  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];

  // Update successor info.
  addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
  for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
    MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
    addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
    Target->setIsInlineAsmBrIndirectTarget();
  }
  CallBrMBB->normalizeSuccProbs();

  // Drop into default successor.
  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                          MVT::Other, getControlRoot(),
                          DAG.getBasicBlock(Return)));
}

void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
  assert(FuncInfo.MBB->isEHPad() &&
         "Call to landingpad not in landing pad!");

  // If there aren't registers to copy the values into (e.g., during SjLj
  // exceptions), then don't bother to create these DAG nodes.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
    return;

  // If landingpad's return type is token type, we don't create DAG nodes
  // for its exception pointer and selector value. The extraction of exception
  // pointer or selector value from token type landingpads is not currently
  // supported.
  if (LP.getType()->isTokenTy())
    return;

  SmallVector<EVT, 2> ValueVTs;
  SDLoc dl = getCurSDLoc();
  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");

  // Get the two live-in registers as SDValues. The physregs have already been
  // copied into virtual registers.
  SDValue Ops[2];
  if (FuncInfo.ExceptionPointerVirtReg) {
    Ops[0] = DAG.getZExtOrTrunc(
        DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                           FuncInfo.ExceptionPointerVirtReg,
                           TLI.getPointerTy(DAG.getDataLayout())),
        dl, ValueVTs[0]);
  } else {
    Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  Ops[1] = DAG.getZExtOrTrunc(
      DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                         FuncInfo.ExceptionSelectorVirtReg,
                         TLI.getPointerTy(DAG.getDataLayout())),
      dl, ValueVTs[1]);

  // Merge into one.
  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
                            DAG.getVTList(ValueVTs), Ops);
  setValue(&LP, Res);
}
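
// If a block that serves as a jump-table or bit-test header has been split,
// repoint the pending switch-lowering cases from the original block (First)
// to the block that now ends with the branch (Last).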
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
                                           MachineBasicBlock *Last) {
  // Update JTCases.
  for (JumpTableBlock &JTB : SL->JTCases)
    if (JTB.first.HeaderBB == First)
      JTB.first.HeaderBB = Last;

  // Update BitTestCases.
  for (BitTestBlock &BTB : SL->BitTestCases)
    if (BTB.Parent == First)
      BTB.Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;

  // Update machine-CFG edges with unique successors.
  SmallSet<BasicBlock *, 32> Done;
  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
    BasicBlock *BB = I.getSuccessor(i);
    bool Inserted = Done.insert(BB).second;
    if (!Inserted)
      continue;

    MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
    addSuccessorWithProb(IndirectBrMBB, Succ);
  }
  IndirectBrMBB->normalizeSuccProbs();

  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
                          MVT::Other, getControlRoot(),
                          getValue(I.getAddress())));
}
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
  if (!DAG.getTarget().Options.TrapUnreachable)
    return;

  // We may be able to ignore unreachable behind a noreturn call.
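  // For example, an 'unreachable' that immediately follows a call to a
  // noreturn function such as abort() does not need an extra trap instruction.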
  if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
    const BasicBlock &BB = *I.getParent();
    if (&I != &BB.front()) {
      BasicBlock::const_iterator PredI =
          std::prev(BasicBlock::const_iterator(&I));
      if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
        if (Call->doesNotReturn())
          return;
      }
    }
  }

  DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
  SDNodeFlags Flags;
  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
    Flags.copyFMF(*FPOp);

  SDValue Op = getValue(I.getOperand(0));
  SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
                                    Op, Flags);
  setValue(&I, UnNodeValue);
}
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
  SDNodeFlags Flags;
  if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
    Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
    Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
  }
  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
    Flags.setExact(ExactOp->isExact());
  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
    Flags.copyFMF(*FPOp);

  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));
  SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
                                     Op1, Op2, Flags);
  setValue(&I, BinNodeValue);
}
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));
  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
      Op1.getValueType(), DAG.getDataLayout());

  // Coerce the shift amount to the right type if we can. This exposes the
  // truncate or zext to optimization early.
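  // For example, an i8 shift amount used to shift an i64 value is widened to
  // the target's shift-amount type here rather than during type legalization.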
  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
    assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) &&
           "Unexpected shift type");
    Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy);
  }

  bool nuw = false;
  bool nsw = false;
  bool exact = false;

  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
    if (const OverflowingBinaryOperator *OFBinOp =
            dyn_cast<const OverflowingBinaryOperator>(&I)) {
      nuw = OFBinOp->hasNoUnsignedWrap();
      nsw = OFBinOp->hasNoSignedWrap();
    }
    if (const PossiblyExactOperator *ExactOp =
            dyn_cast<const PossiblyExactOperator>(&I))
      exact = ExactOp->isExact();
  }
  SDNodeFlags Flags;
  Flags.setExact(exact);
  Flags.setNoSignedWrap(nsw);
  Flags.setNoUnsignedWrap(nuw);
  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
                            Flags);
  setValue(&I, Res);
}
void SelectionDAGBuilder::visitSDiv(const User &I) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));

  SDNodeFlags Flags;
  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
                 cast<PossiblyExactOperator>(&I)->isExact());
  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
                           Op2, Flags));
}
void SelectionDAGBuilder::visitICmp(const User &I) {
  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
    predicate = IC->getPredicate();
  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
    predicate = ICmpInst::Predicate(IC->getPredicate());
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));
  ISD::CondCode Opcode = getICmpCondCode(predicate);

  auto &TLI = DAG.getTargetLoweringInfo();
  EVT MemVT =
      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());

  // If a pointer's DAG type is larger than its memory type then the DAG values
  // are zero-extended. This breaks signed comparisons so truncate back to the
  // underlying type before doing the compare.
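  // This situation arises, for example, on targets whose pointers are 32 bits
  // in memory but are widened to a 64-bit DAG type (such as an ILP32 ABI on a
  // 64-bit architecture).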
  if (Op1.getValueType() != MemVT) {
    Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
    Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
  }

  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
void SelectionDAGBuilder::visitFCmp(const User &I) {
  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
    predicate = FC->getPredicate();
  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
    predicate = FCmpInst::Predicate(FC->getPredicate());
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));

  ISD::CondCode Condition = getFCmpCondCode(predicate);
  auto *FPMO = cast<FPMathOperator>(&I);
  if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
    Condition = getFCmpCodeWithoutNaN(Condition);

  SDNodeFlags Flags;
  Flags.copyFMF(*FPMO);
  SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);

  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
// Check if the condition of the select has one use or two users that are both
// selects with the same condition.
static bool hasOnlySelectUsers(const Value *Cond) {
  return llvm::all_of(Cond->users(), [](const Value *V) {
    return isa<SelectInst>(V);
  });
}
void SelectionDAGBuilder::visitSelect(const User &I) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
                  ValueVTs);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0) return;

  SmallVector<SDValue, 4> Values(NumValues);
  SDValue Cond   = getValue(I.getOperand(0));
  SDValue LHSVal = getValue(I.getOperand(1));
  SDValue RHSVal = getValue(I.getOperand(2));
  SmallVector<SDValue, 1> BaseOps(1, Cond);
  ISD::NodeType OpCode =
      Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;

  bool IsUnaryAbs = false;
  bool Negate = false;

  SDNodeFlags Flags;
  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
    Flags.copyFMF(*FPOp);

  // Min/max matching is only viable if all output VTs are the same.
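  // For example, 'select (icmp slt %x, %y), %x, %y' can then be emitted
  // directly as an ISD::SMIN node when the target supports it.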
  if (is_splat(ValueVTs)) {
    EVT VT = ValueVTs[0];
    LLVMContext &Ctx = *DAG.getContext();
    auto &TLI = DAG.getTargetLoweringInfo();

    // We care about the legality of the operation after it has been type
    // legalized.
    while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
      VT = TLI.getTypeToTransformTo(Ctx, VT);

    // If the vselect is legal, assume we want to leave this as a vector setcc +
    // vselect. Otherwise, if this is going to be scalarized, we want to see if
    // min/max is legal on the scalar type.
    bool UseScalarMinMax = VT.isVector() &&
      !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);

    Value *LHS, *RHS;
    auto SPR = matchSelectPattern(const_cast<User *>(&I), LHS, RHS);
    ISD::NodeType Opc = ISD::DELETED_NODE;
    switch (SPR.Flavor) {
    case SPF_UMAX:    Opc = ISD::UMAX; break;
    case SPF_UMIN:    Opc = ISD::UMIN; break;
    case SPF_SMAX:    Opc = ISD::SMAX; break;
    case SPF_SMIN:    Opc = ISD::SMIN; break;
    case SPF_FMINNUM:
      switch (SPR.NaNBehavior) {
      case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
      case SPNB_RETURNS_NAN:   Opc = ISD::FMINIMUM; break;
      case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
      case SPNB_RETURNS_ANY: {
        if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
          Opc = ISD::FMINNUM;
        else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
          Opc = ISD::FMINIMUM;
        else if (UseScalarMinMax)
          Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
            ISD::FMINNUM : ISD::FMINIMUM;
        break;
      }
      }
      break;
    case SPF_FMAXNUM:
      switch (SPR.NaNBehavior) {
      case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
      case SPNB_RETURNS_NAN:   Opc = ISD::FMAXIMUM; break;
      case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
      case SPNB_RETURNS_ANY:
        if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
          Opc = ISD::FMAXNUM;
        else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
          Opc = ISD::FMAXIMUM;
        else if (UseScalarMinMax)
          Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
            ISD::FMAXNUM : ISD::FMAXIMUM;
        break;
      }
      break;
    case SPF_NABS:
      Negate = true;
      LLVM_FALLTHROUGH;
    case SPF_ABS:
      IsUnaryAbs = true;
      Opc = ISD::ABS;
      break;
    default: break;
    }

    if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
        (TLI.isOperationLegalOrCustom(Opc, VT) ||
         (UseScalarMinMax &&
          TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
        // If the underlying comparison instruction is used by any other
        // instruction, the consumed instructions won't be destroyed, so it is
        // not profitable to convert to a min/max.
        hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
      OpCode = Opc;
      LHSVal = getValue(LHS);
      RHSVal = getValue(RHS);
      BaseOps.clear();
    }

    if (IsUnaryAbs) {
      OpCode = Opc;
      LHSVal = getValue(LHS);
      BaseOps.clear();
    }
  }

  if (IsUnaryAbs) {
    for (unsigned i = 0; i != NumValues; ++i) {
      SDLoc dl = getCurSDLoc();
      EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
      Values[i] =
          DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
      if (Negate)
        Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
                                Values[i]);
    }
  } else {
    for (unsigned i = 0; i != NumValues; ++i) {
      SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
      Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
      Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
      Values[i] = DAG.getNode(
          OpCode, getCurSDLoc(),
          LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
    }
  }

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitTrunc(const User &I) {
  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitZExt(const User &I) {
  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSExt(const User &I) {
  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
  // SExt also can't be a cast to bool for same reason. So, nothing much to do
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
  // FPTrunc is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  SDLoc dl = getCurSDLoc();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
                           DAG.getTargetConstant(
                               0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
  // FPExt is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
  // FPToUI is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToSI(const User &I) {
  // FPToSI is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitUIToFP(const User &I) {
  // UIToFP is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
  // SIToFP is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
  // What to do depends on the size of the integer and the size of the pointer.
  // We can either truncate, zero extend, or no-op, accordingly.
  SDValue N = getValue(I.getOperand(0));
  auto &TLI = DAG.getTargetLoweringInfo();
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  EVT PtrMemVT =
      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
  setValue(&I, N);
}
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
  // What to do depends on the size of the integer and the size of the pointer.
  // We can either truncate, zero extend, or no-op, accordingly.
  SDValue N = getValue(I.getOperand(0));
  auto &TLI = DAG.getTargetLoweringInfo();
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
  setValue(&I, N);
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
  SDValue N = getValue(I.getOperand(0));
  SDLoc dl = getCurSDLoc();
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());

  // BitCast assures us that source and destination are the same size so this is
  // either a BITCAST or a no-op.
  if (DestVT != N.getValueType())
    setValue(&I, DAG.getNode(ISD::BITCAST, dl,
                             DestVT, N)); // convert types.
  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
  // might fold any kind of constant expression to an integer constant and that
  // is not what we are looking for. Only recognize a bitcast of a genuine
  // constant integer as an opaque constant.
  else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
    setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
                                 /*isOpaque*/true));
  else
    setValue(&I, N); // noop cast.
}
void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Value *SV = I.getOperand(0);
  SDValue N = getValue(SV);
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());

  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
  unsigned DestAS = I.getType()->getPointerAddressSpace();

  if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
    N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);

  setValue(&I, N);
}
void SelectionDAGBuilder::visitInsertElement(const User &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue InVec = getValue(I.getOperand(0));
  SDValue InVal = getValue(I.getOperand(1));
  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
                                     TLI.getVectorIdxTy(DAG.getDataLayout()));
  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
                           TLI.getValueType(DAG.getDataLayout(), I.getType()),
                           InVec, InVal, InIdx));
}
void SelectionDAGBuilder::visitExtractElement(const User &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue InVec = getValue(I.getOperand(0));
  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
                                     TLI.getVectorIdxTy(DAG.getDataLayout()));
  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
                           TLI.getValueType(DAG.getDataLayout(), I.getType()),
                           InVec, InIdx));
}
3551 void SelectionDAGBuilder::visitShuffleVector(const User
&I
) {
3552 SDValue Src1
= getValue(I
.getOperand(0));
3553 SDValue Src2
= getValue(I
.getOperand(1));
3555 if (auto *SVI
= dyn_cast
<ShuffleVectorInst
>(&I
))
3556 Mask
= SVI
->getShuffleMask();
3558 Mask
= cast
<ConstantExpr
>(I
).getShuffleMask();
3559 SDLoc DL
= getCurSDLoc();
3560 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
3561 EVT VT
= TLI
.getValueType(DAG
.getDataLayout(), I
.getType());
3562 EVT SrcVT
= Src1
.getValueType();
3564 if (all_of(Mask
, [](int Elem
) { return Elem
== 0; }) &&
3565 VT
.isScalableVector()) {
3566 // Canonical splat form of first element of first input vector.
3568 DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, SrcVT
.getScalarType(), Src1
,
3569 DAG
.getVectorIdxConstant(0, DL
));
3570 setValue(&I
, DAG
.getNode(ISD::SPLAT_VECTOR
, DL
, VT
, FirstElt
));
3574 // For now, we only handle splats for scalable vectors.
3575 // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
3576 // for targets that support a SPLAT_VECTOR for non-scalable vector types.
3577 assert(!VT
.isScalableVector() && "Unsupported scalable vector shuffle");
3579 unsigned SrcNumElts
= SrcVT
.getVectorNumElements();
3580 unsigned MaskNumElts
= Mask
.size();
3582 if (SrcNumElts
== MaskNumElts
) {
3583 setValue(&I
, DAG
.getVectorShuffle(VT
, DL
, Src1
, Src2
, Mask
));
3587 // Normalize the shuffle vector since mask and vector length don't match.
3588 if (SrcNumElts
< MaskNumElts
) {
3589 // Mask is longer than the source vectors. We can use concatenate vector to
3590 // make the mask and vectors lengths match.
3592 if (MaskNumElts
% SrcNumElts
== 0) {
3593 // Mask length is a multiple of the source vector length.
3594 // Check if the shuffle is some kind of concatenation of the input
3596 unsigned NumConcat
= MaskNumElts
/ SrcNumElts
;
3597 bool IsConcat
= true;
3598 SmallVector
<int, 8> ConcatSrcs(NumConcat
, -1);
3599 for (unsigned i
= 0; i
!= MaskNumElts
; ++i
) {
3603 // Ensure the indices in each SrcVT sized piece are sequential and that
3604 // the same source is used for the whole piece.
3605 if ((Idx
% SrcNumElts
!= (i
% SrcNumElts
)) ||
3606 (ConcatSrcs
[i
/ SrcNumElts
] >= 0 &&
3607 ConcatSrcs
[i
/ SrcNumElts
] != (int)(Idx
/ SrcNumElts
))) {
3611 // Remember which source this index came from.
3612 ConcatSrcs
[i
/ SrcNumElts
] = Idx
/ SrcNumElts
;
3615 // The shuffle is concatenating multiple vectors together. Just emit
3616 // a CONCAT_VECTORS operation.
3618 SmallVector
<SDValue
, 8> ConcatOps
;
3619 for (auto Src
: ConcatSrcs
) {
3621 ConcatOps
.push_back(DAG
.getUNDEF(SrcVT
));
3623 ConcatOps
.push_back(Src1
);
3625 ConcatOps
.push_back(Src2
);
3627 setValue(&I
, DAG
.getNode(ISD::CONCAT_VECTORS
, DL
, VT
, ConcatOps
));
3632 unsigned PaddedMaskNumElts
= alignTo(MaskNumElts
, SrcNumElts
);
3633 unsigned NumConcat
= PaddedMaskNumElts
/ SrcNumElts
;
3634 EVT PaddedVT
= EVT::getVectorVT(*DAG
.getContext(), VT
.getScalarType(),
3637 // Pad both vectors with undefs to make them the same length as the mask.
3638 SDValue UndefVal
= DAG
.getUNDEF(SrcVT
);
3640 SmallVector
<SDValue
, 8> MOps1(NumConcat
, UndefVal
);
3641 SmallVector
<SDValue
, 8> MOps2(NumConcat
, UndefVal
);
3645 Src1
= DAG
.getNode(ISD::CONCAT_VECTORS
, DL
, PaddedVT
, MOps1
);
3646 Src2
= DAG
.getNode(ISD::CONCAT_VECTORS
, DL
, PaddedVT
, MOps2
);
3648 // Readjust mask for new input vector length.
3649 SmallVector
<int, 8> MappedOps(PaddedMaskNumElts
, -1);
3650 for (unsigned i
= 0; i
!= MaskNumElts
; ++i
) {
3652 if (Idx
>= (int)SrcNumElts
)
3653 Idx
-= SrcNumElts
- PaddedMaskNumElts
;
3657 SDValue Result
= DAG
.getVectorShuffle(PaddedVT
, DL
, Src1
, Src2
, MappedOps
);
3659 // If the concatenated vector was padded, extract a subvector with the
3660 // correct number of elements.
3661 if (MaskNumElts
!= PaddedMaskNumElts
)
3662 Result
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, VT
, Result
,
3663 DAG
.getVectorIdxConstant(0, DL
));
3665 setValue(&I
, Result
);
3669 if (SrcNumElts
> MaskNumElts
) {
3670 // Analyze the access pattern of the vector to see if we can extract
3671 // two subvectors and do the shuffle.
3672 int StartIdx
[2] = { -1, -1 }; // StartIdx to extract from
3673 bool CanExtract
= true;
3674 for (int Idx
: Mask
) {
3679 if (Idx
>= (int)SrcNumElts
) {
3684 // If all the indices come from the same MaskNumElts sized portion of
3685 // the sources we can use extract. Also make sure the extract wouldn't
3686 // extract past the end of the source.
3687 int NewStartIdx
= alignDown(Idx
, MaskNumElts
);
3688 if (NewStartIdx
+ MaskNumElts
> SrcNumElts
||
3689 (StartIdx
[Input
] >= 0 && StartIdx
[Input
] != NewStartIdx
))
3691 // Make sure we always update StartIdx as we use it to track if all
3692 // elements are undef.
3693 StartIdx
[Input
] = NewStartIdx
;
3696 if (StartIdx
[0] < 0 && StartIdx
[1] < 0) {
3697 setValue(&I
, DAG
.getUNDEF(VT
)); // Vectors are not used.
3701 // Extract appropriate subvector and generate a vector shuffle
3702 for (unsigned Input
= 0; Input
< 2; ++Input
) {
3703 SDValue
&Src
= Input
== 0 ? Src1
: Src2
;
3704 if (StartIdx
[Input
] < 0)
3705 Src
= DAG
.getUNDEF(VT
);
3707 Src
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, VT
, Src
,
3708 DAG
.getVectorIdxConstant(StartIdx
[Input
], DL
));
3712 // Calculate new mask.
3713 SmallVector
<int, 8> MappedOps(Mask
.begin(), Mask
.end());
3714 for (int &Idx
: MappedOps
) {
3715 if (Idx
>= (int)SrcNumElts
)
3716 Idx
-= SrcNumElts
+ StartIdx
[1] - MaskNumElts
;
3721 setValue(&I
, DAG
.getVectorShuffle(VT
, DL
, Src1
, Src2
, MappedOps
));
3726 // We can't use either concat vectors or extract subvectors so fall back to
3727 // replacing the shuffle with extract and build vector.
3728 // to insert and build vector.
3729 EVT EltVT
= VT
.getVectorElementType();
3730 SmallVector
<SDValue
,8> Ops
;
3731 for (int Idx
: Mask
) {
3735 Res
= DAG
.getUNDEF(EltVT
);
3737 SDValue
&Src
= Idx
< (int)SrcNumElts
? Src1
: Src2
;
3738 if (Idx
>= (int)SrcNumElts
) Idx
-= SrcNumElts
;
3740 Res
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, EltVT
, Src
,
3741 DAG
.getVectorIdxConstant(Idx
, DL
));
3747 setValue(&I
, DAG
.getBuildVector(VT
, DL
, Ops
));
void SelectionDAGBuilder::visitInsertValue(const User &I) {
  ArrayRef<unsigned> Indices;
  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
    Indices = IV->getIndices();
  else
    Indices = cast<ConstantExpr>(&I)->getIndices();

  const Value *Op0 = I.getOperand(0);
  const Value *Op1 = I.getOperand(1);
  Type *AggTy = I.getType();
  Type *ValTy = Op1->getType();
  bool IntoUndef = isa<UndefValue>(Op0);
  bool FromUndef = isa<UndefValue>(Op1);

  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SmallVector<EVT, 4> AggValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
  SmallVector<EVT, 4> ValValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);

  unsigned NumAggValues = AggValueVTs.size();
  unsigned NumValValues = ValValueVTs.size();
  SmallVector<SDValue, 4> Values(NumAggValues);

  // Ignore an insertvalue that produces an empty object
  if (!NumAggValues) {
    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
    return;
  }

  SDValue Agg = getValue(Op0);
  unsigned i = 0;
  // Copy the beginning value(s) from the original aggregate.
  for (; i != LinearIndex; ++i)
    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                SDValue(Agg.getNode(), Agg.getResNo() + i);
  // Copy values from the inserted value(s).
  if (NumValValues) {
    SDValue Val = getValue(Op1);
    for (; i != LinearIndex + NumValValues; ++i)
      Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
  }
  // Copy remaining value(s) from the original aggregate.
  for (; i != NumAggValues; ++i)
    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                SDValue(Agg.getNode(), Agg.getResNo() + i);

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(AggValueVTs), Values));
}
void SelectionDAGBuilder::visitExtractValue(const User &I) {
  ArrayRef<unsigned> Indices;
  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
    Indices = EV->getIndices();
  else
    Indices = cast<ConstantExpr>(&I)->getIndices();

  const Value *Op0 = I.getOperand(0);
  Type *AggTy = Op0->getType();
  Type *ValTy = I.getType();
  bool OutOfUndef = isa<UndefValue>(Op0);

  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SmallVector<EVT, 4> ValValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);

  unsigned NumValValues = ValValueVTs.size();

  // Ignore a extractvalue that produces an empty object
  if (!NumValValues) {
    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
    return;
  }

  SmallVector<SDValue, 4> Values(NumValValues);

  SDValue Agg = getValue(Op0);
  // Copy out the selected value(s).
  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
    Values[i - LinearIndex] =
      OutOfUndef ?
        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
        SDValue(Agg.getNode(), Agg.getResNo() + i);

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(ValValueVTs), Values));
}
3844 void SelectionDAGBuilder::visitGetElementPtr(const User
&I
) {
3845 Value
*Op0
= I
.getOperand(0);
3846 // Note that the pointer operand may be a vector of pointers. Take the scalar
3847 // element which holds a pointer.
3848 unsigned AS
= Op0
->getType()->getScalarType()->getPointerAddressSpace();
3849 SDValue N
= getValue(Op0
);
3850 SDLoc dl
= getCurSDLoc();
3851 auto &TLI
= DAG
.getTargetLoweringInfo();
3853 // Normalize Vector GEP - all scalar operands should be converted to the
3855 bool IsVectorGEP
= I
.getType()->isVectorTy();
3856 ElementCount VectorElementCount
=
3857 IsVectorGEP
? cast
<VectorType
>(I
.getType())->getElementCount()
3858 : ElementCount::getFixed(0);
3860 if (IsVectorGEP
&& !N
.getValueType().isVector()) {
3861 LLVMContext
&Context
= *DAG
.getContext();
3862 EVT VT
= EVT::getVectorVT(Context
, N
.getValueType(), VectorElementCount
);
3863 if (VectorElementCount
.isScalable())
3864 N
= DAG
.getSplatVector(VT
, dl
, N
);
3866 N
= DAG
.getSplatBuildVector(VT
, dl
, N
);
3869 for (gep_type_iterator GTI
= gep_type_begin(&I
), E
= gep_type_end(&I
);
3871 const Value
*Idx
= GTI
.getOperand();
3872 if (StructType
*StTy
= GTI
.getStructTypeOrNull()) {
3873 unsigned Field
= cast
<Constant
>(Idx
)->getUniqueInteger().getZExtValue();
3877 DAG
.getDataLayout().getStructLayout(StTy
)->getElementOffset(Field
);
3879 // In an inbounds GEP with an offset that is nonnegative even when
3880 // interpreted as signed, assume there is no unsigned overflow.
3882 if (int64_t(Offset
) >= 0 && cast
<GEPOperator
>(I
).isInBounds())
3883 Flags
.setNoUnsignedWrap(true);
3885 N
= DAG
.getNode(ISD::ADD
, dl
, N
.getValueType(), N
,
3886 DAG
.getConstant(Offset
, dl
, N
.getValueType()), Flags
);
3889 // IdxSize is the width of the arithmetic according to IR semantics.
3890 // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
3891 // (and fix up the result later).
3892 unsigned IdxSize
= DAG
.getDataLayout().getIndexSizeInBits(AS
);
3893 MVT IdxTy
= MVT::getIntegerVT(IdxSize
);
3894 TypeSize ElementSize
=
3895 DAG
.getDataLayout().getTypeAllocSize(GTI
.getIndexedType());
3896 // We intentionally mask away the high bits here; ElementSize may not
3898 APInt
ElementMul(IdxSize
, ElementSize
.getKnownMinSize());
3899 bool ElementScalable
= ElementSize
.isScalable();
3901 // If this is a scalar constant or a splat vector of constants,
3902 // handle it quickly.
3903 const auto *C
= dyn_cast
<Constant
>(Idx
);
3904 if (C
&& isa
<VectorType
>(C
->getType()))
3905 C
= C
->getSplatValue();
3907 const auto *CI
= dyn_cast_or_null
<ConstantInt
>(C
);
3908 if (CI
&& CI
->isZero())
3910 if (CI
&& !ElementScalable
) {
3911 APInt Offs
= ElementMul
* CI
->getValue().sextOrTrunc(IdxSize
);
3912 LLVMContext
&Context
= *DAG
.getContext();
3915 OffsVal
= DAG
.getConstant(
3916 Offs
, dl
, EVT::getVectorVT(Context
, IdxTy
, VectorElementCount
));
3918 OffsVal
= DAG
.getConstant(Offs
, dl
, IdxTy
);
3920 // In an inbounds GEP with an offset that is nonnegative even when
3921 // interpreted as signed, assume there is no unsigned overflow.
3923 if (Offs
.isNonNegative() && cast
<GEPOperator
>(I
).isInBounds())
3924 Flags
.setNoUnsignedWrap(true);
3926 OffsVal
= DAG
.getSExtOrTrunc(OffsVal
, dl
, N
.getValueType());
3928 N
= DAG
.getNode(ISD::ADD
, dl
, N
.getValueType(), N
, OffsVal
, Flags
);
3932 // N = N + Idx * ElementMul;
3933 SDValue IdxN
= getValue(Idx
);
3935 if (!IdxN
.getValueType().isVector() && IsVectorGEP
) {
3936 EVT VT
= EVT::getVectorVT(*Context
, IdxN
.getValueType(),
3937 VectorElementCount
);
3938 if (VectorElementCount
.isScalable())
3939 IdxN
= DAG
.getSplatVector(VT
, dl
, IdxN
);
3941 IdxN
= DAG
.getSplatBuildVector(VT
, dl
, IdxN
);
3944 // If the index is smaller or larger than intptr_t, truncate or extend
3946 IdxN
= DAG
.getSExtOrTrunc(IdxN
, dl
, N
.getValueType());
3948 if (ElementScalable
) {
3949 EVT VScaleTy
= N
.getValueType().getScalarType();
3950 SDValue VScale
= DAG
.getNode(
3951 ISD::VSCALE
, dl
, VScaleTy
,
3952 DAG
.getConstant(ElementMul
.getZExtValue(), dl
, VScaleTy
));
3954 VScale
= DAG
.getSplatVector(N
.getValueType(), dl
, VScale
);
3955 IdxN
= DAG
.getNode(ISD::MUL
, dl
, N
.getValueType(), IdxN
, VScale
);
3957 // If this is a multiply by a power of two, turn it into a shl
3958 // immediately. This is a very common case.
3959 if (ElementMul
!= 1) {
3960 if (ElementMul
.isPowerOf2()) {
3961 unsigned Amt
= ElementMul
.logBase2();
3962 IdxN
= DAG
.getNode(ISD::SHL
, dl
,
3963 N
.getValueType(), IdxN
,
3964 DAG
.getConstant(Amt
, dl
, IdxN
.getValueType()));
3966 SDValue Scale
= DAG
.getConstant(ElementMul
.getZExtValue(), dl
,
3967 IdxN
.getValueType());
3968 IdxN
= DAG
.getNode(ISD::MUL
, dl
,
3969 N
.getValueType(), IdxN
, Scale
);
3974 N
= DAG
.getNode(ISD::ADD
, dl
,
3975 N
.getValueType(), N
, IdxN
);
3979 MVT PtrTy
= TLI
.getPointerTy(DAG
.getDataLayout(), AS
);
3980 MVT PtrMemTy
= TLI
.getPointerMemTy(DAG
.getDataLayout(), AS
);
3982 PtrTy
= MVT::getVectorVT(PtrTy
, VectorElementCount
);
3983 PtrMemTy
= MVT::getVectorVT(PtrMemTy
, VectorElementCount
);
3986 if (PtrMemTy
!= PtrTy
&& !cast
<GEPOperator
>(I
).isInBounds())
3987 N
= DAG
.getPtrExtendInReg(N
, dl
, PtrMemTy
);
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  // If this is a fixed sized alloca in the entry block of the function,
  // allocate it statically on the stack.
  if (FuncInfo.StaticAllocaMap.count(&I))
    return;   // getValue will auto-populate this.

  SDLoc dl = getCurSDLoc();
  Type *Ty = I.getAllocatedType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto &DL = DAG.getDataLayout();
  TypeSize TySize = DL.getTypeAllocSize(Ty);
  MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());

  SDValue AllocSize = getValue(I.getArraySize());

  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
  if (AllocSize.getValueType() != IntPtr)
    AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);

  if (TySize.isScalable())
    AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
                            DAG.getVScale(dl, IntPtr,
                                          APInt(IntPtr.getScalarSizeInBits(),
                                                TySize.getKnownMinValue())));
  else
    AllocSize =
        DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
                    DAG.getConstant(TySize.getFixedValue(), dl, IntPtr));

  // Handle alignment. If the requested alignment is less than or equal to
  // the stack alignment, ignore it. If the size is greater than or equal to
  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
  Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
  if (*Alignment <= StackAlign)
    Alignment = None;

  const uint64_t StackAlignMask = StackAlign.value() - 1U;
  // Round the size of the allocation up to the stack alignment size
  // by add SA-1 to the size. This doesn't overflow because we're computing
  // an address inside an alloca.
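  // For example, with a 16-byte stack alignment an allocation of 20 bytes is
  // rounded up as (20 + 15) & ~15 == 32 bytes.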
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
                          DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);

  // Mask out the low bits for alignment purposes.
  AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
                          DAG.getConstant(~StackAlignMask, dl, IntPtr));

  SDValue Ops[] = {
      getRoot(), AllocSize,
      DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
  setValue(&I, DSA);
  DAG.setRoot(DSA.getValue(1));

  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
4052 void SelectionDAGBuilder::visitLoad(const LoadInst
&I
) {
4054 return visitAtomicLoad(I
);
4056 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
4057 const Value
*SV
= I
.getOperand(0);
4058 if (TLI
.supportSwiftError()) {
4059 // Swifterror values can come from either a function parameter with
4060 // swifterror attribute or an alloca with swifterror attribute.
4061 if (const Argument
*Arg
= dyn_cast
<Argument
>(SV
)) {
4062 if (Arg
->hasSwiftErrorAttr())
4063 return visitLoadFromSwiftError(I
);
4066 if (const AllocaInst
*Alloca
= dyn_cast
<AllocaInst
>(SV
)) {
4067 if (Alloca
->isSwiftError())
4068 return visitLoadFromSwiftError(I
);
4072 SDValue Ptr
= getValue(SV
);
4074 Type
*Ty
= I
.getType();
4075 Align Alignment
= I
.getAlign();
4077 AAMDNodes AAInfo
= I
.getAAMetadata();
4078 const MDNode
*Ranges
= I
.getMetadata(LLVMContext::MD_range
);
4080 SmallVector
<EVT
, 4> ValueVTs
, MemVTs
;
4081 SmallVector
<uint64_t, 4> Offsets
;
4082 ComputeValueVTs(TLI
, DAG
.getDataLayout(), Ty
, ValueVTs
, &MemVTs
, &Offsets
);
4083 unsigned NumValues
= ValueVTs
.size();
4087 bool isVolatile
= I
.isVolatile();
4090 bool ConstantMemory
= false;
4092 // Serialize volatile loads with other side effects.
4094 else if (NumValues
> MaxParallelChains
)
4095 Root
= getMemoryRoot();
4097 AA
->pointsToConstantMemory(MemoryLocation(
4099 LocationSize::precise(DAG
.getDataLayout().getTypeStoreSize(Ty
)),
4101 // Do not serialize (non-volatile) loads of constant memory with anything.
4102 Root
= DAG
.getEntryNode();
4103 ConstantMemory
= true;
4105 // Do not serialize non-volatile loads against each other.
4106 Root
= DAG
.getRoot();
4109 SDLoc dl
= getCurSDLoc();
4112 Root
= TLI
.prepareVolatileOrAtomicLoad(Root
, dl
, DAG
);
4114 // An aggregate load cannot wrap around the address space, so offsets to its
4115 // parts don't wrap either.
4117 Flags
.setNoUnsignedWrap(true);
4119 SmallVector
<SDValue
, 4> Values(NumValues
);
4120 SmallVector
<SDValue
, 4> Chains(std::min(MaxParallelChains
, NumValues
));
4121 EVT PtrVT
= Ptr
.getValueType();
4123 MachineMemOperand::Flags MMOFlags
4124 = TLI
.getLoadMemOperandFlags(I
, DAG
.getDataLayout());
4126 unsigned ChainI
= 0;
4127 for (unsigned i
= 0; i
!= NumValues
; ++i
, ++ChainI
) {
4128 // Serializing loads here may result in excessive register pressure, and
4129 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
4130 // could recover a bit by hoisting nodes upward in the chain by recognizing
4131 // they are side-effect free or do not alias. The optimizer should really
4132 // avoid this case by converting large object/array copies to llvm.memcpy
4133 // (MaxParallelChains should always remain as failsafe).
4134 if (ChainI
== MaxParallelChains
) {
4135 assert(PendingLoads
.empty() && "PendingLoads must be serialized first");
4136 SDValue Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
4137 makeArrayRef(Chains
.data(), ChainI
));
4141 SDValue A
= DAG
.getNode(ISD::ADD
, dl
,
4143 DAG
.getConstant(Offsets
[i
], dl
, PtrVT
),
4146 SDValue L
= DAG
.getLoad(MemVTs
[i
], dl
, Root
, A
,
4147 MachinePointerInfo(SV
, Offsets
[i
]), Alignment
,
4148 MMOFlags
, AAInfo
, Ranges
);
4149 Chains
[ChainI
] = L
.getValue(1);
4151 if (MemVTs
[i
] != ValueVTs
[i
])
4152 L
= DAG
.getZExtOrTrunc(L
, dl
, ValueVTs
[i
]);
4157 if (!ConstantMemory
) {
4158 SDValue Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
4159 makeArrayRef(Chains
.data(), ChainI
));
4163 PendingLoads
.push_back(Chain
);
4166 setValue(&I
, DAG
.getNode(ISD::MERGE_VALUES
, dl
,
4167 DAG
.getVTList(ValueVTs
), Values
));
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
         "call visitStoreToSwiftError when backend supports swifterror");

  SmallVector<EVT, 4> ValueVTs;
  SmallVector<uint64_t, 4> Offsets;
  const Value *SrcV = I.getOperand(0);
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
                  SrcV->getType(), ValueVTs, &Offsets);
  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
         "expect a single EVT for swifterror");

  SDValue Src = getValue(SrcV);
  // Create a virtual register, then update the virtual register.
  Register VReg =
      SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
  // Chain can be getRoot or getControlRoot.
  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
                                      SDValue(Src.getNode(), Src.getResNo()));
  DAG.setRoot(CopyNode);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
         "call visitLoadFromSwiftError when backend supports swifterror");

  assert(!I.isVolatile() &&
         !I.hasMetadata(LLVMContext::MD_nontemporal) &&
         !I.hasMetadata(LLVMContext::MD_invariant_load) &&
         "Support volatile, non temporal, invariant for load_from_swift_error");

  const Value *SV = I.getOperand(0);
  Type *Ty = I.getType();
  assert(
      (!AA ||
       !AA->pointsToConstantMemory(MemoryLocation(
           SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
           I.getAAMetadata()))) &&
      "load_from_swift_error should not be constant memory");

  SmallVector<EVT, 4> ValueVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
                  ValueVTs, &Offsets);
  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
         "expect a single EVT for swifterror");

  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
  SDValue L = DAG.getCopyFromReg(
      getRoot(), getCurSDLoc(),
      SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);

  setValue(&I, L);
}
4226 void SelectionDAGBuilder::visitStore(const StoreInst
&I
) {
4228 return visitAtomicStore(I
);
4230 const Value
*SrcV
= I
.getOperand(0);
4231 const Value
*PtrV
= I
.getOperand(1);
4233 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
4234 if (TLI
.supportSwiftError()) {
4235 // Swifterror values can come from either a function parameter with
4236 // swifterror attribute or an alloca with swifterror attribute.
4237 if (const Argument
*Arg
= dyn_cast
<Argument
>(PtrV
)) {
4238 if (Arg
->hasSwiftErrorAttr())
4239 return visitStoreToSwiftError(I
);
4242 if (const AllocaInst
*Alloca
= dyn_cast
<AllocaInst
>(PtrV
)) {
4243 if (Alloca
->isSwiftError())
4244 return visitStoreToSwiftError(I
);
4248 SmallVector
<EVT
, 4> ValueVTs
, MemVTs
;
4249 SmallVector
<uint64_t, 4> Offsets
;
4250 ComputeValueVTs(DAG
.getTargetLoweringInfo(), DAG
.getDataLayout(),
4251 SrcV
->getType(), ValueVTs
, &MemVTs
, &Offsets
);
4252 unsigned NumValues
= ValueVTs
.size();
4256 // Get the lowered operands. Note that we do this after
4257 // checking if NumResults is zero, because with zero results
4258 // the operands won't have values in the map.
4259 SDValue Src
= getValue(SrcV
);
4260 SDValue Ptr
= getValue(PtrV
);
4262 SDValue Root
= I
.isVolatile() ? getRoot() : getMemoryRoot();
4263 SmallVector
<SDValue
, 4> Chains(std::min(MaxParallelChains
, NumValues
));
4264 SDLoc dl
= getCurSDLoc();
4265 Align Alignment
= I
.getAlign();
4266 AAMDNodes AAInfo
= I
.getAAMetadata();
4268 auto MMOFlags
= TLI
.getStoreMemOperandFlags(I
, DAG
.getDataLayout());
4270 // An aggregate load cannot wrap around the address space, so offsets to its
4271 // parts don't wrap either.
4273 Flags
.setNoUnsignedWrap(true);
4275 unsigned ChainI
= 0;
4276 for (unsigned i
= 0; i
!= NumValues
; ++i
, ++ChainI
) {
4277 // See visitLoad comments.
4278 if (ChainI
== MaxParallelChains
) {
4279 SDValue Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
4280 makeArrayRef(Chains
.data(), ChainI
));
4285 DAG
.getMemBasePlusOffset(Ptr
, TypeSize::Fixed(Offsets
[i
]), dl
, Flags
);
4286 SDValue Val
= SDValue(Src
.getNode(), Src
.getResNo() + i
);
4287 if (MemVTs
[i
] != ValueVTs
[i
])
4288 Val
= DAG
.getPtrExtOrTrunc(Val
, dl
, MemVTs
[i
]);
4290 DAG
.getStore(Root
, dl
, Val
, Add
, MachinePointerInfo(PtrV
, Offsets
[i
]),
4291 Alignment
, MMOFlags
, AAInfo
);
4292 Chains
[ChainI
] = St
;
4295 SDValue StoreNode
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
4296 makeArrayRef(Chains
.data(), ChainI
));
4297 DAG
.setRoot(StoreNode
);
4300 void SelectionDAGBuilder::visitMaskedStore(const CallInst
&I
,
4301 bool IsCompressing
) {
4302 SDLoc sdl
= getCurSDLoc();
4304 auto getMaskedStoreOps
= [&](Value
*&Ptr
, Value
*&Mask
, Value
*&Src0
,
4305 MaybeAlign
&Alignment
) {
4306 // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
4307 Src0
= I
.getArgOperand(0);
4308 Ptr
= I
.getArgOperand(1);
4309 Alignment
= cast
<ConstantInt
>(I
.getArgOperand(2))->getMaybeAlignValue();
4310 Mask
= I
.getArgOperand(3);
4312 auto getCompressingStoreOps
= [&](Value
*&Ptr
, Value
*&Mask
, Value
*&Src0
,
4313 MaybeAlign
&Alignment
) {
4314 // llvm.masked.compressstore.*(Src0, Ptr, Mask)
4315 Src0
= I
.getArgOperand(0);
4316 Ptr
= I
.getArgOperand(1);
4317 Mask
= I
.getArgOperand(2);
4321 Value
*PtrOperand
, *MaskOperand
, *Src0Operand
;
4322 MaybeAlign Alignment
;
4324 getCompressingStoreOps(PtrOperand
, MaskOperand
, Src0Operand
, Alignment
);
4326 getMaskedStoreOps(PtrOperand
, MaskOperand
, Src0Operand
, Alignment
);
4328 SDValue Ptr
= getValue(PtrOperand
);
4329 SDValue Src0
= getValue(Src0Operand
);
4330 SDValue Mask
= getValue(MaskOperand
);
4331 SDValue Offset
= DAG
.getUNDEF(Ptr
.getValueType());
4333 EVT VT
= Src0
.getValueType();
4335 Alignment
= DAG
.getEVTAlign(VT
);
4337 MachineMemOperand
*MMO
= DAG
.getMachineFunction().getMachineMemOperand(
4338 MachinePointerInfo(PtrOperand
), MachineMemOperand::MOStore
,
4339 MemoryLocation::UnknownSize
, *Alignment
, I
.getAAMetadata());
4341 DAG
.getMaskedStore(getMemoryRoot(), sdl
, Src0
, Ptr
, Offset
, Mask
, VT
, MMO
,
4342 ISD::UNINDEXED
, false /* Truncating */, IsCompressing
);
4343 DAG
.setRoot(StoreNode
);
4344 setValue(&I
, StoreNode
);
// Get a uniform base for the Gather/Scatter intrinsic.
// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
// We try to represent it as a base pointer + vector of indices.
// Usually, the vector of pointers comes from a 'getelementptr' instruction.
// The first operand of the GEP may be a single pointer or a vector of pointers
// Example:
//   %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
//  or
//   %gep.ptr = getelementptr i32, i32* %ptr,        <8 x i32> %ind
// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
//
// When the first GEP operand is a single pointer - it is the uniform base we
// are looking for. If first operand of the GEP is a splat vector - we
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
                           ISD::MemIndexType &IndexType, SDValue &Scale,
                           SelectionDAGBuilder *SDB, const BasicBlock *CurBB,
                           uint64_t ElemSize) {
  SelectionDAG& DAG = SDB->DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const DataLayout &DL = DAG.getDataLayout();

  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");

  // Handle splat constant pointer.
  if (auto *C = dyn_cast<Constant>(Ptr)) {
    C = C->getSplatValue();
    if (!C)
      return false;

    Base = SDB->getValue(C);

    ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
    EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
    Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
    IndexType = ISD::SIGNED_SCALED;
    Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
    return true;
  }

  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || GEP->getParent() != CurBB)
    return false;

  if (GEP->getNumOperands() != 2)
    return false;

  const Value *BasePtr = GEP->getPointerOperand();
  const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);

  // Make sure the base is scalar and the index is a vector.
  if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
    return false;

  Base = SDB->getValue(BasePtr);
  Index = SDB->getValue(IndexVal);
  IndexType = ISD::SIGNED_SCALED;

  // MGATHER/MSCATTER are only required to support scaling by one or by the
  // element size. Other scales may be produced using target-specific DAG
  // combines.
  uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
  if (ScaleVal != ElemSize && ScaleVal != 1)
    return false;

  Scale =
      DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
  return true;
}
4418 void SelectionDAGBuilder::visitMaskedScatter(const CallInst
&I
) {
4419 SDLoc sdl
= getCurSDLoc();
4421 // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
4422 const Value
*Ptr
= I
.getArgOperand(1);
4423 SDValue Src0
= getValue(I
.getArgOperand(0));
4424 SDValue Mask
= getValue(I
.getArgOperand(3));
4425 EVT VT
= Src0
.getValueType();
4426 Align Alignment
= cast
<ConstantInt
>(I
.getArgOperand(2))
4427 ->getMaybeAlignValue()
4428 .value_or(DAG
.getEVTAlign(VT
.getScalarType()));
4429 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
4433 ISD::MemIndexType IndexType
;
4435 bool UniformBase
= getUniformBase(Ptr
, Base
, Index
, IndexType
, Scale
, this,
4436 I
.getParent(), VT
.getScalarStoreSize());
4438 unsigned AS
= Ptr
->getType()->getScalarType()->getPointerAddressSpace();
4439 MachineMemOperand
*MMO
= DAG
.getMachineFunction().getMachineMemOperand(
4440 MachinePointerInfo(AS
), MachineMemOperand::MOStore
,
4441 // TODO: Make MachineMemOperands aware of scalable
4443 MemoryLocation::UnknownSize
, Alignment
, I
.getAAMetadata());
4445 Base
= DAG
.getConstant(0, sdl
, TLI
.getPointerTy(DAG
.getDataLayout()));
4446 Index
= getValue(Ptr
);
4447 IndexType
= ISD::SIGNED_SCALED
;
4448 Scale
= DAG
.getTargetConstant(1, sdl
, TLI
.getPointerTy(DAG
.getDataLayout()));
4451 EVT IdxVT
= Index
.getValueType();
4452 EVT EltTy
= IdxVT
.getVectorElementType();
4453 if (TLI
.shouldExtendGSIndex(IdxVT
, EltTy
)) {
4454 EVT NewIdxVT
= IdxVT
.changeVectorElementType(EltTy
);
4455 Index
= DAG
.getNode(ISD::SIGN_EXTEND
, sdl
, NewIdxVT
, Index
);
4458 SDValue Ops
[] = { getMemoryRoot(), Src0
, Mask
, Base
, Index
, Scale
};
4459 SDValue Scatter
= DAG
.getMaskedScatter(DAG
.getVTList(MVT::Other
), VT
, sdl
,
4460 Ops
, MMO
, IndexType
, false);
4461 DAG
.setRoot(Scatter
);
4462 setValue(&I
, Scatter
);
4465 void SelectionDAGBuilder::visitMaskedLoad(const CallInst
&I
, bool IsExpanding
) {
4466 SDLoc sdl
= getCurSDLoc();
4468 auto getMaskedLoadOps
= [&](Value
*&Ptr
, Value
*&Mask
, Value
*&Src0
,
4469 MaybeAlign
&Alignment
) {
4470 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
4471 Ptr
= I
.getArgOperand(0);
4472 Alignment
= cast
<ConstantInt
>(I
.getArgOperand(1))->getMaybeAlignValue();
4473 Mask
= I
.getArgOperand(2);
4474 Src0
= I
.getArgOperand(3);
4476 auto getExpandingLoadOps
= [&](Value
*&Ptr
, Value
*&Mask
, Value
*&Src0
,
4477 MaybeAlign
&Alignment
) {
4478 // @llvm.masked.expandload.*(Ptr, Mask, Src0)
4479 Ptr
= I
.getArgOperand(0);
4481 Mask
= I
.getArgOperand(1);
4482 Src0
= I
.getArgOperand(2);
4485 Value
*PtrOperand
, *MaskOperand
, *Src0Operand
;
4486 MaybeAlign Alignment
;
4488 getExpandingLoadOps(PtrOperand
, MaskOperand
, Src0Operand
, Alignment
);
4490 getMaskedLoadOps(PtrOperand
, MaskOperand
, Src0Operand
, Alignment
);
4492 SDValue Ptr
= getValue(PtrOperand
);
4493 SDValue Src0
= getValue(Src0Operand
);
4494 SDValue Mask
= getValue(MaskOperand
);
4495 SDValue Offset
= DAG
.getUNDEF(Ptr
.getValueType());
4497 EVT VT
= Src0
.getValueType();
4499 Alignment
= DAG
.getEVTAlign(VT
);
4501 AAMDNodes AAInfo
= I
.getAAMetadata();
4502 const MDNode
*Ranges
= I
.getMetadata(LLVMContext::MD_range
);
4504 // Do not serialize masked loads of constant memory with anything.
4505 MemoryLocation ML
= MemoryLocation::getAfter(PtrOperand
, AAInfo
);
4506 bool AddToChain
= !AA
|| !AA
->pointsToConstantMemory(ML
);
4508 SDValue InChain
= AddToChain
? DAG
.getRoot() : DAG
.getEntryNode();
4510 MachineMemOperand
*MMO
= DAG
.getMachineFunction().getMachineMemOperand(
4511 MachinePointerInfo(PtrOperand
), MachineMemOperand::MOLoad
,
4512 MemoryLocation::UnknownSize
, *Alignment
, AAInfo
, Ranges
);
4515 DAG
.getMaskedLoad(VT
, sdl
, InChain
, Ptr
, Offset
, Mask
, Src0
, VT
, MMO
,
4516 ISD::UNINDEXED
, ISD::NON_EXTLOAD
, IsExpanding
);
4518 PendingLoads
.push_back(Load
.getValue(1));
4522 void SelectionDAGBuilder::visitMaskedGather(const CallInst
&I
) {
4523 SDLoc sdl
= getCurSDLoc();
4525 // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4526 const Value
*Ptr
= I
.getArgOperand(0);
4527 SDValue Src0
= getValue(I
.getArgOperand(3));
4528 SDValue Mask
= getValue(I
.getArgOperand(2));
4530 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
4531 EVT VT
= TLI
.getValueType(DAG
.getDataLayout(), I
.getType());
4532 Align Alignment
= cast
<ConstantInt
>(I
.getArgOperand(1))
4533 ->getMaybeAlignValue()
4534 .value_or(DAG
.getEVTAlign(VT
.getScalarType()));
4536 const MDNode
*Ranges
= I
.getMetadata(LLVMContext::MD_range
);
4538 SDValue Root
= DAG
.getRoot();
4541 ISD::MemIndexType IndexType
;
4543 bool UniformBase
= getUniformBase(Ptr
, Base
, Index
, IndexType
, Scale
, this,
4544 I
.getParent(), VT
.getScalarStoreSize());
4545 unsigned AS
= Ptr
->getType()->getScalarType()->getPointerAddressSpace();
4546 MachineMemOperand
*MMO
= DAG
.getMachineFunction().getMachineMemOperand(
4547 MachinePointerInfo(AS
), MachineMemOperand::MOLoad
,
4548 // TODO: Make MachineMemOperands aware of scalable
4550 MemoryLocation::UnknownSize
, Alignment
, I
.getAAMetadata(), Ranges
);
4553 Base
= DAG
.getConstant(0, sdl
, TLI
.getPointerTy(DAG
.getDataLayout()));
4554 Index
= getValue(Ptr
);
4555 IndexType
= ISD::SIGNED_SCALED
;
4556 Scale
= DAG
.getTargetConstant(1, sdl
, TLI
.getPointerTy(DAG
.getDataLayout()));
4559 EVT IdxVT
= Index
.getValueType();
4560 EVT EltTy
= IdxVT
.getVectorElementType();
4561 if (TLI
.shouldExtendGSIndex(IdxVT
, EltTy
)) {
4562 EVT NewIdxVT
= IdxVT
.changeVectorElementType(EltTy
);
4563 Index
= DAG
.getNode(ISD::SIGN_EXTEND
, sdl
, NewIdxVT
, Index
);
4566 SDValue Ops
[] = { Root
, Src0
, Mask
, Base
, Index
, Scale
};
4567 SDValue Gather
= DAG
.getMaskedGather(DAG
.getVTList(VT
, MVT::Other
), VT
, sdl
,
4568 Ops
, MMO
, IndexType
, ISD::NON_EXTLOAD
);
4570 PendingLoads
.push_back(Gather
.getValue(1));
4571 setValue(&I
, Gather
);
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
  AtomicOrdering FailureOrdering = I.getFailureOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
      DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
      FailureOrdering);

  SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
                                   dl, MemVT, VTs, InChain,
                                   getValue(I.getPointerOperand()),
                                   getValue(I.getCompareOperand()),
                                   getValue(I.getNewValOperand()), MMO);

  SDValue OutChain = L.getValue(2);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
  SDLoc dl = getCurSDLoc();
  ISD::NodeType NT;
  switch (I.getOperation()) {
  default: llvm_unreachable("Unknown atomicrmw operation");
  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
  case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
  case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
  case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
  case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
  case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
  case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
  case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
  case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
  case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
  }
  AtomicOrdering Ordering = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
      DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);

  SDValue L =
      DAG.getAtomic(NT, dl, MemVT, InChain,
                    getValue(I.getPointerOperand()), getValue(I.getValOperand()),
                    MMO);

  SDValue OutChain = L.getValue(1);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}
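// Illustrative note (not part of the original source): the switch above is a
// one-to-one translation of the IR operation, e.g. (sketch):
//
//   %old = atomicrmw add ptr %p, i32 %v seq_cst
//
// becomes an ISD::ATOMIC_LOAD_ADD node whose value result is the value that
// was in memory before the update, matching the IR semantics of atomicrmw.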
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
  SDLoc dl = getCurSDLoc();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Ops[3];
  Ops[0] = getRoot();
  Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
                                 TLI.getFenceOperandTy(DAG.getDataLayout()));
  Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
                                 TLI.getFenceOperandTy(DAG.getDataLayout()));
  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering Order = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());

  if (!TLI.supportsUnalignedAtomics() &&
      I.getAlign().value() < MemVT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic load");

  auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
      I.getAlign(), AAMDNodes(), nullptr, SSID, Order);

  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);

  SDValue Ptr = getValue(I.getPointerOperand());

  if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
    // TODO: Once this is better exercised by tests, it should be merged with
    // the normal path for loads to prevent future divergence.
    SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
    if (MemVT != VT)
      L = DAG.getPtrExtOrTrunc(L, dl, VT);

    setValue(&I, L);
    SDValue OutChain = L.getValue(1);
    if (!I.isUnordered())
      DAG.setRoot(OutChain);
    else
      PendingLoads.push_back(OutChain);
    return;
  }

  SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
                            Ptr, MMO);

  SDValue OutChain = L.getValue(1);
  if (MemVT != VT)
    L = DAG.getPtrExtOrTrunc(L, dl, VT);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}
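// Illustrative note (not part of the original source): an atomic load has two
// possible lowerings here. If the target opts in via
// lowerAtomicLoadAsLoadSDNode(), it becomes an ordinary LoadSDNode whose MMO
// still records the sync scope and ordering; otherwise it becomes an
// ISD::ATOMIC_LOAD. In both cases a width mismatch between MemVT and VT is
// patched up with getPtrExtOrTrunc, and only non-unordered loads force a new
// DAG root (unordered ones just join PendingLoads).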
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
  SDLoc dl = getCurSDLoc();

  AtomicOrdering Ordering = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT MemVT =
      TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());

  if (I.getAlign().value() < MemVT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic store");

  auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
      I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);

  SDValue Val = getValue(I.getValueOperand());
  if (Val.getValueType() != MemVT)
    Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
  SDValue Ptr = getValue(I.getPointerOperand());

  if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
    // TODO: Once this is better exercised by tests, it should be merged with
    // the normal path for stores to prevent future divergence.
    SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
    DAG.setRoot(S);
    return;
  }

  SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
                                   Ptr, Val, MMO);

  DAG.setRoot(OutChain);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
                                               unsigned Intrinsic) {
  // Ignore the callsite's attributes. A specific call site may be marked with
  // readnone, but the lowering code will expect the chain based on the
  // definition.
  const Function *F = I.getCalledFunction();
  bool HasChain = !F->doesNotAccessMemory();
  bool OnlyLoad = HasChain && F->onlyReadsMemory();

  // Build the operand list.
  SmallVector<SDValue, 8> Ops;
  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
    if (OnlyLoad) {
      // We don't need to serialize loads against other loads.
      Ops.push_back(DAG.getRoot());
    } else {
      Ops.push_back(getRoot());
    }
  }

  // Info is set by getTgtMemIntrinsic
  TargetLowering::IntrinsicInfo Info;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
                                               DAG.getMachineFunction(),
                                               Intrinsic);

  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
      Info.opc == ISD::INTRINSIC_W_CHAIN)
    Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
                                        TLI.getPointerTy(DAG.getDataLayout())));

  // Add all operands of the call to the operand list.
  for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
    const Value *Arg = I.getArgOperand(i);
    if (!I.paramHasAttr(i, Attribute::ImmArg)) {
      Ops.push_back(getValue(Arg));
      continue;
    }

    // Use TargetConstant instead of a regular constant for immarg.
    EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true);
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
      assert(CI->getBitWidth() <= 64 &&
             "large intrinsic immediates not handled");
      Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
    } else {
      Ops.push_back(
          DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
    }
  }

  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);

  if (HasChain)
    ValueVTs.push_back(MVT::Other);

  SDVTList VTs = DAG.getVTList(ValueVTs);

  // Propagate fast-math-flags from IR to node(s).
  SDNodeFlags Flags;
  if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
    Flags.copyFMF(*FPMO);
  SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);

  // Create the node.
  SDValue Result;
  if (IsTgtIntrinsic) {
    // This is target intrinsic that touches memory
    Result =
        DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
                                MachinePointerInfo(Info.ptrVal, Info.offset),
                                Info.align, Info.flags, Info.size,
                                I.getAAMetadata());
  } else if (!HasChain) {
    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
  } else if (!I.getType()->isVoidTy()) {
    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
  } else {
    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
  }

  if (HasChain) {
    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
    if (OnlyLoad)
      PendingLoads.push_back(Chain);
    else
      DAG.setRoot(Chain);
  }

  if (!I.getType()->isVoidTy()) {
    if (!isa<VectorType>(I.getType()))
      Result = lowerRangeToAssertZExt(DAG, I, Result);

    MaybeAlign Alignment = I.getRetAlign();
    if (!Alignment)
      Alignment = F->getAttributes().getRetAlignment();
    // Insert `assertalign` node if there's an alignment.
    if (InsertAssertAlign && Alignment) {
      Result =
          DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
    }

    setValue(&I, Result);
  }
}
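// Illustrative note (not part of the original source): chain handling for a
// generic target intrinsic, e.g. (sketch assuming a hypothetical read-only
// intrinsic @llvm.target.foo):
//
//   %r = call i32 @llvm.target.foo(i32 %x)
//
// HasChain and OnlyLoad are both true, so the node is INTRINSIC_W_CHAIN with
// results {i32, Other}; the chain result joins PendingLoads rather than
// becoming the root, since loads need not be serialized against other loads.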
/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
///   Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the hexadecimal representation of floating point value.
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x007fffff, dl, MVT::i32));
  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
                           DAG.getConstant(0x3f800000, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
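// Illustrative worked example (not part of the original source): for
// Op = 6.0f = 0x40c00000, (0x40c00000 & 0x007fffff) | 0x3f800000 = 0x3fc00000
// = 1.5f, i.e. the significand of 6.0 re-expressed with a biased exponent of
// 127, so the result always lies in [1.0, 2.0).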
/// GetExponent - Get the exponent:
///
///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the hexadecimal representation of floating point value.
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
                           const TargetLowering &TLI, const SDLoc &dl) {
  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x7f800000, dl, MVT::i32));
  SDValue t1 = DAG.getNode(
      ISD::SRL, dl, MVT::i32, t0,
      DAG.getConstant(23, dl,
                      TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
                           DAG.getConstant(127, dl, MVT::i32));
  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}
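// Illustrative worked example (not part of the original source): for
// Op = 6.0f = 0x40c00000, (0x40c00000 & 0x7f800000) >> 23 = 129, and
// 129 - 127 = 2, so the node sequence yields 2.0f: the unbiased binary
// exponent of 6.0 (6.0 = 1.5 * 2^2).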
/// getF32Constant - Get 32-bit floating point constant.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
                              const SDLoc &dl) {
  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
                           MVT::f32);
}
static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
                                       SelectionDAG &DAG) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  //   IntegerPartOfX = ((int32_t)(t0);
  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);

  //   FractionalPartOfX = t0 - (float)IntegerPartOfX;
  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);

  //   IntegerPartOfX <<= 23;
  IntegerPartOfX =
      DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
                  DAG.getConstant(23, dl,
                                  DAG.getTargetLoweringInfo().getShiftAmountTy(
                                      MVT::i32, DAG.getDataLayout())));

  SDValue TwoToFractionalPartOfX;
  if (LimitFloatPrecision <= 6) {
    // For floating-point precision of 6:
    //
    //   TwoToFractionalPartOfX =
    //     (0.735607626f + 0.252464424f * x) * x;
    //
    // error 0.0144103317, which is 6 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3e814304, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3f3c50c8, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                         getF32Constant(DAG, 0x3f7f5e7e, dl));
  } else if (LimitFloatPrecision <= 12) {
    // For floating-point precision of 12:
    //
    //   TwoToFractionalPartOfX =
    //     (0.224338339f + 0.792043434e-1f * x) * x) * x;
    //
    // error 0.000107046256, which is 13 to 14 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3da235e3, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3e65b8f3, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                             getF32Constant(DAG, 0x3f324b07, dl));
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                         getF32Constant(DAG, 0x3f7ff8fd, dl));
  } else { // LimitFloatPrecision <= 18
    // For floating-point precision of 18:
    //
    //   TwoToFractionalPartOfX =
    //     (0.554906021e-1f +
    //       (0.961591928e-2f +
    //         (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
    // error 2.47208000*10^(-7), which is better than 18 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3924b03e, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3ab24b87, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                             getF32Constant(DAG, 0x3c1d8c17, dl));
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
    SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                             getF32Constant(DAG, 0x3d634a1d, dl));
    SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
    SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                             getF32Constant(DAG, 0x3e75fe14, dl));
    SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
    SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
                              getF32Constant(DAG, 0x3f317234, dl));
    SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                         getF32Constant(DAG, 0x3f800000, dl));
  }

  // Add the exponent into the result in integer domain.
  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
                            TwoToFractionalPartOfX);
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                     DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
}
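// Illustrative note (not part of the original source): the final
// integer-domain ADD implements the scaling by 2^IntegerPartOfX. Since
// IntegerPartOfX was shifted into the exponent field (<< 23), adding it to
// the bit pattern of 2^frac(x), a value in [1.0, 2.0), simply bumps the
// exponent. Rough example: for t0 = 2.5, IntegerPartOfX = 2 and
// 2^0.5 ~= 1.41421; adding 2 << 23 to the bits of 1.41421f multiplies it by
// 4, giving ~5.65685 ~= 2^2.5.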
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI, SDNodeFlags Flags) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {

    // Put the exponent in the right bit position for later addition to the
    // final result:
    //
    //   t0 = Op * log2(e)

    // TODO: What fast-math-flags should be set here?
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
                             DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
    return getLimitedPrecisionExp2(t0, dl, DAG);
  }

  // No special expansion.
  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
}
5018 /// expandLog - Lower a log intrinsic. Handles the special sequences for
5019 /// limited-precision mode.
5020 static SDValue
expandLog(const SDLoc
&dl
, SDValue Op
, SelectionDAG
&DAG
,
5021 const TargetLowering
&TLI
, SDNodeFlags Flags
) {
5022 // TODO: What fast-math-flags should be set on the floating-point nodes?
5024 if (Op
.getValueType() == MVT::f32
&&
5025 LimitFloatPrecision
> 0 && LimitFloatPrecision
<= 18) {
5026 SDValue Op1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Op
);
5028 // Scale the exponent by log(2).
5029 SDValue Exp
= GetExponent(DAG
, Op1
, TLI
, dl
);
5030 SDValue LogOfExponent
=
5031 DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, Exp
,
5032 DAG
.getConstantFP(numbers::ln2f
, dl
, MVT::f32
));
5034 // Get the significand and build it into a floating-point number with
5036 SDValue X
= GetSignificand(DAG
, Op1
, dl
);
5038 SDValue LogOfMantissa
;
5039 if (LimitFloatPrecision
<= 6) {
5040 // For floating-point precision of 6:
5044 // (1.4034025f - 0.23903021f * x) * x;
5046 // error 0.0034276066, which is better than 8 bits
5047 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5048 getF32Constant(DAG
, 0xbe74c456, dl
));
5049 SDValue t1
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t0
,
5050 getF32Constant(DAG
, 0x3fb3a2b1, dl
));
5051 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5052 LogOfMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t2
,
5053 getF32Constant(DAG
, 0x3f949a29, dl
));
5054 } else if (LimitFloatPrecision
<= 12) {
5055 // For floating-point precision of 12:
5061 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
5063 // error 0.000061011436, which is 14 bits
5064 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5065 getF32Constant(DAG
, 0xbd67b6d6, dl
));
5066 SDValue t1
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t0
,
5067 getF32Constant(DAG
, 0x3ee4f4b8, dl
));
5068 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5069 SDValue t3
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t2
,
5070 getF32Constant(DAG
, 0x3fbc278b, dl
));
5071 SDValue t4
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t3
, X
);
5072 SDValue t5
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t4
,
5073 getF32Constant(DAG
, 0x40348e95, dl
));
5074 SDValue t6
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t5
, X
);
5075 LogOfMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t6
,
5076 getF32Constant(DAG
, 0x3fdef31a, dl
));
5077 } else { // LimitFloatPrecision <= 18
5078 // For floating-point precision of 18:
5086 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
5088 // error 0.0000023660568, which is better than 18 bits
5089 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5090 getF32Constant(DAG
, 0xbc91e5ac, dl
));
5091 SDValue t1
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t0
,
5092 getF32Constant(DAG
, 0x3e4350aa, dl
));
5093 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5094 SDValue t3
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t2
,
5095 getF32Constant(DAG
, 0x3f60d3e3, dl
));
5096 SDValue t4
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t3
, X
);
5097 SDValue t5
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t4
,
5098 getF32Constant(DAG
, 0x4011cdf0, dl
));
5099 SDValue t6
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t5
, X
);
5100 SDValue t7
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t6
,
5101 getF32Constant(DAG
, 0x406cfd1c, dl
));
5102 SDValue t8
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t7
, X
);
5103 SDValue t9
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t8
,
5104 getF32Constant(DAG
, 0x408797cb, dl
));
5105 SDValue t10
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t9
, X
);
5106 LogOfMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t10
,
5107 getF32Constant(DAG
, 0x4006dcab, dl
));
5110 return DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, LogOfExponent
, LogOfMantissa
);
5113 // No special expansion.
5114 return DAG
.getNode(ISD::FLOG
, dl
, Op
.getValueType(), Op
, Flags
);
5117 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
5118 /// limited-precision mode.
5119 static SDValue
expandLog2(const SDLoc
&dl
, SDValue Op
, SelectionDAG
&DAG
,
5120 const TargetLowering
&TLI
, SDNodeFlags Flags
) {
5121 // TODO: What fast-math-flags should be set on the floating-point nodes?
5123 if (Op
.getValueType() == MVT::f32
&&
5124 LimitFloatPrecision
> 0 && LimitFloatPrecision
<= 18) {
5125 SDValue Op1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Op
);
5127 // Get the exponent.
5128 SDValue LogOfExponent
= GetExponent(DAG
, Op1
, TLI
, dl
);
5130 // Get the significand and build it into a floating-point number with
5132 SDValue X
= GetSignificand(DAG
, Op1
, dl
);
5134 // Different possible minimax approximations of significand in
5135 // floating-point for various degrees of accuracy over [1,2].
5136 SDValue Log2ofMantissa
;
5137 if (LimitFloatPrecision
<= 6) {
5138 // For floating-point precision of 6:
5140 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
5142 // error 0.0049451742, which is more than 7 bits
5143 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5144 getF32Constant(DAG
, 0xbeb08fe0, dl
));
5145 SDValue t1
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t0
,
5146 getF32Constant(DAG
, 0x40019463, dl
));
5147 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5148 Log2ofMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t2
,
5149 getF32Constant(DAG
, 0x3fd6633d, dl
));
5150 } else if (LimitFloatPrecision
<= 12) {
5151 // For floating-point precision of 12:
5157 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
5159 // error 0.0000876136000, which is better than 13 bits
5160 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5161 getF32Constant(DAG
, 0xbda7262e, dl
));
5162 SDValue t1
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t0
,
5163 getF32Constant(DAG
, 0x3f25280b, dl
));
5164 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5165 SDValue t3
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t2
,
5166 getF32Constant(DAG
, 0x4007b923, dl
));
5167 SDValue t4
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t3
, X
);
5168 SDValue t5
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t4
,
5169 getF32Constant(DAG
, 0x40823e2f, dl
));
5170 SDValue t6
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t5
, X
);
5171 Log2ofMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t6
,
5172 getF32Constant(DAG
, 0x4020d29c, dl
));
5173 } else { // LimitFloatPrecision <= 18
5174 // For floating-point precision of 18:
5183 // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
5185 // error 0.0000018516, which is better than 18 bits
5186 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5187 getF32Constant(DAG
, 0xbcd2769e, dl
));
5188 SDValue t1
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t0
,
5189 getF32Constant(DAG
, 0x3e8ce0b9, dl
));
5190 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5191 SDValue t3
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t2
,
5192 getF32Constant(DAG
, 0x3fa22ae7, dl
));
5193 SDValue t4
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t3
, X
);
5194 SDValue t5
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t4
,
5195 getF32Constant(DAG
, 0x40525723, dl
));
5196 SDValue t6
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t5
, X
);
5197 SDValue t7
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t6
,
5198 getF32Constant(DAG
, 0x40aaf200, dl
));
5199 SDValue t8
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t7
, X
);
5200 SDValue t9
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t8
,
5201 getF32Constant(DAG
, 0x40c39dad, dl
));
5202 SDValue t10
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t9
, X
);
5203 Log2ofMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t10
,
5204 getF32Constant(DAG
, 0x4042902c, dl
));
5207 return DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, LogOfExponent
, Log2ofMantissa
);
5210 // No special expansion.
5211 return DAG
.getNode(ISD::FLOG2
, dl
, Op
.getValueType(), Op
, Flags
);
5214 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
5215 /// limited-precision mode.
5216 static SDValue
expandLog10(const SDLoc
&dl
, SDValue Op
, SelectionDAG
&DAG
,
5217 const TargetLowering
&TLI
, SDNodeFlags Flags
) {
5218 // TODO: What fast-math-flags should be set on the floating-point nodes?
5220 if (Op
.getValueType() == MVT::f32
&&
5221 LimitFloatPrecision
> 0 && LimitFloatPrecision
<= 18) {
5222 SDValue Op1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Op
);
5224 // Scale the exponent by log10(2) [0.30102999f].
5225 SDValue Exp
= GetExponent(DAG
, Op1
, TLI
, dl
);
5226 SDValue LogOfExponent
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, Exp
,
5227 getF32Constant(DAG
, 0x3e9a209a, dl
));
5229 // Get the significand and build it into a floating-point number with
5231 SDValue X
= GetSignificand(DAG
, Op1
, dl
);
5233 SDValue Log10ofMantissa
;
5234 if (LimitFloatPrecision
<= 6) {
5235 // For floating-point precision of 6:
5237 // Log10ofMantissa =
5239 // (0.60948995f - 0.10380950f * x) * x;
5241 // error 0.0014886165, which is 6 bits
5242 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5243 getF32Constant(DAG
, 0xbdd49a13, dl
));
5244 SDValue t1
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t0
,
5245 getF32Constant(DAG
, 0x3f1c0789, dl
));
5246 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5247 Log10ofMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t2
,
5248 getF32Constant(DAG
, 0x3f011300, dl
));
5249 } else if (LimitFloatPrecision
<= 12) {
5250 // For floating-point precision of 12:
5252 // Log10ofMantissa =
5255 // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
5257 // error 0.00019228036, which is better than 12 bits
5258 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5259 getF32Constant(DAG
, 0x3d431f31, dl
));
5260 SDValue t1
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t0
,
5261 getF32Constant(DAG
, 0x3ea21fb2, dl
));
5262 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5263 SDValue t3
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t2
,
5264 getF32Constant(DAG
, 0x3f6ae232, dl
));
5265 SDValue t4
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t3
, X
);
5266 Log10ofMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t4
,
5267 getF32Constant(DAG
, 0x3f25f7c3, dl
));
5268 } else { // LimitFloatPrecision <= 18
5269 // For floating-point precision of 18:
5271 // Log10ofMantissa =
5276 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
5278 // error 0.0000037995730, which is better than 18 bits
5279 SDValue t0
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, X
,
5280 getF32Constant(DAG
, 0x3c5d51ce, dl
));
5281 SDValue t1
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t0
,
5282 getF32Constant(DAG
, 0x3e00685a, dl
));
5283 SDValue t2
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t1
, X
);
5284 SDValue t3
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t2
,
5285 getF32Constant(DAG
, 0x3efb6798, dl
));
5286 SDValue t4
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t3
, X
);
5287 SDValue t5
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t4
,
5288 getF32Constant(DAG
, 0x3f88d192, dl
));
5289 SDValue t6
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t5
, X
);
5290 SDValue t7
= DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, t6
,
5291 getF32Constant(DAG
, 0x3fc4316c, dl
));
5292 SDValue t8
= DAG
.getNode(ISD::FMUL
, dl
, MVT::f32
, t7
, X
);
5293 Log10ofMantissa
= DAG
.getNode(ISD::FSUB
, dl
, MVT::f32
, t8
,
5294 getF32Constant(DAG
, 0x3f57ce70, dl
));
5297 return DAG
.getNode(ISD::FADD
, dl
, MVT::f32
, LogOfExponent
, Log10ofMantissa
);
5300 // No special expansion.
5301 return DAG
.getNode(ISD::FLOG10
, dl
, Op
.getValueType(), Op
, Flags
);
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI, SDNodeFlags Flags) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
    return getLimitedPrecisionExp2(Op, dl, DAG);

  // No special expansion.
  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
}
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
                         SelectionDAG &DAG, const TargetLowering &TLI,
                         SDNodeFlags Flags) {
  bool IsExp10 = false;
  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
      APFloat Ten(10.0f);
      IsExp10 = LHSC->isExactlyValue(Ten);
    }
  }

  // TODO: What fast-math-flags should be set on the FMUL node?
  if (IsExp10) {
    // Put the exponent in the right bit position for later addition to the
    // final result:
    //
    //   #define LOG2OF10 3.3219281f
    //   t0 = Op * LOG2OF10;
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
                             getF32Constant(DAG, 0x40549a78, dl));
    return getLimitedPrecisionExp2(t0, dl, DAG);
  }

  // No special expansion.
  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
}
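// Illustrative note (not part of the original source): the 10^x special case
// rewrites pow(10.0f, x) as exp2(x * log2(10)); 0x40549a78 is the f32 bit
// pattern of 3.3219281 (log2(10)), so e.g. pow(10, 2) becomes
// exp2(2 * 3.3219281) = exp2(6.6438562) ~= 100.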
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
                          SelectionDAG &DAG) {
  // If RHS is a constant, we can expand this out to a multiplication tree if
  // it's beneficial on the target, otherwise we end up lowering to a call to
  // __powidf2 (for example).
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    unsigned Val = RHSC->getSExtValue();

    // powi(x, 0) -> 1.0
    if (Val == 0)
      return DAG.getConstantFP(1.0, DL, LHS.getValueType());

    if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
            Val, DAG.shouldOptForSize())) {
      // Get the exponent as a positive value.
      if ((int)Val < 0)
        Val = -Val;
      // We use the simple binary decomposition method to generate the multiply
      // sequence. There are more optimal ways to do this (for example,
      // powi(x,15) generates one more multiply than it should), but this has
      // the benefit of being both really simple and much better than a libcall.
      SDValue Res; // Logically starts equal to 1.0
      SDValue CurSquare = LHS;
      // TODO: Intrinsics should have fast-math-flags that propagate to these
      // nodes.
      while (Val) {
        if (Val & 1) {
          if (Res.getNode())
            Res =
                DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
          else
            Res = CurSquare; // 1.0*CurSquare.
        }

        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
                                CurSquare, CurSquare);
        Val >>= 1;
      }

      // If the original was negative, invert the result, producing 1/(x*x*x).
      if (RHSC->getSExtValue() < 0)
        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
                          DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
      return Res;
    }
  }

  // Otherwise, expand to a libcall.
  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
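// Illustrative worked example (not part of the original source): binary
// decomposition for powi(x, 13). 13 = 0b1101, so scanning bits LSB first
// while CurSquare runs through x, x^2, x^4, x^8:
//
//   bit 0 set   -> Res = x
//   bit 1 clear -> skip
//   bit 2 set   -> Res = x * x^4
//   bit 3 set   -> Res = (x * x^4) * x^8 = x^13
//
// so the whole expansion is a short chain of FMULs instead of a
// __powidf2-style libcall.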
static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
                            SDValue LHS, SDValue RHS, SDValue Scale,
                            SelectionDAG &DAG, const TargetLowering &TLI) {
  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  LLVMContext &Ctx = *DAG.getContext();

  // If the type is legal but the operation isn't, this node might survive all
  // the way to operation legalization. If we end up there and we do not have
  // the ability to widen the type (if VT*2 is not legal), we cannot expand the
  // node.

  // Coax the legalizer into expanding the node during type legalization instead
  // by bumping the size by one bit. This will force it to Promote, enabling the
  // early expansion and avoiding the need to expand later.

  // We don't have to do this if Scale is 0; that can always be expanded, unless
  // it's a saturating signed operation. Those can experience true integer
  // division overflow, a case which we must avoid.

  // FIXME: We wouldn't have to do this (or any of the early
  // expansion/promotion) if it was possible to expand a libcall of an
  // illegal type during operation legalization. But it's not, so things
  // get a bit hacky.
  unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
  if ((ScaleInt > 0 || (Saturating && Signed)) &&
      (TLI.isTypeLegal(VT) ||
       (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
    TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
        Opcode, VT, ScaleInt);
    if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
      EVT PromVT;
      if (VT.isScalarInteger())
        PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
      else if (VT.isVector()) {
        PromVT = VT.getVectorElementType();
        PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
        PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
      } else
        llvm_unreachable("Wrong VT for DIVFIX?");
      if (Signed) {
        LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
        RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
      } else {
        LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
        RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
      }
      EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
      // For saturating operations, we need to shift up the LHS to get the
      // proper saturation width, and then shift down again afterwards.
      if (Saturating)
        LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
                          DAG.getConstant(1, DL, ShiftTy));
      SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
      if (Saturating)
        Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
                          DAG.getConstant(1, DL, ShiftTy));
      return DAG.getZExtOrTrunc(Res, DL, VT);
    }
  }

  return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
}
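// Illustrative note (not part of the original source): for an i32
// sdiv.fix.sat with a nonzero scale, the code above widens to i33, which is
// illegal and therefore forces early expansion during type legalization. The
// extra LHS << 1 / result >> 1 pair keeps the saturation width right: the
// doubled dividend doubles the quotient, so a result that would exceed the
// 32-bit range saturates at the 33-bit limit instead, and the arithmetic
// shift back down lands exactly on the 32-bit saturation bound (e.g.
// (2^32 - 1) >> 1 = 2^31 - 1, the i32 signed max).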
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
static void
getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
                     const SDValue &N) {
  switch (N.getOpcode()) {
  case ISD::CopyFromReg: {
    SDValue Op = N.getOperand(1);
    Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
                      Op.getValueType().getSizeInBits());
    return;
  }
  case ISD::BITCAST:
  case ISD::AssertZext:
  case ISD::AssertSext:
  case ISD::TRUNCATE:
    getUnderlyingArgRegs(Regs, N.getOperand(0));
    return;
  case ISD::BUILD_PAIR:
  case ISD::BUILD_VECTOR:
  case ISD::CONCAT_VECTORS:
    for (SDValue Op : N->op_values())
      getUnderlyingArgRegs(Regs, Op);
    return;
  default:
    return;
  }
}
5492 /// If the DbgValueInst is a dbg_value of a function argument, create the
5493 /// corresponding DBG_VALUE machine instruction for it now. At the end of
5494 /// instruction selection, they will be inserted to the entry BB.
5495 /// We don't currently support this for variadic dbg_values, as they shouldn't
5496 /// appear for function arguments or in the prologue.
5497 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
5498 const Value
*V
, DILocalVariable
*Variable
, DIExpression
*Expr
,
5499 DILocation
*DL
, FuncArgumentDbgValueKind Kind
, const SDValue
&N
) {
5500 const Argument
*Arg
= dyn_cast
<Argument
>(V
);
5504 MachineFunction
&MF
= DAG
.getMachineFunction();
5505 const TargetInstrInfo
*TII
= DAG
.getSubtarget().getInstrInfo();
5507 // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
5508 // we've been asked to pursue.
5509 auto MakeVRegDbgValue
= [&](Register Reg
, DIExpression
*FragExpr
,
5511 if (Reg
.isVirtual() && MF
.useDebugInstrRef()) {
5512 // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
5513 // pointing at the VReg, which will be patched up later.
5514 auto &Inst
= TII
->get(TargetOpcode::DBG_INSTR_REF
);
5515 auto MIB
= BuildMI(MF
, DL
, Inst
);
5518 MIB
.addMetadata(Variable
);
5519 auto *NewDIExpr
= FragExpr
;
5520 // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
5521 // the DIExpression.
5523 NewDIExpr
= DIExpression::prepend(FragExpr
, DIExpression::DerefBefore
);
5524 MIB
.addMetadata(NewDIExpr
);
5527 // Create a completely standard DBG_VALUE.
5528 auto &Inst
= TII
->get(TargetOpcode::DBG_VALUE
);
5529 return BuildMI(MF
, DL
, Inst
, Indirect
, Reg
, Variable
, FragExpr
);
5533 if (Kind
== FuncArgumentDbgValueKind::Value
) {
5534 // ArgDbgValues are hoisted to the beginning of the entry block. So we
5535 // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
5537 bool IsInEntryBlock
= FuncInfo
.MBB
== &FuncInfo
.MF
->front();
5538 if (!IsInEntryBlock
)
5541 // ArgDbgValues are hoisted to the beginning of the entry block. So we
5542 // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
5543 // variable that also is a param.
5545 // Although, if we are at the top of the entry block already, we can still
5546 // emit using ArgDbgValue. This might catch some situations when the
5547 // dbg.value refers to an argument that isn't used in the entry block, so
5548 // any CopyToReg node would be optimized out and the only way to express
5549 // this DBG_VALUE is by using the physical reg (or FI) as done in this
5550 // method. ArgDbgValues are hoisted to the beginning of the entry block. So
5551 // we should only emit as ArgDbgValue if the Variable is an argument to the
5552 // current function, and the dbg.value intrinsic is found in the entry
5554 bool VariableIsFunctionInputArg
= Variable
->isParameter() &&
5555 !DL
->getInlinedAt();
5556 bool IsInPrologue
= SDNodeOrder
== LowestSDNodeOrder
;
5557 if (!IsInPrologue
&& !VariableIsFunctionInputArg
)
5560 // Here we assume that a function argument on IR level only can be used to
5561 // describe one input parameter on source level. If we for example have
5562 // source code like this
5564 // struct A { long x, y; };
5565 // void foo(struct A a, long b) {
5573 // define void @foo(i32 %a1, i32 %a2, i32 %b) {
5575 // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
5576 // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
5577 // call void @llvm.dbg.value(metadata i32 %b, "b",
5579 // call void @llvm.dbg.value(metadata i32 %a1, "b"
5582 // then the last dbg.value is describing a parameter "b" using a value that
5583 // is an argument. But since we already has used %a1 to describe a parameter
5584 // we should not handle that last dbg.value here (that would result in an
5585 // incorrect hoisting of the DBG_VALUE to the function entry).
5586 // Notice that we allow one dbg.value per IR level argument, to accommodate
5587 // for the situation with fragments above.
5588 if (VariableIsFunctionInputArg
) {
5589 unsigned ArgNo
= Arg
->getArgNo();
5590 if (ArgNo
>= FuncInfo
.DescribedArgs
.size())
5591 FuncInfo
.DescribedArgs
.resize(ArgNo
+ 1, false);
5592 else if (!IsInPrologue
&& FuncInfo
.DescribedArgs
.test(ArgNo
))
5594 FuncInfo
.DescribedArgs
.set(ArgNo
);
5598 bool IsIndirect
= false;
5599 Optional
<MachineOperand
> Op
;
5600 // Some arguments' frame index is recorded during argument lowering.
5601 int FI
= FuncInfo
.getArgumentFrameIndex(Arg
);
5602 if (FI
!= std::numeric_limits
<int>::max())
5603 Op
= MachineOperand::CreateFI(FI
);
5605 SmallVector
<std::pair
<unsigned, TypeSize
>, 8> ArgRegsAndSizes
;
5606 if (!Op
&& N
.getNode()) {
5607 getUnderlyingArgRegs(ArgRegsAndSizes
, N
);
5609 if (ArgRegsAndSizes
.size() == 1)
5610 Reg
= ArgRegsAndSizes
.front().first
;
5612 if (Reg
&& Reg
.isVirtual()) {
5613 MachineRegisterInfo
&RegInfo
= MF
.getRegInfo();
5614 Register PR
= RegInfo
.getLiveInPhysReg(Reg
);
5619 Op
= MachineOperand::CreateReg(Reg
, false);
5620 IsIndirect
= Kind
!= FuncArgumentDbgValueKind::Value
;
5624 if (!Op
&& N
.getNode()) {
5625 // Check if frame index is available.
5626 SDValue LCandidate
= peekThroughBitcasts(N
);
5627 if (LoadSDNode
*LNode
= dyn_cast
<LoadSDNode
>(LCandidate
.getNode()))
5628 if (FrameIndexSDNode
*FINode
=
5629 dyn_cast
<FrameIndexSDNode
>(LNode
->getBasePtr().getNode()))
5630 Op
= MachineOperand::CreateFI(FINode
->getIndex());
5634 // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
5635 auto splitMultiRegDbgValue
= [&](ArrayRef
<std::pair
<unsigned, TypeSize
>>
5637 unsigned Offset
= 0;
5638 for (const auto &RegAndSize
: SplitRegs
) {
5639 // If the expression is already a fragment, the current register
5640 // offset+size might extend beyond the fragment. In this case, only
5641 // the register bits that are inside the fragment are relevant.
5642 int RegFragmentSizeInBits
= RegAndSize
.second
;
5643 if (auto ExprFragmentInfo
= Expr
->getFragmentInfo()) {
5644 uint64_t ExprFragmentSizeInBits
= ExprFragmentInfo
->SizeInBits
;
5645 // The register is entirely outside the expression fragment,
5646 // so is irrelevant for debug info.
5647 if (Offset
>= ExprFragmentSizeInBits
)
5649 // The register is partially outside the expression fragment, only
5650 // the low bits within the fragment are relevant for debug info.
5651 if (Offset
+ RegFragmentSizeInBits
> ExprFragmentSizeInBits
) {
5652 RegFragmentSizeInBits
= ExprFragmentSizeInBits
- Offset
;
5656 auto FragmentExpr
= DIExpression::createFragmentExpression(
5657 Expr
, Offset
, RegFragmentSizeInBits
);
5658 Offset
+= RegAndSize
.second
;
5659 // If a valid fragment expression cannot be created, the variable's
5660 // correct value cannot be determined and so it is set as Undef.
5661 if (!FragmentExpr
) {
5662 SDDbgValue
*SDV
= DAG
.getConstantDbgValue(
5663 Variable
, Expr
, UndefValue::get(V
->getType()), DL
, SDNodeOrder
);
5664 DAG
.AddDbgValue(SDV
, false);
5667 MachineInstr
*NewMI
=
5668 MakeVRegDbgValue(RegAndSize
.first
, *FragmentExpr
,
5669 Kind
!= FuncArgumentDbgValueKind::Value
);
5670 FuncInfo
.ArgDbgValues
.push_back(NewMI
);
5674 // Check if ValueMap has reg number.
5675 DenseMap
<const Value
*, Register
>::const_iterator
5676 VMI
= FuncInfo
.ValueMap
.find(V
);
5677 if (VMI
!= FuncInfo
.ValueMap
.end()) {
5678 const auto &TLI
= DAG
.getTargetLoweringInfo();
5679 RegsForValue
RFV(V
->getContext(), TLI
, DAG
.getDataLayout(), VMI
->second
,
5680 V
->getType(), None
);
5681 if (RFV
.occupiesMultipleRegs()) {
5682 splitMultiRegDbgValue(RFV
.getRegsAndSizes());
5686 Op
= MachineOperand::CreateReg(VMI
->second
, false);
5687 IsIndirect
= Kind
!= FuncArgumentDbgValueKind::Value
;
5688 } else if (ArgRegsAndSizes
.size() > 1) {
5689 // This was split due to the calling convention, and no virtual register
5690 // mapping exists for the value.
5691 splitMultiRegDbgValue(ArgRegsAndSizes
);
5699 assert(Variable
->isValidLocationForIntrinsic(DL
) &&
5700 "Expected inlined-at fields to agree");
5701 MachineInstr
*NewMI
= nullptr;
5704 NewMI
= MakeVRegDbgValue(Op
->getReg(), Expr
, IsIndirect
);
5706 NewMI
= BuildMI(MF
, DL
, TII
->get(TargetOpcode::DBG_VALUE
), true, *Op
,
5709 // Otherwise, use ArgDbgValues.
5710 FuncInfo
.ArgDbgValues
.push_back(NewMI
);
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
                                             DILocalVariable *Variable,
                                             DIExpression *Expr,
                                             const DebugLoc &dl,
                                             unsigned DbgSDNodeOrder) {
  if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
    // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
    // stack slot locations.
    //
    // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
    // debug values here after optimization:
    //
    //   dbg.value(i32* %px, !"int *px", !DIExpression()), and
    //   dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
    //
    // Both describe the direct values of their associated variables.
    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
                                     /*IsIndirect*/ false, dl, DbgSDNodeOrder);
  }
  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
                         /*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
  switch (Intrinsic) {
  case Intrinsic::smul_fix:
    return ISD::SMULFIX;
  case Intrinsic::umul_fix:
    return ISD::UMULFIX;
  case Intrinsic::smul_fix_sat:
    return ISD::SMULFIXSAT;
  case Intrinsic::umul_fix_sat:
    return ISD::UMULFIXSAT;
  case Intrinsic::sdiv_fix:
    return ISD::SDIVFIX;
  case Intrinsic::udiv_fix:
    return ISD::UDIVFIX;
  case Intrinsic::sdiv_fix_sat:
    return ISD::SDIVFIXSAT;
  case Intrinsic::udiv_fix_sat:
    return ISD::UDIVFIXSAT;
  default:
    llvm_unreachable("Unhandled fixed point intrinsic");
  }
}
void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
                                                    const char *FunctionName) {
  assert(FunctionName && "FunctionName must not be nullptr");
  SDValue Callee = DAG.getExternalSymbol(
      FunctionName,
      DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
  LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
/// Given a @llvm.call.preallocated.setup, return the corresponding
/// preallocated call.
static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
  assert(cast<CallBase>(PreallocatedSetup)
                 ->getCalledFunction()
                 ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
         "expected call_preallocated_setup Value");
  for (auto *U : PreallocatedSetup->users()) {
    auto *UseCall = cast<CallBase>(U);
    const Function *Fn = UseCall->getCalledFunction();
    if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
      return UseCall;
    }
  }
  llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
5787 /// Lower the call to the specified intrinsic function.
5788 void SelectionDAGBuilder::visitIntrinsicCall(const CallInst
&I
,
5789 unsigned Intrinsic
) {
5790 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
5791 SDLoc sdl
= getCurSDLoc();
5792 DebugLoc dl
= getCurDebugLoc();
5796 if (auto *FPOp
= dyn_cast
<FPMathOperator
>(&I
))
5797 Flags
.copyFMF(*FPOp
);
5799 switch (Intrinsic
) {
5801 // By default, turn this into a target intrinsic node.
5802 visitTargetIntrinsic(I
, Intrinsic
);
5804 case Intrinsic::vscale
: {
5805 match(&I
, m_VScale(DAG
.getDataLayout()));
5806 EVT VT
= TLI
.getValueType(DAG
.getDataLayout(), I
.getType());
5807 setValue(&I
, DAG
.getVScale(sdl
, VT
, APInt(VT
.getSizeInBits(), 1)));
5810 case Intrinsic::vastart
: visitVAStart(I
); return;
5811 case Intrinsic::vaend
: visitVAEnd(I
); return;
5812 case Intrinsic::vacopy
: visitVACopy(I
); return;
5813 case Intrinsic::returnaddress
:
5814 setValue(&I
, DAG
.getNode(ISD::RETURNADDR
, sdl
,
5815 TLI
.getValueType(DAG
.getDataLayout(), I
.getType()),
5816 getValue(I
.getArgOperand(0))));
5818 case Intrinsic::addressofreturnaddress
:
5820 DAG
.getNode(ISD::ADDROFRETURNADDR
, sdl
,
5821 TLI
.getValueType(DAG
.getDataLayout(), I
.getType())));
5823 case Intrinsic::sponentry
:
5825 DAG
.getNode(ISD::SPONENTRY
, sdl
,
5826 TLI
.getValueType(DAG
.getDataLayout(), I
.getType())));
5828 case Intrinsic::frameaddress
:
5829 setValue(&I
, DAG
.getNode(ISD::FRAMEADDR
, sdl
,
5830 TLI
.getFrameIndexTy(DAG
.getDataLayout()),
5831 getValue(I
.getArgOperand(0))));
5833 case Intrinsic::read_volatile_register
:
5834 case Intrinsic::read_register
: {
5835 Value
*Reg
= I
.getArgOperand(0);
5836 SDValue Chain
= getRoot();
5838 DAG
.getMDNode(cast
<MDNode
>(cast
<MetadataAsValue
>(Reg
)->getMetadata()));
5839 EVT VT
= TLI
.getValueType(DAG
.getDataLayout(), I
.getType());
5840 Res
= DAG
.getNode(ISD::READ_REGISTER
, sdl
,
5841 DAG
.getVTList(VT
, MVT::Other
), Chain
, RegName
);
5843 DAG
.setRoot(Res
.getValue(1));
5846 case Intrinsic::write_register
: {
5847 Value
*Reg
= I
.getArgOperand(0);
5848 Value
*RegValue
= I
.getArgOperand(1);
5849 SDValue Chain
= getRoot();
5851 DAG
.getMDNode(cast
<MDNode
>(cast
<MetadataAsValue
>(Reg
)->getMetadata()));
5852 DAG
.setRoot(DAG
.getNode(ISD::WRITE_REGISTER
, sdl
, MVT::Other
, Chain
,
5853 RegName
, getValue(RegValue
)));
5856 case Intrinsic::memcpy
: {
5857 const auto &MCI
= cast
<MemCpyInst
>(I
);
5858 SDValue Op1
= getValue(I
.getArgOperand(0));
5859 SDValue Op2
= getValue(I
.getArgOperand(1));
5860 SDValue Op3
= getValue(I
.getArgOperand(2));
5861 // @llvm.memcpy defines 0 and 1 to both mean no alignment.
5862 Align DstAlign
= MCI
.getDestAlign().valueOrOne();
5863 Align SrcAlign
= MCI
.getSourceAlign().valueOrOne();
5864 Align Alignment
= commonAlignment(DstAlign
, SrcAlign
);
5865 bool isVol
= MCI
.isVolatile();
5866 bool isTC
= I
.isTailCall() && isInTailCallPosition(I
, DAG
.getTarget());
5867 // FIXME: Support passing different dest/src alignments to the memcpy DAG
5869 SDValue Root
= isVol
? getRoot() : getMemoryRoot();
5870 SDValue MC
= DAG
.getMemcpy(Root
, sdl
, Op1
, Op2
, Op3
, Alignment
, isVol
,
5871 /* AlwaysInline */ false, isTC
,
5872 MachinePointerInfo(I
.getArgOperand(0)),
5873 MachinePointerInfo(I
.getArgOperand(1)),
5875 updateDAGForMaybeTailCall(MC
);
5878 case Intrinsic::memcpy_inline
: {
5879 const auto &MCI
= cast
<MemCpyInlineInst
>(I
);
5880 SDValue Dst
= getValue(I
.getArgOperand(0));
5881 SDValue Src
= getValue(I
.getArgOperand(1));
5882 SDValue Size
= getValue(I
.getArgOperand(2));
5883 assert(isa
<ConstantSDNode
>(Size
) && "memcpy_inline needs constant size");
5884 // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
5885 Align DstAlign
= MCI
.getDestAlign().valueOrOne();
5886 Align SrcAlign
= MCI
.getSourceAlign().valueOrOne();
5887 Align Alignment
= commonAlignment(DstAlign
, SrcAlign
);
5888 bool isVol
= MCI
.isVolatile();
5889 bool isTC
= I
.isTailCall() && isInTailCallPosition(I
, DAG
.getTarget());
5890 // FIXME: Support passing different dest/src alignments to the memcpy DAG
5892 SDValue MC
= DAG
.getMemcpy(getRoot(), sdl
, Dst
, Src
, Size
, Alignment
, isVol
,
5893 /* AlwaysInline */ true, isTC
,
5894 MachinePointerInfo(I
.getArgOperand(0)),
5895 MachinePointerInfo(I
.getArgOperand(1)),
5897 updateDAGForMaybeTailCall(MC
);
5900 case Intrinsic::memset
: {
5901 const auto &MSI
= cast
<MemSetInst
>(I
);
5902 SDValue Op1
= getValue(I
.getArgOperand(0));
5903 SDValue Op2
= getValue(I
.getArgOperand(1));
5904 SDValue Op3
= getValue(I
.getArgOperand(2));
5905 // @llvm.memset defines 0 and 1 to both mean no alignment.
5906 Align Alignment
= MSI
.getDestAlign().valueOrOne();
5907 bool isVol
= MSI
.isVolatile();
5908 bool isTC
= I
.isTailCall() && isInTailCallPosition(I
, DAG
.getTarget());
5909 SDValue Root
= isVol
? getRoot() : getMemoryRoot();
5910 SDValue MS
= DAG
.getMemset(
5911 Root
, sdl
, Op1
, Op2
, Op3
, Alignment
, isVol
, /* AlwaysInline */ false,
5912 isTC
, MachinePointerInfo(I
.getArgOperand(0)), I
.getAAMetadata());
5913 updateDAGForMaybeTailCall(MS
);
5916 case Intrinsic::memset_inline
: {
5917 const auto &MSII
= cast
<MemSetInlineInst
>(I
);
5918 SDValue Dst
= getValue(I
.getArgOperand(0));
5919 SDValue Value
= getValue(I
.getArgOperand(1));
5920 SDValue Size
= getValue(I
.getArgOperand(2));
5921 assert(isa
<ConstantSDNode
>(Size
) && "memset_inline needs constant size");
5922 // @llvm.memset defines 0 and 1 to both mean no alignment.
5923 Align DstAlign
= MSII
.getDestAlign().valueOrOne();
5924 bool isVol
= MSII
.isVolatile();
5925 bool isTC
= I
.isTailCall() && isInTailCallPosition(I
, DAG
.getTarget());
5926 SDValue Root
= isVol
? getRoot() : getMemoryRoot();
5927 SDValue MC
= DAG
.getMemset(Root
, sdl
, Dst
, Value
, Size
, DstAlign
, isVol
,
5928 /* AlwaysInline */ true, isTC
,
5929 MachinePointerInfo(I
.getArgOperand(0)),
5931 updateDAGForMaybeTailCall(MC
);
5934 case Intrinsic::memmove
: {
5935 const auto &MMI
= cast
<MemMoveInst
>(I
);
5936 SDValue Op1
= getValue(I
.getArgOperand(0));
5937 SDValue Op2
= getValue(I
.getArgOperand(1));
5938 SDValue Op3
= getValue(I
.getArgOperand(2));
5939 // @llvm.memmove defines 0 and 1 to both mean no alignment.
5940 Align DstAlign
= MMI
.getDestAlign().valueOrOne();
5941 Align SrcAlign
= MMI
.getSourceAlign().valueOrOne();
5942 Align Alignment
= commonAlignment(DstAlign
, SrcAlign
);
5943 bool isVol
= MMI
.isVolatile();
5944 bool isTC
= I
.isTailCall() && isInTailCallPosition(I
, DAG
.getTarget());
5945 // FIXME: Support passing different dest/src alignments to the memmove DAG
5947 SDValue Root
= isVol
? getRoot() : getMemoryRoot();
5948 SDValue MM
= DAG
.getMemmove(Root
, sdl
, Op1
, Op2
, Op3
, Alignment
, isVol
,
5949 isTC
, MachinePointerInfo(I
.getArgOperand(0)),
5950 MachinePointerInfo(I
.getArgOperand(1)),
5952 updateDAGForMaybeTailCall(MM
);
5955 case Intrinsic::memcpy_element_unordered_atomic
: {
5956 const AtomicMemCpyInst
&MI
= cast
<AtomicMemCpyInst
>(I
);
5957 SDValue Dst
= getValue(MI
.getRawDest());
5958 SDValue Src
= getValue(MI
.getRawSource());
5959 SDValue Length
= getValue(MI
.getLength());
5961 Type
*LengthTy
= MI
.getLength()->getType();
5962 unsigned ElemSz
= MI
.getElementSizeInBytes();
5963 bool isTC
= I
.isTailCall() && isInTailCallPosition(I
, DAG
.getTarget());
5965 DAG
.getAtomicMemcpy(getRoot(), sdl
, Dst
, Src
, Length
, LengthTy
, ElemSz
,
5966 isTC
, MachinePointerInfo(MI
.getRawDest()),
5967 MachinePointerInfo(MI
.getRawSource()));
5968 updateDAGForMaybeTailCall(MC
);
5971 case Intrinsic::memmove_element_unordered_atomic
: {
5972 auto &MI
= cast
<AtomicMemMoveInst
>(I
);
5973 SDValue Dst
= getValue(MI
.getRawDest());
5974 SDValue Src
= getValue(MI
.getRawSource());
5975 SDValue Length
= getValue(MI
.getLength());
5977 Type
*LengthTy
= MI
.getLength()->getType();
5978 unsigned ElemSz
= MI
.getElementSizeInBytes();
5979 bool isTC
= I
.isTailCall() && isInTailCallPosition(I
, DAG
.getTarget());
5981 DAG
.getAtomicMemmove(getRoot(), sdl
, Dst
, Src
, Length
, LengthTy
, ElemSz
,
5982 isTC
, MachinePointerInfo(MI
.getRawDest()),
5983 MachinePointerInfo(MI
.getRawSource()));
5984 updateDAGForMaybeTailCall(MC
);
5987 case Intrinsic::memset_element_unordered_atomic
: {
5988 auto &MI
= cast
<AtomicMemSetInst
>(I
);
5989 SDValue Dst
= getValue(MI
.getRawDest());
5990 SDValue Val
= getValue(MI
.getValue());
5991 SDValue Length
= getValue(MI
.getLength());
5993 Type
*LengthTy
= MI
.getLength()->getType();
5994 unsigned ElemSz
= MI
.getElementSizeInBytes();
5995 bool isTC
= I
.isTailCall() && isInTailCallPosition(I
, DAG
.getTarget());
5997 DAG
.getAtomicMemset(getRoot(), sdl
, Dst
, Val
, Length
, LengthTy
, ElemSz
,
5998 isTC
, MachinePointerInfo(MI
.getRawDest()));
5999 updateDAGForMaybeTailCall(MC
);
6002 case Intrinsic::call_preallocated_setup
: {
6003 const CallBase
*PreallocatedCall
= FindPreallocatedCall(&I
);
6004 SDValue SrcValue
= DAG
.getSrcValue(PreallocatedCall
);
6005 SDValue Res
= DAG
.getNode(ISD::PREALLOCATED_SETUP
, sdl
, MVT::Other
,
6006 getRoot(), SrcValue
);
6011 case Intrinsic::call_preallocated_arg
: {
6012 const CallBase
*PreallocatedCall
= FindPreallocatedCall(I
.getOperand(0));
6013 SDValue SrcValue
= DAG
.getSrcValue(PreallocatedCall
);
6017 Ops
[2] = DAG
.getTargetConstant(*cast
<ConstantInt
>(I
.getArgOperand(1)), sdl
,
6018 MVT::i32
); // arg index
6019 SDValue Res
= DAG
.getNode(
6020 ISD::PREALLOCATED_ARG
, sdl
,
6021 DAG
.getVTList(TLI
.getPointerTy(DAG
.getDataLayout()), MVT::Other
), Ops
);
6023 DAG
.setRoot(Res
.getValue(1));
6026 case Intrinsic::dbg_addr
:
6027 case Intrinsic::dbg_declare
: {
6028 // Assume dbg.addr and dbg.declare can not currently use DIArgList, i.e.
6029 // they are non-variadic.
6030 const auto &DI
= cast
<DbgVariableIntrinsic
>(I
);
6031 assert(!DI
.hasArgList() && "Only dbg.value should currently use DIArgList");
6032 DILocalVariable
*Variable
= DI
.getVariable();
6033 DIExpression
*Expression
= DI
.getExpression();
6034 dropDanglingDebugInfo(Variable
, Expression
);
6035 assert(Variable
&& "Missing variable");
6036 LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
6038 // Check if address has undef value.
6039 const Value
*Address
= DI
.getVariableLocationOp(0);
6040 if (!Address
|| isa
<UndefValue
>(Address
) ||
6041 (Address
->use_empty() && !isa
<Argument
>(Address
))) {
6042 LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
6043 << " (bad/undef/unused-arg address)\n");
6047 bool isParameter
= Variable
->isParameter() || isa
<Argument
>(Address
);
6049 // Check if this variable can be described by a frame index, typically
6050 // either as a static alloca or a byval parameter.
6051 int FI
= std::numeric_limits
<int>::max();
6052 if (const auto *AI
=
6053 dyn_cast
<AllocaInst
>(Address
->stripInBoundsConstantOffsets())) {
6054 if (AI
->isStaticAlloca()) {
6055 auto I
= FuncInfo
.StaticAllocaMap
.find(AI
);
6056 if (I
!= FuncInfo
.StaticAllocaMap
.end())
6059 } else if (const auto *Arg
= dyn_cast
<Argument
>(
6060 Address
->stripInBoundsConstantOffsets())) {
6061 FI
= FuncInfo
.getArgumentFrameIndex(Arg
);
6064 // llvm.dbg.addr is control dependent and always generates indirect
6065 // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
6066 // the MachineFunction variable table.
6067 if (FI
!= std::numeric_limits
<int>::max()) {
6068 if (Intrinsic
== Intrinsic::dbg_addr
) {
6069 SDDbgValue
*SDV
= DAG
.getFrameIndexDbgValue(
6070 Variable
, Expression
, FI
, getRoot().getNode(), /*IsIndirect*/ true,
6072 DAG
.AddDbgValue(SDV
, isParameter
);
6074 LLVM_DEBUG(dbgs() << "Skipping " << DI
6075 << " (variable info stashed in MF side table)\n");
6080 SDValue
&N
= NodeMap
[Address
];
6081 if (!N
.getNode() && isa
<Argument
>(Address
))
6082 // Check unused arguments map.
6083 N
= UnusedArgNodeMap
[Address
];
6086 if (const BitCastInst
*BCI
= dyn_cast
<BitCastInst
>(Address
))
6087 Address
= BCI
->getOperand(0);
6088 // Parameters are handled specially.
6089 auto FINode
= dyn_cast
<FrameIndexSDNode
>(N
.getNode());
6090 if (isParameter
&& FINode
) {
6091 // Byval parameter. We have a frame index at this point.
6093 DAG
.getFrameIndexDbgValue(Variable
, Expression
, FINode
->getIndex(),
6094 /*IsIndirect*/ true, dl
, SDNodeOrder
);
6095 } else if (isa
<Argument
>(Address
)) {
6096 // Address is an argument, so try to emit its dbg value using
6097 // virtual register info from the FuncInfo.ValueMap.
6098 EmitFuncArgumentDbgValue(Address
, Variable
, Expression
, dl
,
6099 FuncArgumentDbgValueKind::Declare
, N
);
6102 SDV
= DAG
.getDbgValue(Variable
, Expression
, N
.getNode(), N
.getResNo(),
6103 true, dl
, SDNodeOrder
);
6105 DAG
.AddDbgValue(SDV
, isParameter
);
6107 // If Address is an argument then try to emit its dbg value using
6108 // virtual register info from the FuncInfo.ValueMap.
6109 if (!EmitFuncArgumentDbgValue(Address
, Variable
, Expression
, dl
,
6110 FuncArgumentDbgValueKind::Declare
, N
)) {
6111 LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
6112 << " (could not emit func-arg dbg_value)\n");
  case Intrinsic::dbg_label: {
    const DbgLabelInst &DI = cast<DbgLabelInst>(I);
    DILabel *Label = DI.getLabel();
    assert(Label && "Missing label");

    SDDbgLabel *SDV;
    SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
    DAG.AddDbgLabel(SDV);
    return;
  }
  case Intrinsic::dbg_value: {
    const DbgValueInst &DI = cast<DbgValueInst>(I);
    assert(DI.getVariable() && "Missing variable");

    DILocalVariable *Variable = DI.getVariable();
    DIExpression *Expression = DI.getExpression();
    dropDanglingDebugInfo(Variable, Expression);
    SmallVector<Value *, 4> Values(DI.getValues());
    if (Values.empty())
      return;

    if (llvm::is_contained(Values, nullptr))
      return;

    bool IsVariadic = DI.hasArgList();
    if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
                          SDNodeOrder, IsVariadic))
      addDanglingDebugInfo(&DI, dl, SDNodeOrder);
    return;
  }
  case Intrinsic::eh_typeid_for: {
    // Find the type id for the given typeinfo.
    GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
    unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
    Res = DAG.getConstant(TypeID, sdl, MVT::i32);
    setValue(&I, Res);
    return;
  }

  case Intrinsic::eh_return_i32:
  case Intrinsic::eh_return_i64:
    DAG.getMachineFunction().setCallsEHReturn(true);
    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
                            MVT::Other,
                            getControlRoot(),
                            getValue(I.getArgOperand(0)),
                            getValue(I.getArgOperand(1))));
    return;
  case Intrinsic::eh_unwind_init:
    DAG.getMachineFunction().setCallsUnwindInit(true);
    return;
  case Intrinsic::eh_dwarf_cfa:
    setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
                             TLI.getPointerTy(DAG.getDataLayout()),
                             getValue(I.getArgOperand(0))));
    return;
  case Intrinsic::eh_sjlj_callsite: {
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(0));
    assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");

    MMI.setCurrentCallSite(CI->getZExtValue());
    return;
  }
  case Intrinsic::eh_sjlj_functioncontext: {
    // Get and store the index of the function context.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    AllocaInst *FnCtx =
        cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
    int FI = FuncInfo.StaticAllocaMap[FnCtx];
    MFI.setFunctionContextIndex(FI);
    return;
  }
  case Intrinsic::eh_sjlj_setjmp: {
    SDValue Ops[2];
    Ops[0] = getRoot();
    Ops[1] = getValue(I.getArgOperand(0));
    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
                             DAG.getVTList(MVT::i32, MVT::Other), Ops);
    setValue(&I, Op.getValue(0));
    DAG.setRoot(Op.getValue(1));
    return;
  }
  case Intrinsic::eh_sjlj_longjmp:
    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
                            getRoot(), getValue(I.getArgOperand(0))));
    return;
  case Intrinsic::eh_sjlj_setup_dispatch:
    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
                            getRoot()));
    return;
  case Intrinsic::masked_gather:
    visitMaskedGather(I);
    return;
  case Intrinsic::masked_load:
    visitMaskedLoad(I);
    return;
  case Intrinsic::masked_scatter:
    visitMaskedScatter(I);
    return;
  case Intrinsic::masked_store:
    visitMaskedStore(I);
    return;
  case Intrinsic::masked_expandload:
    visitMaskedLoad(I, true /* IsExpanding */);
    return;
  case Intrinsic::masked_compressstore:
    visitMaskedStore(I, true /* IsCompressing */);
    return;
  case Intrinsic::powi:
    setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
                            getValue(I.getArgOperand(1)), DAG));
    return;
  case Intrinsic::log:
    setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
    return;
  case Intrinsic::log2:
    setValue(&I,
             expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
    return;
  case Intrinsic::log10:
    setValue(&I,
             expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
    return;
  case Intrinsic::exp:
    setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
    return;
  case Intrinsic::exp2:
    setValue(&I,
             expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
    return;
  case Intrinsic::pow:
    setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
                           getValue(I.getArgOperand(1)), DAG, TLI, Flags));
    return;
  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::canonicalize: {
    unsigned Opcode;
    switch (Intrinsic) {
    default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
    case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
    case Intrinsic::fabs:      Opcode = ISD::FABS;       break;
    case Intrinsic::sin:       Opcode = ISD::FSIN;       break;
    case Intrinsic::cos:       Opcode = ISD::FCOS;       break;
    case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
    case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
    case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
    case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
    case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
    case Intrinsic::round:     Opcode = ISD::FROUND;     break;
    case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
    case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
    }

    setValue(&I, DAG.getNode(Opcode, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0)), Flags));
    return;
  }
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint: {
    unsigned Opcode;
    switch (Intrinsic) {
    default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
    case Intrinsic::lround:  Opcode = ISD::LROUND;  break;
    case Intrinsic::llround: Opcode = ISD::LLROUND; break;
    case Intrinsic::lrint:   Opcode = ISD::LRINT;   break;
    case Intrinsic::llrint:  Opcode = ISD::LLRINT;  break;
    }

    EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
                             getValue(I.getArgOperand(0))));
    return;
  }
  case Intrinsic::minnum:
    setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1)), Flags));
    return;
  case Intrinsic::maxnum:
    setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1)), Flags));
    return;
  case Intrinsic::minimum:
    setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1)), Flags));
    return;
  case Intrinsic::maximum:
    setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1)), Flags));
    return;
  case Intrinsic::copysign:
    setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1)), Flags));
    return;
  case Intrinsic::arithmetic_fence: {
    setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0)), Flags));
    return;
  }
  case Intrinsic::fma:
    setValue(&I, DAG.getNode(
                     ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
                     getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
                     getValue(I.getArgOperand(2)), Flags));
    return;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
  case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
    visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
    return;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#include "llvm/IR/VPIntrinsics.def"
    visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
    return;
  case Intrinsic::fptrunc_round: {
    // Get the last argument, the metadata and convert it to an integer in the
    // call.
    Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
    Optional<RoundingMode> RoundMode =
        convertStrToRoundingMode(cast<MDString>(MD)->getString());

    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());

    // Propagate fast-math-flags from IR to node(s).
    SDNodeFlags Flags;
    Flags.copyFMF(*cast<FPMathOperator>(&I));
    SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);

    SDValue Result;
    Result = DAG.getNode(
        ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
        DAG.getTargetConstant((int)*RoundMode, sdl,
                              TLI.getPointerTy(DAG.getDataLayout())));
    setValue(&I, Result);
    return;
  }
  case Intrinsic::fmuladd: {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
        TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
      setValue(&I, DAG.getNode(ISD::FMA, sdl,
                               getValue(I.getArgOperand(0)).getValueType(),
                               getValue(I.getArgOperand(0)),
                               getValue(I.getArgOperand(1)),
                               getValue(I.getArgOperand(2)), Flags));
    } else {
      // TODO: Intrinsic calls should have fast-math-flags.
      SDValue Mul = DAG.getNode(
          ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
          getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
      SDValue Add = DAG.getNode(ISD::FADD, sdl,
                                getValue(I.getArgOperand(0)).getValueType(),
                                Mul, getValue(I.getArgOperand(2)), Flags);
      setValue(&I, Add);
    }
    return;
  }
  case Intrinsic::convert_to_fp16:
    setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
                             DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
                                         getValue(I.getArgOperand(0)),
                                         DAG.getTargetConstant(0, sdl,
                                                               MVT::i32))));
    return;
  case Intrinsic::convert_from_fp16:
    setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
                             TLI.getValueType(DAG.getDataLayout(), I.getType()),
                             DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
                                         getValue(I.getArgOperand(0)))));
    return;
  case Intrinsic::fptosi_sat: {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT,
                             getValue(I.getArgOperand(0)),
                             DAG.getValueType(VT.getScalarType())));
    return;
  }
  case Intrinsic::fptoui_sat: {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT,
                             getValue(I.getArgOperand(0)),
                             DAG.getValueType(VT.getScalarType())));
    return;
  }
  case Intrinsic::set_rounding:
    Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
                      {getRoot(), getValue(I.getArgOperand(0))});
    setValue(&I, Res);
    DAG.setRoot(Res.getValue(0));
    return;
  case Intrinsic::is_fpclass: {
    const DataLayout DLayout = DAG.getDataLayout();
    EVT DestVT = TLI.getValueType(DLayout, I.getType());
    EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
    unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    MachineFunction &MF = DAG.getMachineFunction();
    const Function &F = MF.getFunction();
    SDValue Op = getValue(I.getArgOperand(0));
    SDNodeFlags Flags;
    Flags.setNoFPExcept(
        !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
    // If ISD::IS_FPCLASS should be expanded, do it right now, because the
    // expansion can use illegal types. Making expansion early allows
    // legalizing these types prior to selection.
    if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
      SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
      setValue(&I, Result);
      return;
    }

    SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
    SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
    setValue(&I, V);
    return;
  }
  case Intrinsic::pcmarker: {
    SDValue Tmp = getValue(I.getArgOperand(0));
    DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
    return;
  }
  case Intrinsic::readcyclecounter: {
    SDValue Op = getRoot();
    Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
                      DAG.getVTList(MVT::i64, MVT::Other), Op);
    setValue(&I, Res);
    DAG.setRoot(Res.getValue(1));
    return;
  }
  case Intrinsic::bitreverse:
    setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0))));
    return;
  case Intrinsic::bswap:
    setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
                             getValue(I.getArgOperand(0)).getValueType(),
                             getValue(I.getArgOperand(0))));
    return;
  case Intrinsic::cttz: {
    SDValue Arg = getValue(I.getArgOperand(0));
    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
    EVT Ty = Arg.getValueType();
    setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
                             sdl, Ty, Arg));
    return;
  }
  case Intrinsic::ctlz: {
    SDValue Arg = getValue(I.getArgOperand(0));
    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
    EVT Ty = Arg.getValueType();
    setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
                             sdl, Ty, Arg));
    return;
  }
  case Intrinsic::ctpop: {
    SDValue Arg = getValue(I.getArgOperand(0));
    EVT Ty = Arg.getValueType();
    setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
    return;
  }
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    bool IsFSHL = Intrinsic == Intrinsic::fshl;
    SDValue X = getValue(I.getArgOperand(0));
    SDValue Y = getValue(I.getArgOperand(1));
    SDValue Z = getValue(I.getArgOperand(2));
    EVT VT = X.getValueType();

    if (X == Y) {
      auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
      setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
    } else {
      auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
      setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
    }
    return;
  }
  case Intrinsic::sadd_sat: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::uadd_sat: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::ssub_sat: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::usub_sat: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::sshl_sat: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::ushl_sat: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::smul_fix:
  case Intrinsic::umul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix_sat: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    SDValue Op3 = getValue(I.getArgOperand(2));
    setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
                             Op1.getValueType(), Op1, Op2, Op3));
    return;
  }
  case Intrinsic::sdiv_fix:
  case Intrinsic::udiv_fix:
  case Intrinsic::sdiv_fix_sat:
  case Intrinsic::udiv_fix_sat: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    SDValue Op3 = getValue(I.getArgOperand(2));
    setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
                              Op1, Op2, Op3, DAG, TLI));
    return;
  }
  case Intrinsic::smax: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::smin: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::umax: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::umin: {
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));
    setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
    return;
  }
  case Intrinsic::abs: {
    // TODO: Preserve "int min is poison" arg in SDAG?
    SDValue Op1 = getValue(I.getArgOperand(0));
    setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
    return;
  }
  case Intrinsic::stacksave: {
    SDValue Op = getRoot();
    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
    setValue(&I, Res);
    DAG.setRoot(Res.getValue(1));
    return;
  }
  case Intrinsic::stackrestore:
    Res = getValue(I.getArgOperand(0));
    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
    return;
  case Intrinsic::get_dynamic_area_offset: {
    SDValue Op = getRoot();
    EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
    EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
    // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
    // target.
    if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
      report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
                         " intrinsic!");
    Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
                      Op);
    DAG.setRoot(Op);
    setValue(&I, Res);
    return;
  }
  case Intrinsic::stackguard: {
    MachineFunction &MF = DAG.getMachineFunction();
    const Module &M = *MF.getFunction().getParent();
    SDValue Chain = getRoot();
    if (TLI.useLoadStackGuardNode()) {
      Res = getLoadStackGuard(DAG, sdl, Chain);
    } else {
      EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
      const Value *Global = TLI.getSDagStackGuard(M);
      Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
      Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
                        MachinePointerInfo(Global, 0), Align,
                        MachineMemOperand::MOVolatile);
    }
    if (TLI.useStackGuardXorFP())
      Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
    DAG.setRoot(Chain);
    setValue(&I, Res);
    return;
  }
  case Intrinsic::stackprotector: {
    // Emit code into the DAG to store the stack guard onto the stack.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo &MFI = MF.getFrameInfo();
    SDValue Src, Chain = getRoot();

    if (TLI.useLoadStackGuardNode())
      Src = getLoadStackGuard(DAG, sdl, Chain);
    else
      Src = getValue(I.getArgOperand(0)); // The guard's value.

    AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));

    int FI = FuncInfo.StaticAllocaMap[Slot];
    MFI.setStackProtectorIndex(FI);
    EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());

    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);

    // Store the stack protector onto the stack.
    Res = DAG.getStore(
        Chain, sdl, Src, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
        MaybeAlign(), MachineMemOperand::MOVolatile);
    setValue(&I, Res);
    DAG.setRoot(Res);
    return;
  }
  case Intrinsic::objectsize:
    llvm_unreachable("llvm.objectsize.* should have been lowered already");

  case Intrinsic::is_constant:
    llvm_unreachable("llvm.is.constant.* should have been lowered already");

  case Intrinsic::annotation:
  case Intrinsic::ptr_annotation:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    // Drop the intrinsic, but forward the value
    setValue(&I, getValue(I.getOperand(0)));
    return;

  case Intrinsic::assume:
  case Intrinsic::experimental_noalias_scope_decl:
  case Intrinsic::var_annotation:
  case Intrinsic::sideeffect:
    // Discard annotate attributes, noalias scope declarations, assumptions, and
    // artificial side-effects.
    return;

  case Intrinsic::codeview_annotation: {
    // Emit a label associated with this metadata.
    MachineFunction &MF = DAG.getMachineFunction();
    MCSymbol *Label =
        MF.getMMI().getContext().createTempSymbol("annotation", true);
    Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
    MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
    Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
    DAG.setRoot(Res);
    return;
  }
  case Intrinsic::init_trampoline: {
    const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());

    SDValue Ops[6];
    Ops[0] = getRoot();
    Ops[1] = getValue(I.getArgOperand(0));
    Ops[2] = getValue(I.getArgOperand(1));
    Ops[3] = getValue(I.getArgOperand(2));
    Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
    Ops[5] = DAG.getSrcValue(F);

    Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);

    DAG.setRoot(Res);
    return;
  }
  case Intrinsic::adjust_trampoline:
    setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
                             TLI.getPointerTy(DAG.getDataLayout()),
                             getValue(I.getArgOperand(0))));
    return;
  case Intrinsic::gcroot: {
    assert(DAG.getMachineFunction().getFunction().hasGC() &&
           "only valid in functions with gc specified, enforced by Verifier");
    assert(GFI && "implied by previous");
    const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
    const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));

    FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
    GFI->addStackRoot(FI->getIndex(), TypeMap);
    return;
  }
  case Intrinsic::gcread:
  case Intrinsic::gcwrite:
    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
  case Intrinsic::flt_rounds:
    Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
    setValue(&I, Res);
    DAG.setRoot(Res.getValue(1));
    return;

  case Intrinsic::expect:
    // Just replace __builtin_expect(exp, c) with EXP.
    setValue(&I, getValue(I.getArgOperand(0)));
    return;
  case Intrinsic::ubsantrap:
  case Intrinsic::debugtrap:
  case Intrinsic::trap: {
    StringRef TrapFuncName =
        I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
    if (TrapFuncName.empty()) {
      switch (Intrinsic) {
      case Intrinsic::trap:
        DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
        break;
      case Intrinsic::debugtrap:
        DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
        break;
      case Intrinsic::ubsantrap:
        DAG.setRoot(DAG.getNode(
            ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
            DAG.getTargetConstant(
                cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
                MVT::i32)));
        break;
      default: llvm_unreachable("unknown trap intrinsic");
      }
      return;
    }

    TargetLowering::ArgListTy Args;
    if (Intrinsic == Intrinsic::ubsantrap) {
      Args.push_back(TargetLoweringBase::ArgListEntry());
      Args[0].Val = I.getArgOperand(0);
      Args[0].Node = getValue(Args[0].Val);
      Args[0].Ty = Args[0].Val->getType();
    }

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
        CallingConv::C, I.getType(),
        DAG.getExternalSymbol(TrapFuncName.data(),
                              TLI.getPointerTy(DAG.getDataLayout())),
        std::move(Args));

    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    DAG.setRoot(Result.second);
    return;
  }
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow: {
    ISD::NodeType Op;
    switch (Intrinsic) {
    default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
    case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
    case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
    case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
    case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
    case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
    case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
    }
    SDValue Op1 = getValue(I.getArgOperand(0));
    SDValue Op2 = getValue(I.getArgOperand(1));

    EVT ResultVT = Op1.getValueType();
    EVT OverflowVT = MVT::i1;
    if (ResultVT.isVector())
      OverflowVT = EVT::getVectorVT(
          *Context, OverflowVT, ResultVT.getVectorElementCount());

    SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
    setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
    return;
  }
  case Intrinsic::prefetch: {
    SDValue Ops[5];
    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
    Ops[0] = DAG.getRoot();
    Ops[1] = getValue(I.getArgOperand(0));
    Ops[2] = getValue(I.getArgOperand(1));
    Ops[3] = getValue(I.getArgOperand(2));
    Ops[4] = getValue(I.getArgOperand(3));
    SDValue Result = DAG.getMemIntrinsicNode(
        ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
        EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
        /* align */ None, Flags);

    // Chain the prefetch in parallel with any pending loads, to stay out of
    // the way of later optimizations.
    PendingLoads.push_back(Result);
    Result = getRoot();
    DAG.setRoot(Result);
    return;
  }
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end: {
    bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
    // Stack coloring is not enabled in O0, discard region information.
    if (TM.getOptLevel() == CodeGenOpt::None)
      return;

    const int64_t ObjectSize =
        cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
    Value *const ObjectPtr = I.getArgOperand(1);
    SmallVector<const Value *, 4> Allocas;
    getUnderlyingObjects(ObjectPtr, Allocas);

    for (const Value *Alloca : Allocas) {
      const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca);

      // Could not find an Alloca.
      if (!LifetimeObject)
        continue;

      // First check that the Alloca is static, otherwise it won't have a
      // valid frame index.
      auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
      if (SI == FuncInfo.StaticAllocaMap.end())
        return;

      const int FrameIndex = SI->second;
      int64_t Offset;
      if (GetPointerBaseWithConstantOffset(
              ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
        Offset = -1; // Cannot determine offset from alloca to lifetime object.
      Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
                                Offset);
      DAG.setRoot(Res);
    }
    return;
  }
  case Intrinsic::pseudoprobe: {
    auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
    auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
    Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
    DAG.setRoot(Res);
    return;
  }
  case Intrinsic::invariant_start:
    // Discard region information.
    setValue(&I,
             DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType())));
    return;
  case Intrinsic::invariant_end:
    // Discard region information.
    return;
  case Intrinsic::clear_cache:
    /// FunctionName may be null.
    if (const char *FunctionName = TLI.getClearCacheBuiltinName())
      lowerCallToExternalSymbol(I, FunctionName);
    return;
  case Intrinsic::donothing:
  case Intrinsic::seh_try_begin:
  case Intrinsic::seh_scope_begin:
  case Intrinsic::seh_try_end:
  case Intrinsic::seh_scope_end:
    // ignore
    return;
  case Intrinsic::experimental_stackmap:
    visitStackmap(I);
    return;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    visitPatchpoint(I);
    return;
  case Intrinsic::experimental_gc_statepoint:
    LowerStatepoint(cast<GCStatepointInst>(I));
    return;
  case Intrinsic::experimental_gc_result:
    visitGCResult(cast<GCResultInst>(I));
    return;
  case Intrinsic::experimental_gc_relocate:
    visitGCRelocate(cast<GCRelocateInst>(I));
    return;
  case Intrinsic::instrprof_cover:
    llvm_unreachable("instrprof failed to lower a cover");
  case Intrinsic::instrprof_increment:
    llvm_unreachable("instrprof failed to lower an increment");
  case Intrinsic::instrprof_value_profile:
    llvm_unreachable("instrprof failed to lower a value profiling call");
  case Intrinsic::localescape: {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();

    // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
    // is the same on all targets.
    for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
      Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
      if (isa<ConstantPointerNull>(Arg))
        continue; // Skip null pointers. They represent a hole in index space.
      AllocaInst *Slot = cast<AllocaInst>(Arg);
      assert(FuncInfo.StaticAllocaMap.count(Slot) &&
             "can only escape static allocas");
      int FI = FuncInfo.StaticAllocaMap[Slot];
      MCSymbol *FrameAllocSym =
          MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
              GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
              TII->get(TargetOpcode::LOCAL_ESCAPE))
          .addSym(FrameAllocSym)
          .addFrameIndex(FI);
    }

    return;
  }
  case Intrinsic::localrecover: {
    // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
    MachineFunction &MF = DAG.getMachineFunction();

    // Get the symbol that defines the frame offset.
    auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
    auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
    unsigned IdxVal =
        unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
    MCSymbol *FrameAllocSym =
        MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
            GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);

    Value *FP = I.getArgOperand(1);
    SDValue FPVal = getValue(FP);
    EVT PtrVT = FPVal.getValueType();

    // Create a MCSymbol for the label to avoid any target lowering
    // that would make this PC relative.
    SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
    SDValue OffsetVal =
        DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);

    // Add the offset to the FP.
    SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
    setValue(&I, Add);
    return;
  }
  case Intrinsic::eh_exceptionpointer:
  case Intrinsic::eh_exceptioncode: {
    // Get the exception pointer vreg, copy from it, and resize it to fit.
    const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
    unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
    SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
    if (Intrinsic == Intrinsic::eh_exceptioncode)
      N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
    setValue(&I, N);
    return;
  }
  case Intrinsic::xray_customevent: {
    // Here we want to make sure that the intrinsic behaves as if it has a
    // specific calling convention, and only for x86_64.
    // FIXME: Support other platforms later.
    const auto &Triple = DAG.getTarget().getTargetTriple();
    if (Triple.getArch() != Triple::x86_64)
      return;

    SmallVector<SDValue, 8> Ops;

    // We want to say that we always want the arguments in registers.
    SDValue LogEntryVal = getValue(I.getArgOperand(0));
    SDValue StrSizeVal = getValue(I.getArgOperand(1));
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Chain = getRoot();
    Ops.push_back(LogEntryVal);
    Ops.push_back(StrSizeVal);
    Ops.push_back(Chain);

    // We need to enforce the calling convention for the callsite, so that
    // argument ordering is enforced correctly, and that register allocation can
    // see that some registers may be assumed clobbered and have to preserve
    // them across calls to the intrinsic.
    MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
                                           sdl, NodeTys, Ops);
    SDValue patchableNode = SDValue(MN, 0);
    DAG.setRoot(patchableNode);
    setValue(&I, patchableNode);
    return;
  }
  case Intrinsic::xray_typedevent: {
    // Here we want to make sure that the intrinsic behaves as if it has a
    // specific calling convention, and only for x86_64.
    // FIXME: Support other platforms later.
    const auto &Triple = DAG.getTarget().getTargetTriple();
    if (Triple.getArch() != Triple::x86_64)
      return;

    SmallVector<SDValue, 8> Ops;

    // We want to say that we always want the arguments in registers.
    // It's unclear to me how manipulating the selection DAG here forces callers
    // to provide arguments in registers instead of on the stack.
    SDValue LogTypeId = getValue(I.getArgOperand(0));
    SDValue LogEntryVal = getValue(I.getArgOperand(1));
    SDValue StrSizeVal = getValue(I.getArgOperand(2));
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Chain = getRoot();
    Ops.push_back(LogTypeId);
    Ops.push_back(LogEntryVal);
    Ops.push_back(StrSizeVal);
    Ops.push_back(Chain);

    // We need to enforce the calling convention for the callsite, so that
    // argument ordering is enforced correctly, and that register allocation can
    // see that some registers may be assumed clobbered and have to preserve
    // them across calls to the intrinsic.
    MachineSDNode *MN = DAG.getMachineNode(
        TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops);
    SDValue patchableNode = SDValue(MN, 0);
    DAG.setRoot(patchableNode);
    setValue(&I, patchableNode);
    return;
  }
  case Intrinsic::experimental_deoptimize:
    LowerDeoptimizeCall(&I);
    return;
  case Intrinsic::experimental_stepvector:
    visitStepVector(I);
    return;
  case Intrinsic::vector_reduce_fadd:
  case Intrinsic::vector_reduce_fmul:
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_umax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_fmax:
  case Intrinsic::vector_reduce_fmin:
    visitVectorReduce(I, Intrinsic);
    return;
  case Intrinsic::icall_branch_funnel: {
    SmallVector<SDValue, 16> Ops;
    Ops.push_back(getValue(I.getArgOperand(0)));

    int64_t Offset;
    auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
        I.getArgOperand(1), Offset, DAG.getDataLayout()));
    if (!Base)
      report_fatal_error(
          "llvm.icall.branch.funnel operand must be a GlobalValue");
    Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));

    struct BranchFunnelTarget {
      int64_t Offset;
      SDValue Target;
    };
    SmallVector<BranchFunnelTarget, 8> Targets;

    for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
      auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
          I.getArgOperand(Op), Offset, DAG.getDataLayout()));
      if (ElemBase != Base)
        report_fatal_error("all llvm.icall.branch.funnel operands must refer "
                           "to the same GlobalValue");

      SDValue Val = getValue(I.getArgOperand(Op + 1));
      auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
      if (!GA)
        report_fatal_error(
            "llvm.icall.branch.funnel operand must be a GlobalValue");
      Targets.push_back({Offset, DAG.getTargetGlobalAddress(
                                     GA->getGlobal(), sdl, Val.getValueType(),
                                     GA->getOffset())});
    }

    llvm::sort(Targets,
               [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
                 return T1.Offset < T2.Offset;
               });

    for (auto &T : Targets) {
      Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
      Ops.push_back(T.Target);
    }

    Ops.push_back(DAG.getRoot()); // Chain
    SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
                                 MVT::Other, Ops),
              0);
    DAG.setRoot(N);
    setValue(&I, N);
    return;
  }
  case Intrinsic::wasm_landingpad_index:
    // Information this intrinsic contained has been transferred to
    // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
    // delete it now.
    return;

  case Intrinsic::aarch64_settag:
  case Intrinsic::aarch64_settag_zero: {
    const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
    bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
    SDValue Val = TSI.EmitTargetCodeForSetTag(
        DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
        getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
        ZeroMemory);
    DAG.setRoot(Val);
    setValue(&I, Val);
    return;
  }
  case Intrinsic::ptrmask: {
    SDValue Ptr = getValue(I.getOperand(0));
    SDValue Const = getValue(I.getOperand(1));

    EVT PtrVT = Ptr.getValueType();
    setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
                             DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
    return;
  }
  case Intrinsic::get_active_lane_mask: {
    EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    SDValue Index = getValue(I.getOperand(0));
    EVT ElementVT = Index.getValueType();

    if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) {
      visitTargetIntrinsic(I, Intrinsic);
      return;
    }

    SDValue TripCount = getValue(I.getOperand(1));
    auto VecTy = CCVT.changeVectorElementType(ElementVT);

    SDValue VectorIndex, VectorTripCount;
    if (VecTy.isScalableVector()) {
      VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
      VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
    } else {
      VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
      VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
    }
    SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
    SDValue VectorInduction = DAG.getNode(
        ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
    SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction,
                                 VectorTripCount, ISD::CondCode::SETULT);
    setValue(&I, SetCC);
    return;
  }
  case Intrinsic::experimental_vector_insert: {
    SDValue Vec = getValue(I.getOperand(0));
    SDValue SubVec = getValue(I.getOperand(1));
    SDValue Index = getValue(I.getOperand(2));

    // The intrinsic's index type is i64, but the SDNode requires an index type
    // suitable for the target. Convert the index as required.
    MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    if (Index.getValueType() != VectorIdxTy)
      Index = DAG.getVectorIdxConstant(
          cast<ConstantSDNode>(Index)->getZExtValue(), sdl);

    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec,
                             Index));
    return;
  }
  case Intrinsic::experimental_vector_extract: {
    SDValue Vec = getValue(I.getOperand(0));
    SDValue Index = getValue(I.getOperand(1));
    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());

    // The intrinsic's index type is i64, but the SDNode requires an index type
    // suitable for the target. Convert the index as required.
    MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    if (Index.getValueType() != VectorIdxTy)
      Index = DAG.getVectorIdxConstant(
          cast<ConstantSDNode>(Index)->getZExtValue(), sdl);

    setValue(&I,
             DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
    return;
  }
  case Intrinsic::experimental_vector_reverse:
    visitVectorReverse(I);
    return;
  case Intrinsic::experimental_vector_splice:
    visitVectorSplice(I);
    return;
  }
}
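// Lower a constrained floating-point intrinsic to the corresponding STRICT_*
// DAG node, chaining the result so it cannot be reordered across operations
// that may change or observe the floating-point environment.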
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
    const ConstrainedFPIntrinsic &FPI) {
  SDLoc sdl = getCurSDLoc();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
  ValueVTs.push_back(MVT::Other); // Out chain

  // We do not need to serialize constrained FP intrinsics against
  // each other or against (nonvolatile) loads, so they can be
  // chained like loads.
  SDValue Chain = DAG.getRoot();
  SmallVector<SDValue, 4> Opers;
  Opers.push_back(Chain);
  if (FPI.isUnaryOp()) {
    Opers.push_back(getValue(FPI.getArgOperand(0)));
  } else if (FPI.isTernaryOp()) {
    Opers.push_back(getValue(FPI.getArgOperand(0)));
    Opers.push_back(getValue(FPI.getArgOperand(1)));
    Opers.push_back(getValue(FPI.getArgOperand(2)));
  } else {
    Opers.push_back(getValue(FPI.getArgOperand(0)));
    Opers.push_back(getValue(FPI.getArgOperand(1)));
  }

  auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
    assert(Result.getNode()->getNumValues() == 2);

    // Push node to the appropriate list so that future instructions can be
    // chained up correctly.
    SDValue OutChain = Result.getValue(1);
    switch (EB) {
    case fp::ExceptionBehavior::ebIgnore:
      // The only reason why ebIgnore nodes still need to be chained is that
      // they might depend on the current rounding mode, and therefore must
      // not be moved across instruction that may change that mode.
    case fp::ExceptionBehavior::ebMayTrap:
      // These must not be moved across calls or instructions that may change
      // floating-point exception masks.
      PendingConstrainedFP.push_back(OutChain);
      break;
    case fp::ExceptionBehavior::ebStrict:
      // These must not be moved across calls or instructions that may change
      // floating-point exception masks or read floating-point exception flags.
      // In addition, they cannot be optimized out even if unused.
      PendingConstrainedFPStrict.push_back(OutChain);
      break;
    }
  };

  SDVTList VTs = DAG.getVTList(ValueVTs);
  fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();

  SDNodeFlags Flags;
  if (EB == fp::ExceptionBehavior::ebIgnore)
    Flags.setNoFPExcept(true);

  if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
    Flags.copyFMF(*FPOp);

  unsigned Opcode;
  switch (FPI.getIntrinsicID()) {
  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case Intrinsic::INTRINSIC:                                                   \
    Opcode = ISD::STRICT_##DAGN;                                               \
    break;
#include "llvm/IR/ConstrainedOps.def"
  case Intrinsic::experimental_constrained_fmuladd: {
    Opcode = ISD::STRICT_FMA;
    // Break fmuladd into fmul and fadd.
    if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
        !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
                                        ValueVTs[0])) {
      Opers.pop_back();
      SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
      pushOutChain(Mul, EB);
      Opcode = ISD::STRICT_FADD;
      Opers.clear();
      Opers.push_back(Mul.getValue(1));
      Opers.push_back(Mul.getValue(0));
      Opers.push_back(getValue(FPI.getArgOperand(2)));
    }
    break;
  }
  }

  // A few strict DAG nodes carry additional operands that are not
  // set up by the default code above.
  switch (Opcode) {
  default: break;
  case ISD::STRICT_FP_ROUND:
    Opers.push_back(
        DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
    break;
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS: {
    auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
    ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
    if (TM.Options.NoNaNsFPMath)
      Condition = getFCmpCodeWithoutNaN(Condition);
    Opers.push_back(DAG.getCondCode(Condition));
    break;
  }
  }

  SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
  pushOutChain(Result, EB);

  SDValue FPResult = Result.getValue(0);
  setValue(&FPI, FPResult);
}
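// Map a vector-predicated (VP) intrinsic to its ISD opcode, relaxing
// sequential FP reductions to their reassociating forms when fast-math
// flags allow it.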
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
  Optional<unsigned> ResOPC;
  switch (VPIntrin.getIntrinsicID()) {
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
  case Intrinsic::VPID:                                                        \
    ResOPC = ISD::VPSD;                                                        \
    break;
#include "llvm/IR/VPIntrinsics.def"
  }

  if (!ResOPC)
    llvm_unreachable(
        "Inconsistency: no SDNode available for this VPIntrinsic!");

  if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
      *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
    if (VPIntrin.getFastMathFlags().allowReassoc())
      return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
                                                : ISD::VP_REDUCE_FMUL;
  }

  return *ResOPC;
}
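// Lower vp.load / vp.gather: build the memory operand, pick a chained or
// unchained root depending on whether the access may alias, and emit either a
// VP load or a VP gather node.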
void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
                                            SmallVector<SDValue, 7> &OpValues,
                                            bool IsGather) {
  SDLoc DL = getCurSDLoc();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  Value *PtrOperand = VPIntrin.getArgOperand(0);
  MaybeAlign Alignment = VPIntrin.getPointerAlignment();
  AAMDNodes AAInfo = VPIntrin.getAAMetadata();
  const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
  SDValue LD;
  bool AddToChain = true;
  if (!IsGather) {
    // Do not serialize variable-length loads of constant memory with
    // anything.
    if (!Alignment)
      Alignment = DAG.getEVTAlign(VT);
    MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
    AddToChain = !AA || !AA->pointsToConstantMemory(ML);
    SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
        MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
    LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
                       MMO, false /*IsExpanding */);
  } else {
    if (!Alignment)
      Alignment = DAG.getEVTAlign(VT.getScalarType());
    unsigned AS =
        PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo(AS), MachineMemOperand::MOLoad,
        MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
    SDValue Base, Index, Scale;
    ISD::MemIndexType IndexType;
    bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
                                      this, VPIntrin.getParent(),
                                      VT.getScalarStoreSize());
    if (!UniformBase) {
      Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
      Index = getValue(PtrOperand);
      IndexType = ISD::SIGNED_SCALED;
      Scale =
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
    }
    EVT IdxVT = Index.getValueType();
    EVT EltTy = IdxVT.getVectorElementType();
    if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
      EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
      Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
    }
    LD = DAG.getGatherVP(
        DAG.getVTList(VT, MVT::Other), VT, DL,
        {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
        IndexType);
  }
  if (AddToChain)
    PendingLoads.push_back(LD.getValue(1));
  setValue(&VPIntrin, LD);
}
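// Lower vp.store / vp.scatter analogously to the load/gather path above,
// rooted on the memory chain.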
void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
                                              SmallVector<SDValue, 7> &OpValues,
                                              bool IsScatter) {
  SDLoc DL = getCurSDLoc();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  Value *PtrOperand = VPIntrin.getArgOperand(1);
  EVT VT = OpValues[0].getValueType();
  MaybeAlign Alignment = VPIntrin.getPointerAlignment();
  AAMDNodes AAInfo = VPIntrin.getAAMetadata();
  SDValue ST;
  if (!IsScatter) {
    if (!Alignment)
      Alignment = DAG.getEVTAlign(VT);
    SDValue Ptr = OpValues[1];
    SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
        MemoryLocation::UnknownSize, *Alignment, AAInfo);
    ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
                        OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
                        /* IsTruncating */ false, /*IsCompressing*/ false);
  } else {
    if (!Alignment)
      Alignment = DAG.getEVTAlign(VT.getScalarType());
    unsigned AS =
        PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo(AS), MachineMemOperand::MOStore,
        MemoryLocation::UnknownSize, *Alignment, AAInfo);
    SDValue Base, Index, Scale;
    ISD::MemIndexType IndexType;
    bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
                                      this, VPIntrin.getParent(),
                                      VT.getScalarStoreSize());
    if (!UniformBase) {
      Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
      Index = getValue(PtrOperand);
      IndexType = ISD::SIGNED_SCALED;
      Scale =
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
    }
    EVT IdxVT = Index.getValueType();
    EVT EltTy = IdxVT.getVectorElementType();
    if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
      EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
      Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
    }
    ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
                          {getMemoryRoot(), OpValues[0], Base, Index, Scale,
                           OpValues[2], OpValues[3]},
                          MMO, IndexType);
  }
  DAG.setRoot(ST);
  setValue(&VPIntrin, ST);
}
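// Lower llvm.experimental.vp.strided.load to a strided VP load node.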
void SelectionDAGBuilder::visitVPStridedLoad(
    const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) {
  SDLoc DL = getCurSDLoc();
  Value *PtrOperand = VPIntrin.getArgOperand(0);
  MaybeAlign Alignment = VPIntrin.getPointerAlignment();
  if (!Alignment)
    Alignment = DAG.getEVTAlign(VT.getScalarType());
  AAMDNodes AAInfo = VPIntrin.getAAMetadata();
  const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
  MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
  bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
      MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);

  SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
                                    OpValues[2], OpValues[3], MMO,
                                    false /*IsExpanding*/);

  if (AddToChain)
    PendingLoads.push_back(LD.getValue(1));
  setValue(&VPIntrin, LD);
}
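// Lower llvm.experimental.vp.strided.store to a strided VP store node.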
void SelectionDAGBuilder::visitVPStridedStore(
    const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) {
  SDLoc DL = getCurSDLoc();
  Value *PtrOperand = VPIntrin.getArgOperand(1);
  EVT VT = OpValues[0].getValueType();
  MaybeAlign Alignment = VPIntrin.getPointerAlignment();
  if (!Alignment)
    Alignment = DAG.getEVTAlign(VT.getScalarType());
  AAMDNodes AAInfo = VPIntrin.getAAMetadata();
  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
      MemoryLocation::UnknownSize, *Alignment, AAInfo);

  SDValue ST = DAG.getStridedStoreVP(
      getMemoryRoot(), DL, OpValues[0], OpValues[1],
      DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3],
      OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false,
      /*IsCompressing*/ false);

  DAG.setRoot(ST);
  setValue(&VPIntrin, ST);
}
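// Lower vp.icmp / vp.fcmp to a VP setcc node, zero-extending the explicit
// vector length operand to the target's EVL type.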
void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDLoc DL = getCurSDLoc();

  ISD::CondCode Condition;
  CmpInst::Predicate CondCode = VPIntrin.getPredicate();
  bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
  if (IsFP) {
    // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
    // flags, but calls that don't return floating-point types can't be
    // FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
    Condition = getFCmpCondCode(CondCode);
    if (TM.Options.NoNaNsFPMath)
      Condition = getFCmpCodeWithoutNaN(Condition);
  } else {
    Condition = getICmpCondCode(CondCode);
  }

  SDValue Op1 = getValue(VPIntrin.getOperand(0));
  SDValue Op2 = getValue(VPIntrin.getOperand(1));
  // #2 is the condition code
  SDValue MaskOp = getValue(VPIntrin.getOperand(3));
  SDValue EVL = getValue(VPIntrin.getOperand(4));
  MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
  assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
         "Unexpected target EVL type");
  EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL);

  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        VPIntrin.getType());
  setValue(&VPIntrin,
           DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL));
}
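// Dispatch a VP intrinsic: gather its operands (extending the EVL parameter),
// then either emit the mapped VP node directly or defer to the memory helpers
// above.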
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
    const VPIntrinsic &VPIntrin) {
  SDLoc DL = getCurSDLoc();
  unsigned Opcode = getISDForVPIntrinsic(VPIntrin);

  auto IID = VPIntrin.getIntrinsicID();

  if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(&VPIntrin))
    return visitVPCmp(*CmpI);

  SmallVector<EVT, 4> ValueVTs;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
  SDVTList VTs = DAG.getVTList(ValueVTs);

  auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID);

  MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
  assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
         "Unexpected target EVL type");

  // Request operands.
  SmallVector<SDValue, 7> OpValues;
  for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
    auto Op = getValue(VPIntrin.getArgOperand(I));
    if (I == EVLParamPos)
      Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
    OpValues.push_back(Op);
  }

  switch (Opcode) {
  default: {
    SDNodeFlags SDFlags;
    if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
      SDFlags.copyFMF(*FPMO);
    SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags);
    setValue(&VPIntrin, Result);
    break;
  }
  case ISD::VP_LOAD:
  case ISD::VP_GATHER:
    visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
                      Opcode == ISD::VP_GATHER);
    break;
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
    visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
    break;
  case ISD::VP_STORE:
  case ISD::VP_SCATTER:
    visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
    break;
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
    visitVPStridedStore(VPIntrin, OpValues);
    break;
  }
}
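// EH lowering helpers: bracket a potentially unwinding call with begin/end
// labels so the try range can be recorded for the LSDA or WinEH state tables.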
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
                                          const BasicBlock *EHPadBB,
                                          MCSymbol *&BeginLabel) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineModuleInfo &MMI = MF.getMMI();

  // Insert a label before the invoke call to mark the try range. This can be
  // used to detect deletion of the invoke via the MachineModuleInfo.
  BeginLabel = MMI.getContext().createTempSymbol();

  // For SjLj, keep track of which landing pads go with which invokes
  // so as to maintain the ordering of pads in the LSDA.
  unsigned CallSiteIndex = MMI.getCurrentCallSite();
  if (CallSiteIndex) {
    MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
    LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);

    // Now that the call site is handled, stop tracking it.
    MMI.setCurrentCallSite(0);
  }

  return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
}

SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
                                        const BasicBlock *EHPadBB,
                                        MCSymbol *BeginLabel) {
  assert(BeginLabel && "BeginLabel should've been set");

  MachineFunction &MF = DAG.getMachineFunction();
  MachineModuleInfo &MMI = MF.getMMI();

  // Insert a label at the end of the invoke call to mark the try range. This
  // can be used to detect deletion of the invoke via the MachineModuleInfo.
  MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
  Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);

  // Inform MachineModuleInfo of range.
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  // There is a platform (e.g. wasm) that uses funclet style IR but does not
  // actually use outlined funclets and their LSDA info style.
  if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
    assert(II && "II should've been set");
    WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
    EHInfo->addIPToStateRange(II, BeginLabel, EndLabel);
  } else if (!isScopedEHPersonality(Pers)) {
    MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
  }

  return Chain;
}
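// Lower a call that may unwind: emit the EH begin label, lower the call
// itself, then emit the end label and record the invoke range.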
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
                                    const BasicBlock *EHPadBB) {
  MCSymbol *BeginLabel = nullptr;

  if (EHPadBB) {
    // Both PendingLoads and PendingExports must be flushed here;
    // this call might not return.
    (void)getRoot();
    DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel));
    CLI.setChain(getRoot());
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);

  assert((CLI.IsTailCall || Result.second.getNode()) &&
         "Non-null chain expected with non-tail call!");
  assert((Result.second.getNode() || !Result.first.getNode()) &&
         "Null value expected with tail call!");

  if (!Result.second.getNode()) {
    // As a special case, a null chain means that a tail call has been emitted
    // and the DAG root is already updated.
    HasTailCall = true;

    // Since there's no actual continuation from this block, nothing can be
    // relying on us setting vregs for them.
    PendingExports.clear();
  } else {
    DAG.setRoot(Result.second);
  }

  if (EHPadBB) {
    DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
                           BeginLabel));
  }

  return Result;
}
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
                                      bool isTailCall,
                                      bool isMustTailCall,
                                      const BasicBlock *EHPadBB) {
  auto &DL = DAG.getDataLayout();
  FunctionType *FTy = CB.getFunctionType();
  Type *RetTy = CB.getType();

  TargetLowering::ArgListTy Args;
  Args.reserve(CB.arg_size());

  const Value *SwiftErrorVal = nullptr;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Avoid emitting tail calls in functions with the disable-tail-calls
  // attribute.
  auto *Caller = CB.getParent()->getParent();
  if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
          "true" && !isMustTailCall)
    isTailCall = false;

  // We can't tail call inside a function with a swifterror argument. Lowering
  // does not support this yet. It would have to move into the swifterror
  // register before the call.
  if (TLI.supportSwiftError() &&
      Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    isTailCall = false;

  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    TargetLowering::ArgListEntry Entry;
    const Value *V = *I;

    // Skip empty types.
    if (V->getType()->isEmptyTy())
      continue;

    SDValue ArgNode = getValue(V);
    Entry.Node = ArgNode; Entry.Ty = V->getType();

    Entry.setAttributes(&CB, I - CB.arg_begin());

    // Use swifterror virtual register as input to the call.
    if (Entry.IsSwiftError && TLI.supportSwiftError()) {
      SwiftErrorVal = V;
      // We find the virtual register for the actual swifterror argument and
      // use that register instead of the Value.
      Entry.Node =
          DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
                          EVT(TLI.getPointerTy(DL)));
    }

    Args.push_back(Entry);

    // If we have an explicit sret argument that is an Instruction, (i.e., it
    // might point to function-local memory), we can't meaningfully tail-call.
    if (Entry.IsSRet && isa<Instruction>(V))
      isTailCall = false;
  }

  // If call site has a cfguardtarget operand bundle, create and add an
  // additional ArgListEntry.
  if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
    TargetLowering::ArgListEntry Entry;
    Value *V = Bundle->Inputs[0];
    SDValue ArgNode = getValue(V);
    Entry.Node = ArgNode;
    Entry.Ty = V->getType();
    Entry.IsCFGuardTarget = true;
    Args.push_back(Entry);
  }

  // Check if target-independent constraints permit a tail call here.
  // Target-dependent constraints are checked within TLI->LowerCallTo.
  if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
    isTailCall = false;

  // Disable tail calls if there is a swifterror argument. Targets have not
  // been updated to support tail calls.
  if (TLI.supportSwiftError() && SwiftErrorVal)
    isTailCall = false;

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(getCurSDLoc())
      .setChain(getRoot())
      .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
      .setTailCall(isTailCall)
      .setConvergent(CB.isConvergent())
      .setIsPreallocated(
          CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
  std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);

  if (Result.first.getNode()) {
    Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
    setValue(&CB, Result.first);
  }

  // The last element of CLI.InVals has the SDValue for swifterror return.
  // Here we copy it to a virtual register and update SwiftErrorMap for
  // use in the later return.
  if (SwiftErrorVal && TLI.supportSwiftError()) {
    // Get the last element of InVals.
    SDValue Src = CLI.InVals.back();
    Register VReg =
        SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
    SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
    DAG.setRoot(CopyNode);
  }
}
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
                             SelectionDAGBuilder &Builder) {
  // Check to see if this load can be trivially constant folded, e.g. if the
  // input is from a string literal.
  if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
    // Cast pointer to the type we really want to load.
    Type *LoadTy =
        Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
    if (LoadVT.isVector())
      LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());

    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
                                         PointerType::getUnqual(LoadTy));
    if (const Constant *LoadCst =
            ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
                                         LoadTy, Builder.DAG.getDataLayout()))
      return Builder.getValue(LoadCst);
  }

  // Otherwise, we have to emit the load. If the pointer is to unfoldable but
  // still constant memory, the input chain can be the entry node.
  SDValue Root;
  bool ConstantMemory = false;

  // Do not serialize (non-volatile) loads of constant memory with anything.
  if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
    Root = Builder.DAG.getEntryNode();
    ConstantMemory = true;
  } else {
    // Do not serialize non-volatile loads against each other.
    Root = Builder.DAG.getRoot();
  }

  SDValue Ptr = Builder.getValue(PtrVal);
  SDValue LoadVal =
      Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
                          MachinePointerInfo(PtrVal), Align(1));

  if (!ConstantMemory)
    Builder.PendingLoads.push_back(LoadVal.getValue(1));
  return LoadVal;
}
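// Illustrative example (assumed IR, not from this file): for a comparison
// against a string literal such as
//
//   @key = private unnamed_addr constant [4 x i8] c"abcd"
//   %r = call i32 @memcmp(i8* %p, i8* getelementptr inbounds ([4 x i8],
//                         [4 x i8]* @key, i64 0, i64 0), i64 4)
//
// the literal side is a Constant, so the i32 load of "abcd" folds in the
// block above and only the %p side emits a runtime load.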
/// Record the value for an instruction that produces an integer result,
/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
                                                  SDValue Value,
                                                  bool IsSigned) {
  EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                    I.getType(), true);
  if (IsSigned)
    Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
  else
    Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
  setValue(&I, Value);
}
/// See if we can lower a memcmp/bcmp call into an optimized form. If so,
/// return true and lower it. Otherwise return false, and it will be lowered
/// like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
  const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
  const Value *Size = I.getArgOperand(2);
  const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size));
  if (CSize && CSize->getZExtValue() == 0) {
    EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                          I.getType(), true);
    setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
    return true;
  }

  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
      DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
      getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
  if (Res.first.getNode()) {
    processIntegerCallValue(I, Res.first, true);
    PendingLoads.push_back(Res.second);
    return true;
  }

  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
  if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
    return false;

  // If the target has a fast compare for the given size, it will return a
  // preferred load type for that size. Require that the load VT is legal and
  // that the target supports unaligned loads of that type. Otherwise, return
  // INVALID_SIMPLE_VALUE_TYPE.
  auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT LVT = TLI.hasFastEqualityCompare(NumBits);
    if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
      // TODO: Check alignment of src and dest ptrs.
      unsigned DstAS = LHS->getType()->getPointerAddressSpace();
      unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
      if (!TLI.isTypeLegal(LVT) ||
          !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
          !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
        LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
    }

    return LVT;
  };

  // This turns into unaligned loads. We only do this if the target natively
  // supports the MVT we'll be loading or if it is small enough (<= 4) that
  // we'll only produce a small number of byte loads.
  MVT LoadVT;
  unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
  switch (NumBitsToCompare) {
  default:
    return false;
  case 16:
    LoadVT = MVT::i16;
    break;
  case 32:
    LoadVT = MVT::i32;
    break;
  case 64:
  case 128:
  case 256:
    LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
    break;
  }

  if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
    return false;

  SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
  SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);

  // Bitcast to a wide integer type if the loads are vectors.
  if (LoadVT.isVector()) {
    EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
    LoadL = DAG.getBitcast(CmpVT, LoadL);
    LoadR = DAG.getBitcast(CmpVT, LoadR);
  }

  SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
  processIntegerCallValue(I, Cmp, false);
  return true;
}
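// Illustrative example (not from the original source): with a constant size
// of 4 and a zero-equality-only user such as
//
//   %c = call i32 @memcmp(i8* %p, i8* %q, i64 4)
//   %z = icmp eq i32 %c, 0
//
// the lowering above emits two (possibly unaligned) i32 loads and one SETNE
// node instead of a libcall; processIntegerCallValue then zero-extends the
// i1 comparison back to the call's i32 result type.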
/// See if we can lower a memchr call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
  const Value *Src = I.getArgOperand(0);
  const Value *Char = I.getArgOperand(1);
  const Value *Length = I.getArgOperand(2);

  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
      TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
                                  getValue(Src), getValue(Char),
                                  getValue(Length), MachinePointerInfo(Src));
  if (Res.first.getNode()) {
    setValue(&I, Res.first);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}

/// See if we can lower a mempcpy call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
  SDValue Dst = getValue(I.getArgOperand(0));
  SDValue Src = getValue(I.getArgOperand(1));
  SDValue Size = getValue(I.getArgOperand(2));

  Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
  Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
  // DAG::getMemcpy needs Alignment to be defined.
  Align Alignment = std::min(DstAlign, SrcAlign);

  bool isVol = false;
  SDLoc sdl = getCurSDLoc();

  // In the mempcpy context we need to pass in a false value for isTailCall
  // because the return pointer needs to be adjusted by the size of
  // the copied memory.
  SDValue Root = isVol ? getRoot() : getMemoryRoot();
  SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
                             /*isTailCall=*/false,
                             MachinePointerInfo(I.getArgOperand(0)),
                             MachinePointerInfo(I.getArgOperand(1)),
                             I.getAAMetadata());
  assert(MC.getNode() != nullptr &&
         "** memcpy should not be lowered as TailCall in mempcpy context **");
  DAG.setRoot(MC);

  // Check if Size needs to be truncated or extended.
  Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());

  // Adjust return pointer to point just past the last dst byte.
  SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
                                    Dst, Size);
  setValue(&I, DstPlusSize);
  return true;
}
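// Illustrative note (not from the original source): mempcpy(dst, src, n)
// returns dst + n rather than dst, so the lowering above is just the memcpy
// node followed by an ISD::ADD of the size to the destination pointer, e.g.
// "mempcpy(p, q, 16)" produces the pointer value p + 16.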
/// See if we can lower a strcpy call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);

  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
      TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
                                  getValue(Arg0), getValue(Arg1),
                                  MachinePointerInfo(Arg0),
                                  MachinePointerInfo(Arg1), isStpcpy);
  if (Res.first.getNode()) {
    setValue(&I, Res.first);
    DAG.setRoot(Res.second);
    return true;
  }

  return false;
}

/// See if we can lower a strcmp call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);

  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
      TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
                                  getValue(Arg0), getValue(Arg1),
                                  MachinePointerInfo(Arg0),
                                  MachinePointerInfo(Arg1));
  if (Res.first.getNode()) {
    processIntegerCallValue(I, Res.first, true);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}

/// See if we can lower a strlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
  const Value *Arg0 = I.getArgOperand(0);

  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
      TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
                                  getValue(Arg0), MachinePointerInfo(Arg0));
  if (Res.first.getNode()) {
    processIntegerCallValue(I, Res.first, false);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}

/// See if we can lower a strnlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);

  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
      TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
                                   getValue(Arg0), getValue(Arg1),
                                   MachinePointerInfo(Arg0));
  if (Res.first.getNode()) {
    processIntegerCallValue(I, Res.first, false);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}
/// See if we can lower a unary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it, otherwise return
/// false and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
                                              unsigned Opcode) {
  // We already checked this call's prototype; verify it doesn't modify errno.
  if (!I.onlyReadsMemory())
    return false;

  SDNodeFlags Flags;
  Flags.copyFMF(cast<FPMathOperator>(I));

  SDValue Tmp = getValue(I.getArgOperand(0));
  setValue(&I,
           DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
  return true;
}

/// See if we can lower a binary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it. Otherwise return
/// false, and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
                                               unsigned Opcode) {
  // We already checked this call's prototype; verify it doesn't modify errno.
  if (!I.onlyReadsMemory())
    return false;

  SDNodeFlags Flags;
  Flags.copyFMF(cast<FPMathOperator>(I));

  SDValue Tmp0 = getValue(I.getArgOperand(0));
  SDValue Tmp1 = getValue(I.getArgOperand(1));
  EVT VT = Tmp0.getValueType();
  setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
  return true;
}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
  // Handle inline assembly differently.
  if (I.isInlineAsm()) {
    visitInlineAsm(I);
    return;
  }

  if (Function *F = I.getCalledFunction()) {
    diagnoseDontCall(I);

    if (F->isDeclaration()) {
      // Is this an LLVM intrinsic or a target-specific intrinsic?
      unsigned IID = F->getIntrinsicID();
      if (!IID)
        if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
          IID = II->getIntrinsicID(F);

      if (IID) {
        visitIntrinsicCall(I, IID);
        return;
      }
    }

    // Check for well-known libc/libm calls. If the function is internal, it
    // can't be a library call. Don't do the check if marked as nobuiltin for
    // some reason or the call site requires strict floating point semantics.
    LibFunc Func;
    if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
        F->hasName() && LibInfo->getLibFunc(*F, Func) &&
        LibInfo->hasOptimizedCodeGen(Func)) {
      switch (Func) {
      default: break;
      case LibFunc_bcmp:
        if (visitMemCmpBCmpCall(I))
          return;
        break;
      case LibFunc_copysign:
      case LibFunc_copysignf:
      case LibFunc_copysignl:
        // We already checked this call's prototype; verify it doesn't modify
        // errno.
        if (I.onlyReadsMemory()) {
          SDValue LHS = getValue(I.getArgOperand(0));
          SDValue RHS = getValue(I.getArgOperand(1));
          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
                                   LHS.getValueType(), LHS, RHS));
          return;
        }
        break;
      case LibFunc_fabs:
      case LibFunc_fabsf:
      case LibFunc_fabsl:
        if (visitUnaryFloatCall(I, ISD::FABS))
          return;
        break;
      case LibFunc_fmin:
      case LibFunc_fminf:
      case LibFunc_fminl:
        if (visitBinaryFloatCall(I, ISD::FMINNUM))
          return;
        break;
      case LibFunc_fmax:
      case LibFunc_fmaxf:
      case LibFunc_fmaxl:
        if (visitBinaryFloatCall(I, ISD::FMAXNUM))
          return;
        break;
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_sinl:
        if (visitUnaryFloatCall(I, ISD::FSIN))
          return;
        break;
      case LibFunc_cos:
      case LibFunc_cosf:
      case LibFunc_cosl:
        if (visitUnaryFloatCall(I, ISD::FCOS))
          return;
        break;
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
      case LibFunc_sqrtl:
      case LibFunc_sqrt_finite:
      case LibFunc_sqrtf_finite:
      case LibFunc_sqrtl_finite:
        if (visitUnaryFloatCall(I, ISD::FSQRT))
          return;
        break;
      case LibFunc_floor:
      case LibFunc_floorf:
      case LibFunc_floorl:
        if (visitUnaryFloatCall(I, ISD::FFLOOR))
          return;
        break;
      case LibFunc_nearbyint:
      case LibFunc_nearbyintf:
      case LibFunc_nearbyintl:
        if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
          return;
        break;
      case LibFunc_ceil:
      case LibFunc_ceilf:
      case LibFunc_ceill:
        if (visitUnaryFloatCall(I, ISD::FCEIL))
          return;
        break;
      case LibFunc_rint:
      case LibFunc_rintf:
      case LibFunc_rintl:
        if (visitUnaryFloatCall(I, ISD::FRINT))
          return;
        break;
      case LibFunc_round:
      case LibFunc_roundf:
      case LibFunc_roundl:
        if (visitUnaryFloatCall(I, ISD::FROUND))
          return;
        break;
      case LibFunc_trunc:
      case LibFunc_truncf:
      case LibFunc_truncl:
        if (visitUnaryFloatCall(I, ISD::FTRUNC))
          return;
        break;
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log2l:
        if (visitUnaryFloatCall(I, ISD::FLOG2))
          return;
        break;
      case LibFunc_exp2:
      case LibFunc_exp2f:
      case LibFunc_exp2l:
        if (visitUnaryFloatCall(I, ISD::FEXP2))
          return;
        break;
      case LibFunc_memcmp:
        if (visitMemCmpBCmpCall(I))
          return;
        break;
      case LibFunc_mempcpy:
        if (visitMemPCpyCall(I))
          return;
        break;
      case LibFunc_memchr:
        if (visitMemChrCall(I))
          return;
        break;
      case LibFunc_strcpy:
        if (visitStrCpyCall(I, false))
          return;
        break;
      case LibFunc_stpcpy:
        if (visitStrCpyCall(I, true))
          return;
        break;
      case LibFunc_strcmp:
        if (visitStrCmpCall(I))
          return;
        break;
      case LibFunc_strlen:
        if (visitStrLenCall(I))
          return;
        break;
      case LibFunc_strnlen:
        if (visitStrNLenCall(I))
          return;
        break;
      }
    }
  }

  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
  // have to do anything here to lower funclet bundles.
  // CFGuardTarget bundles are lowered in LowerCallTo.
  assert(!I.hasOperandBundlesOtherThan(
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
              LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
              LLVMContext::OB_clang_arc_attachedcall}) &&
         "Cannot lower calls with arbitrary operand bundles!");

  SDValue Callee = getValue(I.getCalledOperand());

  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
    LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
  else
    // Check if we can potentially perform a tail call. More detailed checking
    // is done within LowerCallTo, after more information about the call is
    // known.
    LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
  /// CallOperand - If this is the result output operand or a clobber
  /// this is null, otherwise it is the incoming operand to the CallInst.
  /// This gets modified as the asm is processed.
  SDValue CallOperand;

  /// AssignedRegs - If this is a register or register class operand, this
  /// contains the set of registers corresponding to the operand.
  RegsForValue AssignedRegs;

  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
      : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {}

  /// Whether or not this operand accesses memory
  bool hasMemory(const TargetLowering &TLI) const {
    // Indirect operand accesses access memory.
    if (isIndirect)
      return true;

    for (const auto &Code : Codes)
      if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
        return true;

    return false;
  }

  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
  /// corresponds to. If there is no Value* for this operand, it returns
  /// MVT::Other.
  EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL,
                           llvm::Type *ParamElemType) const {
    if (!CallOperandVal) return MVT::Other;

    if (isa<BasicBlock>(CallOperandVal))
      return TLI.getProgramPointerTy(DL);

    llvm::Type *OpTy = CallOperandVal->getType();

    // FIXME: code duplicated from TargetLowering::ParseConstraints().
    // If this is an indirect operand, the operand is a pointer to the
    // accessed type.
    if (isIndirect) {
      OpTy = ParamElemType;
      assert(OpTy && "Indirect operand must have elementtype attribute");
    }

    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
    if (StructType *STy = dyn_cast<StructType>(OpTy))
      if (STy->getNumElements() == 1)
        OpTy = STy->getElementType(0);

    // If OpTy is not a single value, it may be a struct/union that we
    // can tile with integers.
    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
      unsigned BitSize = DL.getTypeSizeInBits(OpTy);
      switch (BitSize) {
      default: break;
      case 1:
      case 8:
      case 16:
      case 32:
      case 64:
      case 128:
        OpTy = IntegerType::get(Context, BitSize);
        break;
      }
    }

    return TLI.getAsmOperandValueType(DL, OpTy, true);
  }
};

} // end anonymous namespace
/// Make sure that the output operand \p OpInfo and its corresponding input
/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
/// out).
static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
                               SDISelAsmOperandInfo &MatchingOpInfo,
                               SelectionDAG &DAG) {
  if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
    return;

  const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
  const auto &TLI = DAG.getTargetLoweringInfo();

  std::pair<unsigned, const TargetRegisterClass *> MatchRC =
      TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                       OpInfo.ConstraintVT);
  std::pair<unsigned, const TargetRegisterClass *> InputRC =
      TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
                                       MatchingOpInfo.ConstraintVT);
  if ((OpInfo.ConstraintVT.isInteger() !=
       MatchingOpInfo.ConstraintVT.isInteger()) ||
      (MatchRC.second != InputRC.second)) {
    // FIXME: error out in a more elegant fashion
    report_fatal_error("Unsupported asm: input constraint"
                       " with a matching output constraint of"
                       " incompatible type!");
  }
  MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
}
/// Get a direct memory input to behave well as an indirect operand.
/// This may introduce stores, hence the need for a \p Chain.
/// \return The (possibly updated) chain.
static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
                                        SDISelAsmOperandInfo &OpInfo,
                                        SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // If we don't have an indirect input, put it in the constpool if we can,
  // otherwise spill it to a stack slot.
  // TODO: This isn't quite right. We need to handle these according to
  // the addressing mode that the constraint wants. Also, this may take
  // an additional register for the computation and we don't want that
  // either.

  // If the operand is a float, integer, or vector constant, spill to a
  // constant pool entry to get its address.
  const Value *OpVal = OpInfo.CallOperandVal;
  if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
      isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
    OpInfo.CallOperand = DAG.getConstantPool(
        cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
    return Chain;
  }

  // Otherwise, create a stack slot and emit a store to it before the asm.
  Type *Ty = OpVal->getType();
  auto &DL = DAG.getDataLayout();
  uint64_t TySize = DL.getTypeAllocSize(Ty);
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo().CreateStackObject(
      TySize, DL.getPrefTypeAlign(Ty), false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
  Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
                            MachinePointerInfo::getFixedStack(MF, SSFI),
                            TLI.getMemValueType(DL, Ty));
  OpInfo.CallOperand = StackSlot;

  return Chain;
}
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process. However, if the asm
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation. This produces generally horrible, but correct, code.
///
///   OpInfo describes the operand
///   RefOpInfo describes the matching operand if any, the operand otherwise
static llvm::Optional<unsigned>
getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
                     SDISelAsmOperandInfo &OpInfo,
                     SDISelAsmOperandInfo &RefOpInfo) {
  LLVMContext &Context = *DAG.getContext();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<unsigned, 4> Regs;
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();

  // No work to do for memory/address operands.
  if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
      OpInfo.ConstraintType == TargetLowering::C_Address)
    return None;

  // If this is a constraint for a single physreg, or a constraint for a
  // register class, find it.
  unsigned AssignedReg;
  const TargetRegisterClass *RC;
  std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
      &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
  // RC is unset only on failure. Return immediately.
  if (!RC)
    return None;

  // Get the actual register value type. This is important, because the user
  // may have asked for (e.g.) the AX register in i32 type. We need to
  // remember that AX is actually i16 to get the right extension.
  const MVT RegVT = *TRI.legalclasstypes_begin(*RC);

  if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
    // If this is an FP operand in an integer register (or visa versa), or more
    // generally if the operand value disagrees with the register class we plan
    // to stick it in, fix the operand type.
    //
    // If this is an input value, the bitcast to the new type is done now.
    // Bitcast for output value is done at the end of visitInlineAsm().
    if ((OpInfo.Type == InlineAsm::isOutput ||
         OpInfo.Type == InlineAsm::isInput) &&
        !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
      // Try to convert to the first EVT that the reg class contains. If the
      // types are identical size, use a bitcast to convert (e.g. two differing
      // vector types). Note: output bitcast is done at the end of
      // visitInlineAsm().
      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
        // Exclude indirect inputs while they are unsupported because the code
        // to perform the load is missing and thus OpInfo.CallOperand still
        // refers to the input address rather than the pointed-to value.
        if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
          OpInfo.CallOperand =
              DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
        OpInfo.ConstraintVT = RegVT;
        // If the operand is an FP value and we want it in integer registers,
        // use the corresponding integer type. This turns an f64 value into
        // i64, which can be passed with two i32 values on a 32-bit machine.
      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
        MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
        if (OpInfo.Type == InlineAsm::isInput)
          OpInfo.CallOperand =
              DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
        OpInfo.ConstraintVT = VT;
      }
    }
  }

  // No need to allocate a matching input constraint since the constraint it's
  // matching to has already been allocated.
  if (OpInfo.isMatchingInputConstraint())
    return None;

  EVT ValueVT = OpInfo.ConstraintVT;
  if (OpInfo.ConstraintVT == MVT::Other)
    ValueVT = RegVT;

  // Initialize NumRegs.
  unsigned NumRegs = 1;
  if (OpInfo.ConstraintVT != MVT::Other)
    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT);

  // If this is a constraint for a specific physical register, like {r17},
  // assign it now.

  // If this is associated with a specific register, initialize the iterator
  // to the correct place. If virtual, make sure we have enough registers.

  // Initialize iterator if necessary
  TargetRegisterClass::iterator I = RC->begin();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Do not check for single registers.
  if (AssignedReg) {
    I = std::find(I, RC->end(), AssignedReg);
    if (I == RC->end()) {
      // RC does not contain the selected register, which indicates a
      // mismatch between the register and the required type/bitwidth.
      return {AssignedReg};
    }
  }

  for (; NumRegs; --NumRegs, ++I) {
    assert(I != RC->end() && "Ran out of registers to allocate!");
    Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
    Regs.push_back(R);
  }

  OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
  return None;
}
static unsigned
findMatchingInlineAsmOperand(unsigned OperandNo,
                             const std::vector<SDValue> &AsmNodeOperands) {
  // Scan until we find the definition we already emitted of this operand.
  unsigned CurOp = InlineAsm::Op_FirstOperand;
  for (; OperandNo; --OperandNo) {
    // Advance to the next operand.
    unsigned OpFlag =
        cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
    assert((InlineAsm::isRegDefKind(OpFlag) ||
            InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
            InlineAsm::isMemKind(OpFlag)) &&
           "Skipped past definitions?");
    CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
  }
  return CurOp;
}

namespace {

class ExtraFlags {
  unsigned Flags = 0;

public:
  explicit ExtraFlags(const CallBase &Call) {
    const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
    if (IA->hasSideEffects())
      Flags |= InlineAsm::Extra_HasSideEffects;
    if (IA->isAlignStack())
      Flags |= InlineAsm::Extra_IsAlignStack;
    if (Call.isConvergent())
      Flags |= InlineAsm::Extra_IsConvergent;
    Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
  }

  void update(const TargetLowering::AsmOperandInfo &OpInfo) {
    // Ideally, we would only check against memory constraints. However, the
    // meaning of an Other constraint can be target-specific and we can't
    // easily reason about it. Therefore, be conservative and set
    // MayLoad/MayStore for Other constraints as well.
    if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
        OpInfo.ConstraintType == TargetLowering::C_Other) {
      if (OpInfo.Type == InlineAsm::isInput)
        Flags |= InlineAsm::Extra_MayLoad;
      else if (OpInfo.Type == InlineAsm::isOutput)
        Flags |= InlineAsm::Extra_MayStore;
      else if (OpInfo.Type == InlineAsm::isClobber)
        Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
    }
  }

  unsigned get() const { return Flags; }
};

} // end anonymous namespace
/// visitInlineAsm - Handle a call to an InlineAsm object.
void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
                                         const BasicBlock *EHPadBB) {
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());

  /// ConstraintOperands - Information about all of the constraints.
  SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
      DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);

  // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
  // AsmDialect, MayLoad, MayStore).
  bool HasSideEffect = IA->hasSideEffects();
  ExtraFlags ExtraInfo(Call);

  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  for (auto &T : TargetConstraints) {
    ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand.
    if (OpInfo.hasArg()) {
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
      Type *ParamElemTy = Call.getParamElementType(ArgNo);
      EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
                                           DAG.getDataLayout(), ParamElemTy);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        OpInfo.ConstraintVT = TLI.getSimpleValueType(
            DAG.getDataLayout(), STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
            DAG.getDataLayout(), Call.getType()).getSimpleVT();
      }
      ++ResNo;
    } else {
      OpInfo.ConstraintVT = MVT::Other;
    }

    if (!HasSideEffect)
      HasSideEffect = OpInfo.hasMemory(TLI);

    // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
    // FIXME: Could we compute this on OpInfo rather than T?

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(T, SDValue());

    if (T.ConstraintType == TargetLowering::C_Immediate &&
        OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
      // We've delayed emitting a diagnostic like the "n" constraint because
      // inlining could cause an integer showing up.
      return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
                                          "' expects an integer constant "
                                          "expression");

    ExtraInfo.update(T);
  }

  // We won't need to flush pending loads if this asm doesn't touch
  // memory and is nonvolatile.
  SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();

  bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
  if (EmitEHLabels) {
    assert(EHPadBB && "InvokeInst must have an EHPadBB");
  }
  bool IsCallBr = isa<CallBrInst>(Call);

  if (IsCallBr || EmitEHLabels) {
    // If this is a callbr or invoke we need to flush pending exports since
    // inlineasm_br and invoke are terminators.
    // We need to do this before nodes are glued to the inlineasm_br node.
    Chain = getControlRoot();
  }

  MCSymbol *BeginLabel = nullptr;
  if (EmitEHLabels) {
    Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
  }

  // Second pass over the constraints: compute which constraint option to use.
  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
      patchMatchingInput(OpInfo, Input, DAG);
    }

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);

    if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.Type == InlineAsm::isClobber) ||
        OpInfo.ConstraintType == TargetLowering::C_Address)
      continue;

    // If this is a memory input, and if the operand is not indirect, do what
    // we need to provide an address for the memory input.
    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        !OpInfo.isIndirect) {
      assert((OpInfo.isMultipleAlternative ||
              (OpInfo.Type == InlineAsm::isInput)) &&
             "Can only indirectify direct input operands!");

      // Memory operands really want the address of the value.
      Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);

      // There is no longer a Value* corresponding to this operand.
      OpInfo.CallOperandVal = nullptr;

      // It is now an indirect operand.
      OpInfo.isIndirect = true;
    }
  }

  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
  std::vector<SDValue> AsmNodeOperands;
  AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
  AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
      IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));

  // If we have a !srcloc metadata node associated with it, we want to attach
  // this to the ultimately generated inline asm machineinstr. To do this, we
  // pass in the third operand as this (potentially null) inline asm MDNode.
  const MDNode *SrcLoc = Call.getMetadata("srcloc");
  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));

  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
  // bits as operand 3.
  AsmNodeOperands.push_back(DAG.getTargetConstant(
      ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));

  // Third pass: Loop over operands to prepare DAG-level operands. As part of
  // this, assign virtual and physical registers for inputs and outputs.
  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
    // Assign Registers.
    SDISelAsmOperandInfo &RefOpInfo =
        OpInfo.isMatchingInputConstraint()
            ? ConstraintOperands[OpInfo.getMatchedOperand()]
            : OpInfo;
    const auto RegError =
        getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
    if (RegError.hasValue()) {
      const MachineFunction &MF = DAG.getMachineFunction();
      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      const char *RegName = TRI.getName(RegError.getValue());
      emitInlineAsmError(Call, "register '" + Twine(RegName) +
                                   "' allocated for constraint '" +
                                   Twine(OpInfo.ConstraintCode) +
                                   "' does not match required type");
      return;
    }

    auto DetectWriteToReservedRegister = [&]() {
      const MachineFunction &MF = DAG.getMachineFunction();
      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
        if (Register::isPhysicalRegister(Reg) &&
            TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
          const char *RegName = TRI.getName(Reg);
          emitInlineAsmError(Call, "write to reserved register '" +
                                       Twine(RegName) + "'");
          return true;
        }
      }
      return false;
    };
    assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
            (OpInfo.Type == InlineAsm::isInput &&
             !OpInfo.isMatchingInputConstraint())) &&
           "Only address as input operand is allowed.");

    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
        unsigned ConstraintID =
            TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
        assert(ConstraintID != InlineAsm::Constraint_Unknown &&
               "Failed to convert memory constraint code to constraint id.");

        // Add information to the INLINEASM node to know about this output.
        unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
        OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
                                                        MVT::i32));
        AsmNodeOperands.push_back(OpInfo.CallOperand);
      } else {
        // Otherwise, this outputs to a register (directly for C_Register /
        // C_RegisterClass, and a target-defined fashion for
        // C_Immediate/C_Other). Find a register that we can use.
        if (OpInfo.AssignedRegs.Regs.empty()) {
          emitInlineAsmError(
              Call, "couldn't allocate output register for constraint '" +
                        Twine(OpInfo.ConstraintCode) + "'");
          return;
        }

        if (DetectWriteToReservedRegister())
          return;

        // Add information to the INLINEASM node to know that this register is
        // set.
        OpInfo.AssignedRegs.AddInlineAsmOperands(
            OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
                                  : InlineAsm::Kind_RegDef,
            false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
      }
      break;

    case InlineAsm::isInput: {
      SDValue InOperandVal = OpInfo.CallOperand;

      if (OpInfo.isMatchingInputConstraint()) {
        // If this is required to match an output register we have already set,
        // just use its register.
        auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
                                                  AsmNodeOperands);
        unsigned OpFlag =
            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
        if (InlineAsm::isRegDefKind(OpFlag) ||
            InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
          if (OpInfo.isIndirect) {
            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
            emitInlineAsmError(Call, "inline asm not supported yet: "
                                     "don't know how to handle tied "
                                     "indirect register inputs");
            return;
          }

          SmallVector<unsigned, 4> Regs;
          MachineFunction &MF = DAG.getMachineFunction();
          MachineRegisterInfo &MRI = MF.getRegInfo();
          const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
          auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
          Register TiedReg = R->getReg();
          MVT RegVT = R->getSimpleValueType(0);
          const TargetRegisterClass *RC =
              TiedReg.isVirtual()     ? MRI.getRegClass(TiedReg)
              : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
                                      : TRI.getMinimalPhysRegClass(TiedReg);
          unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
          for (unsigned i = 0; i != NumRegs; ++i)
            Regs.push_back(MRI.createVirtualRegister(RC));

          RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());

          SDLoc dl = getCurSDLoc();
          // Use the produced MatchedRegs object to copy the input value into
          // the new virtual registers and record them as the tied use
          // operands of the INLINEASM node.
          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                           true, OpInfo.getMatchedOperand(), dl,
                                           DAG, AsmNodeOperands);
          break;
        }

        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
               "Unexpected number of operands");
        // Add information to the INLINEASM node to know about this input.
        // See InlineAsm.h isUseOperandTiedToDef.
        OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
                                                     OpInfo.getMatchedOperand());
        AsmNodeOperands.push_back(DAG.getTargetConstant(
            OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
        break;
      }

      // Treat indirect 'X' constraint as memory.
      if (OpInfo.ConstraintType == TargetLowering::C_Other &&
          OpInfo.isIndirect)
        OpInfo.ConstraintType = TargetLowering::C_Memory;

      if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
          OpInfo.ConstraintType == TargetLowering::C_Other) {
        std::vector<SDValue> Ops;
        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
                                         Ops, DAG);
        if (Ops.empty()) {
          if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
            if (isa<ConstantSDNode>(InOperandVal)) {
              emitInlineAsmError(Call, "value out of range for constraint '" +
                                           Twine(OpInfo.ConstraintCode) + "'");
              return;
            }

          emitInlineAsmError(Call,
                             "invalid operand for inline asm constraint '" +
                                 Twine(OpInfo.ConstraintCode) + "'");
          return;
        }

        // Add information to the INLINEASM node to know about this input.
        unsigned ResOpType =
            InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
        AsmNodeOperands.push_back(DAG.getTargetConstant(
            ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
        llvm::append_range(AsmNodeOperands, Ops);
        break;
      }

      if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
          OpInfo.ConstraintType == TargetLowering::C_Address) {
        assert((OpInfo.isIndirect ||
                OpInfo.ConstraintType != TargetLowering::C_Memory) &&
               "Operand must be indirect to be a mem!");
        assert(InOperandVal.getValueType() ==
                   TLI.getPointerTy(DAG.getDataLayout()) &&
               "Memory operands expect pointer values");

        unsigned ConstraintID =
            TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
        assert(ConstraintID != InlineAsm::Constraint_Unknown &&
               "Failed to convert memory constraint code to constraint id.");

        // Add information to the INLINEASM node to know about this input.
        unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
        ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
                                                        getCurSDLoc(),
                                                        MVT::i32));
        AsmNodeOperands.push_back(InOperandVal);
        break;
      }

      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
              OpInfo.ConstraintType == TargetLowering::C_Register) &&
             "Unknown constraint type!");

      // TODO: Support this.
      if (OpInfo.isIndirect) {
        emitInlineAsmError(
            Call, "Don't know how to handle indirect register inputs yet "
                  "for constraint '" +
                      Twine(OpInfo.ConstraintCode) + "'");
        return;
      }

      // Copy the input into the appropriate registers.
      if (OpInfo.AssignedRegs.Regs.empty()) {
        emitInlineAsmError(Call,
                           "couldn't allocate input reg for constraint '" +
                               Twine(OpInfo.ConstraintCode) + "'");
        return;
      }

      if (DetectWriteToReservedRegister())
        return;

      SDLoc dl = getCurSDLoc();

      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
                                        &Call);

      OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
                                               dl, DAG, AsmNodeOperands);
      break;
    }
    case InlineAsm::isClobber:
      // Add the clobbered value to the operand list, so that the register
      // allocator is aware that the physreg got clobbered.
      if (!OpInfo.AssignedRegs.Regs.empty())
        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
                                                 false, 0, getCurSDLoc(), DAG,
                                                 AsmNodeOperands);
      break;
    }
  }

  // Finish up input operands. Set the input chain and add the flag last.
  AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);

  unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
  Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
                      DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  Flag = Chain.getValue(1);

  // Do additional work to generate outputs.

  SmallVector<EVT, 1> ResultVTs;
  SmallVector<SDValue, 1> ResultValues;
  SmallVector<SDValue, 8> OutChains;

  llvm::Type *CallResultType = Call.getType();
  ArrayRef<Type *> ResultTypes;
  if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
    ResultTypes = StructResult->elements();
  else if (!CallResultType->isVoidTy())
    ResultTypes = makeArrayRef(CallResultType);

  auto CurResultType = ResultTypes.begin();
  auto handleRegAssign = [&](SDValue V) {
    assert(CurResultType != ResultTypes.end() && "Unexpected value");
    assert((*CurResultType)->isSized() && "Unexpected unsized type");
    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
    ++CurResultType;
    // If the type of the inline asm call site return value is different but
    // has same size as the type of the asm output, bitcast it. One example of
    // this is for vectors with different width / number of elements. This can
    // happen for register classes that can contain multiple different value
    // types. The preg or vreg allocated may not have the same VT as was
    // expected.
    //
    // This can also happen for a return value that disagrees with the register
    // class it is put in, eg. a double in a general-purpose register on a
    // 32-bit machine.
    if (ResultVT != V.getValueType() &&
        ResultVT.getSizeInBits() == V.getValueSizeInBits())
      V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
    else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
             V.getValueType().isInteger()) {
      // If a result value was tied to an input value, the computed result
      // may have a wider width than the expected result. Extract the
      // relevant portion.
      V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
    }
    assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
    ResultVTs.push_back(ResultVT);
    ResultValues.push_back(V);
  };

  // Deal with output operands.
  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
    if (OpInfo.Type == InlineAsm::isOutput) {
      SDValue Val;
      // Skip trivial output operands.
      if (OpInfo.AssignedRegs.Regs.empty())
        continue;

      switch (OpInfo.ConstraintType) {
      case TargetLowering::C_Register:
      case TargetLowering::C_RegisterClass:
        Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
                                                  Chain, &Flag, &Call);
        break;
      case TargetLowering::C_Immediate:
      case TargetLowering::C_Other:
        Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
                                              OpInfo, DAG);
        break;
      case TargetLowering::C_Memory:
        break; // Already handled.
      case TargetLowering::C_Address:
        break; // Silence warning.
      case TargetLowering::C_Unknown:
        assert(false && "Unexpected unknown constraint");
      }

      // Indirect outputs manifest as stores. Record output chains.
      if (OpInfo.isIndirect) {
        const Value *Ptr = OpInfo.CallOperandVal;
        assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
        SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
                                     MachinePointerInfo(Ptr));
        OutChains.push_back(Store);
      } else {
        // generate CopyFromRegs to associated registers.
        assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
        if (Val.getOpcode() == ISD::MERGE_VALUES) {
          for (const SDValue &V : Val->op_values())
            handleRegAssign(V);
        } else
          handleRegAssign(Val);
      }
    }
  }

  // Set results.
  if (!ResultValues.empty()) {
    assert(CurResultType == ResultTypes.end() &&
           "Mismatch in number of ResultTypes");
    assert(ResultValues.size() == ResultTypes.size() &&
           "Mismatch in number of output operands in asm result");

    SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                            DAG.getVTList(ResultVTs), ResultValues);
    setValue(&Call, V);
  }

  // Collect store chains.
  if (!OutChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);

  if (EmitEHLabels) {
    Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel);
  }

  // Only update the root if inline assembly has a memory effect.
  if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
      EmitEHLabels)
    DAG.setRoot(Chain);
}
void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
                                             const Twine &Message) {
  LLVMContext &Ctx = *DAG.getContext();
  Ctx.emitError(&Call, Message);

  // Make sure we leave the DAG in a valid state
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SmallVector<EVT, 1> ValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);

  if (ValueVTs.empty())
    return;

  SmallVector<SDValue, 1> Ops;
  for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
    Ops.push_back(DAG.getUNDEF(ValueVTs[i]));

  setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
                          MVT::Other, getRoot(),
                          getValue(I.getArgOperand(0)),
                          DAG.getSrcValue(I.getArgOperand(0))));
}

void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const DataLayout &DL = DAG.getDataLayout();
  SDValue V = DAG.getVAArg(
      TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
      getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
      DL.getABITypeAlign(I.getType()).value());
  DAG.setRoot(V.getValue(1));

  if (I.getType()->isPointerTy())
    V = DAG.getPtrExtOrTrunc(
        V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
  setValue(&I, V);
}

void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
                          MVT::Other, getRoot(),
                          getValue(I.getArgOperand(0)),
                          DAG.getSrcValue(I.getArgOperand(0))));
}

void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
                          MVT::Other, getRoot(),
                          getValue(I.getArgOperand(0)),
                          getValue(I.getArgOperand(1)),
                          DAG.getSrcValue(I.getArgOperand(0)),
                          DAG.getSrcValue(I.getArgOperand(1))));
}
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
                                                    const Instruction &I,
                                                    SDValue Op) {
  const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
  if (!Range)
    return Op;

  ConstantRange CR = getConstantRangeFromMetadata(*Range);
  if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
    return Op;

  APInt Lo = CR.getUnsignedMin();
  if (!Lo.isMinValue())
    return Op;

  APInt Hi = CR.getUnsignedMax();
  unsigned Bits = std::max(Hi.getActiveBits(),
                           static_cast<unsigned>(IntegerType::MIN_INT_BITS));

  EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);

  SDLoc SL = getCurSDLoc();

  SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
                             DAG.getValueType(SmallVT));
  unsigned NumVals = Op.getNode()->getNumValues();
  if (NumVals == 1)
    return ZExt;

  SmallVector<SDValue, 4> Ops;

  Ops.push_back(ZExt);
  for (unsigned I = 1; I != NumVals; ++I)
    Ops.push_back(Op.getValue(I));

  return DAG.getMergeValues(Ops, SL);
}
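// Illustrative example (assumed IR, not from this file): a call annotated as
//
//   %x = call i32 @f(), !range !{i32 0, i32 256}
//
// has the unsigned range [0, 256), so Bits is 8 and the result is wrapped as
// (AssertZext %x, ValueType:i8), which lets later combines drop redundant
// zero-extensions and masks of the returned value.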
/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
void SelectionDAGBuilder::populateCallLoweringInfo(
    TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
    unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
    bool IsPatchPoint) {
  TargetLowering::ArgListTy Args;
  Args.reserve(NumArgs);

  // Populate the argument list.
  // Attributes for args start at offset 1, after the return attribute.
  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
       ArgI != ArgE; ++ArgI) {
    const Value *V = Call->getOperand(ArgI);

    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");

    TargetLowering::ArgListEntry Entry;
    Entry.Node = getValue(V);
    Entry.Ty = V->getType();
    Entry.setAttributes(Call, ArgI);
    Args.push_back(Entry);
  }

  CLI.setDebugLoc(getCurSDLoc())
      .setChain(getRoot())
      .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
      .setDiscardResult(Call->use_empty())
      .setIsPatchPoint(IsPatchPoint)
      .setIsPreallocated(
          Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
/// or patchpoint target node's operand list.
///
/// Constants are converted to TargetConstants purely as an optimization to
/// avoid constant materialization and register allocation.
///
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
/// generate address computation nodes, and so FinalizeISel can convert the
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
/// address materialization and register allocation, but may also be required
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
/// alloca in the entry block, then the runtime may assume that the alloca's
/// StackMap location can be read immediately after compilation and that the
/// location is valid at any point during execution (this is similar to the
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
                                const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
                                SelectionDAGBuilder &Builder) {
  for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) {
    SDValue OpVal = Builder.getValue(Call.getArgOperand(i));
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
      Ops.push_back(
        Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
      Ops.push_back(
        Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
    } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
      const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
      Ops.push_back(Builder.DAG.getTargetFrameIndex(
          FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
    } else
      Ops.push_back(OpVal);
  }
}
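// For illustration only: a live-variable list such as (a constant 42, a value
// loaded from an entry-block alloca, and a value in a register) would be
// appended to Ops roughly as
//   TargetConstant<StackMaps::ConstantOp>, TargetConstant<42>,   constant
//   TargetFrameIndex<fi#N>,                                      direct slot
//   <the SDValue returned by getValue()>                         register
// The exact nodes depend on how getValue() materialized each operand.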
/// Lower llvm.experimental.stackmap directly to its target opcode.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
  // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
  //                                  [live variables...])

  assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");

  SDValue Chain, InFlag, Callee, NullPtr;
  SmallVector<SDValue, 32> Ops;

  SDLoc DL = getCurSDLoc();
  Callee = getValue(CI.getCalledOperand());
  NullPtr = DAG.getIntPtrConstant(0, DL, true);

  // The stackmap intrinsic only records the live variables (the arguments
  // passed to it) and emits NOPS (if requested). Unlike the patchpoint
  // intrinsic, this won't be lowered to a function call. This means we don't
  // have to worry about calling conventions and target specific lowering code.
  // Instead we perform the call lowering right here.
  //
  // chain, flag = CALLSEQ_START(chain, 0, 0)
  // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
  // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
  //
  Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
  InFlag = Chain.getValue(1);

  // Add the <id> and <numBytes> constants.
  SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
  Ops.push_back(DAG.getTargetConstant(
                  cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
  SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
  Ops.push_back(DAG.getTargetConstant(
                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
                  MVT::i32));

  // Push live variables for the stack map.
  addStackMapLiveVars(CI, 2, DL, Ops, *this);

  // We are not pushing any register mask info here on the operands list,
  // because the stackmap doesn't clobber anything.

  // Push the chain and the glue flag.
  Ops.push_back(Chain);
  Ops.push_back(InFlag);

  // Create the STACKMAP node.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
  Chain = SDValue(SM, 0);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);

  // Stackmaps don't generate values, so nothing goes into the NodeMap.

  // Set the root to the target-lowered call chain.
  DAG.setRoot(Chain);

  // Inform the Frame Information that we have a stackmap in this function.
  FuncInfo.MF->getFrameInfo().setHasStackMap();
}
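// Example use of the intrinsic lowered above (values are arbitrary):
//   call void @llvm.experimental.stackmap(i64 42, i32 8, i32 %x, i64 %y)
// Here 42 is the stackmap ID, 8 is the number of shadow bytes to emit, and
// %x/%y are recorded as live values in the resulting __llvm_stackmaps entry.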
/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
                                          const BasicBlock *EHPadBB) {
  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
  //                                                 i32 <numBytes>,
  //                                                 i8* <target>,
  //                                                 i32 <numArgs>,
  //                                                 [Args...],
  //                                                 [live variables...])

  CallingConv::ID CC = CB.getCallingConv();
  bool IsAnyRegCC = CC == CallingConv::AnyReg;
  bool HasDef = !CB.getType()->isVoidTy();
  SDLoc dl = getCurSDLoc();
  SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));

  // Handle immediate and symbolic callees.
  if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
    Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
                                   /*isTarget=*/true);
  else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee =  DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
                                         SDLoc(SymbolicCallee),
                                         SymbolicCallee->getValueType(0));

  // Get the real number of arguments participating in the call <numArgs>
  SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
  unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();

  // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
  // Intrinsics include all meta-operands up to but not including CC.
  unsigned NumMetaOpers = PatchPointOpers::CCPos;
  assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
         "Not enough arguments provided to the patchpoint intrinsic");

  // For AnyRegCC the arguments are lowered later on manually.
  unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
  Type *ReturnTy =
      IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();

  TargetLowering::CallLoweringInfo CLI(DAG);
  populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
                           ReturnTy, true);
  std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);

  SDNode *CallEnd = Result.second.getNode();
  if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
    CallEnd = CallEnd->getOperand(0).getNode();

  /// Get a call instruction from the call sequence chain.
  /// Tail calls are not allowed.
  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
         "Expected a callseq node.");
  SDNode *Call = CallEnd->getOperand(0).getNode();
  bool HasGlue = Call->getGluedNode();

  // Replace the target specific call node with the patchable intrinsic.
  SmallVector<SDValue, 8> Ops;

  // Add the <id> and <numBytes> constants.
  SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
  Ops.push_back(DAG.getTargetConstant(
                  cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
  SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
  Ops.push_back(DAG.getTargetConstant(
                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
                  MVT::i32));

  // Add the callee.
  Ops.push_back(Callee);

  // Adjust <numArgs> to account for any arguments that have been passed on the
  // stack instead.
  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
  unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
  NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));

  // Add the calling convention
  Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));

  // Add the arguments we omitted previously. The register allocator should
  // place these in any free register.
  if (IsAnyRegCC)
    for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
      Ops.push_back(getValue(CB.getArgOperand(i)));

  // Push the arguments from the call instruction up to the register mask.
  SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
  Ops.append(Call->op_begin() + 2, e);

  // Push live variables for the stack map.
  addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);

  // Push the register mask info.
  if (HasGlue)
    Ops.push_back(*(Call->op_end()-2));
  else
    Ops.push_back(*(Call->op_end()-1));

  // Push the chain (this is originally the first operand of the call, but
  // becomes now the last or second to last operand).
  Ops.push_back(*(Call->op_begin()));

  // Push the glue flag (last operand).
  if (HasGlue)
    Ops.push_back(*(Call->op_end()-1));

  SDVTList NodeTys;
  if (IsAnyRegCC && HasDef) {
    // Create the return types based on the intrinsic definition
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    SmallVector<EVT, 3> ValueVTs;
    ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
    assert(ValueVTs.size() == 1 && "Expected only one return value type.");

    // There is always a chain and a glue type at the end
    ValueVTs.push_back(MVT::Other);
    ValueVTs.push_back(MVT::Glue);
    NodeTys = DAG.getVTList(ValueVTs);
  } else
    NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // Replace the target specific call node with a PATCHPOINT node.
  MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
                                         dl, NodeTys, Ops);

  // Update the NodeMap.
  if (HasDef) {
    if (IsAnyRegCC)
      setValue(&CB, SDValue(MN, 0));
    else
      setValue(&CB, Result.first);
  }

  // Fixup the consumers of the intrinsic. The chain and glue may be used in the
  // call sequence. Furthermore the location of the chain and glue can change
  // when the AnyReg calling convention is used and the intrinsic returns a
  // value.
  if (IsAnyRegCC && HasDef) {
    SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
    SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  } else
    DAG.ReplaceAllUsesWith(Call, MN);
  DAG.DeleteNode(Call);

  // Inform the Frame Information that we have a patchpoint in this function.
  FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
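// Example use of the intrinsic lowered above (values are arbitrary):
//   %r = call i64 (i64, i32, i8*, i32, ...)
//            @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %target,
//                                              i32 2, i64 %a, i64 %b, i64 %live)
// The first four operands are the meta args handled above; %a and %b take
// part in the calling convention, while %live is only recorded in the stack
// map via addStackMapLiveVars.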
void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
                                            unsigned Intrinsic) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Op1 = getValue(I.getArgOperand(0));
  SDValue Op2;
  if (I.arg_size() > 1)
    Op2 = getValue(I.getArgOperand(1));
  SDLoc dl = getCurSDLoc();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  SDValue Res;
  SDNodeFlags SDFlags;
  if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
    SDFlags.copyFMF(*FPMO);

  switch (Intrinsic) {
  case Intrinsic::vector_reduce_fadd:
    if (SDFlags.hasAllowReassociation())
      Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
                        DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
                        SDFlags);
    else
      Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
    break;
  case Intrinsic::vector_reduce_fmul:
    if (SDFlags.hasAllowReassociation())
      Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
                        DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
                        SDFlags);
    else
      Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
    break;
  case Intrinsic::vector_reduce_add:
    Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_mul:
    Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_and:
    Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_or:
    Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_xor:
    Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_smax:
    Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_smin:
    Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_umax:
    Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_umin:
    Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
    break;
  case Intrinsic::vector_reduce_fmax:
    Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
    break;
  case Intrinsic::vector_reduce_fmin:
    Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
    break;
  default:
    llvm_unreachable("Unhandled vector reduce intrinsic");
  }
  setValue(&I, Res);
}
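// For example (illustrative IR), with the reassoc flag present
//   %r = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %acc,
//                                                          <4 x float> %v)
// is lowered above to FADD(%acc, VECREDUCE_FADD(%v)), whereas without the
// flag it becomes the strictly ordered VECREDUCE_SEQ_FADD(%acc, %v).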
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
  SmallVector<Attribute::AttrKind, 2> Attrs;
  if (CLI.RetSExt)
    Attrs.push_back(Attribute::SExt);
  if (CLI.RetZExt)
    Attrs.push_back(Attribute::ZExt);
  if (CLI.IsInReg)
    Attrs.push_back(Attribute::InReg);

  return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
                            Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
  // Handle the incoming return values from the call.
  CLI.Ins.clear();
  Type *OrigRetTy = CLI.RetTy;
  SmallVector<EVT, 4> RetTys;
  SmallVector<uint64_t, 4> Offsets;
  auto &DL = CLI.DAG.getDataLayout();
  ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);

  if (CLI.IsPostTypeLegalization) {
    // If we are lowering a libcall after legalization, split the return type.
    SmallVector<EVT, 4> OldRetTys;
    SmallVector<uint64_t, 4> OldOffsets;
    RetTys.swap(OldRetTys);
    Offsets.swap(OldOffsets);

    for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
      EVT RetVT = OldRetTys[i];
      uint64_t Offset = OldOffsets[i];
      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
      unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
      RetTys.append(NumRegs, RegisterVT);
      for (unsigned j = 0; j != NumRegs; ++j)
        Offsets.push_back(Offset + j * RegisterVTByteSZ);
    }
  }

  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);

  bool CanLowerReturn =
      this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
                           CLI.IsVarArg, Outs, CLI.RetTy->getContext());

  SDValue DemoteStackSlot;
  int DemoteStackIdx = -100;
  if (!CanLowerReturn) {
    // FIXME: equivalent assert?
    // assert(!CS.hasInAllocaArgument() &&
    //        "sret demotion is incompatible with inalloca");
    uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
    Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
    MachineFunction &MF = CLI.DAG.getMachineFunction();
    DemoteStackIdx =
        MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
    Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
                                              DL.getAllocaAddrSpace());

    DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
    ArgListEntry Entry;
    Entry.Node = DemoteStackSlot;
    Entry.Ty = StackSlotPtrType;
    Entry.IsSExt = false;
    Entry.IsZExt = false;
    Entry.IsInReg = false;
    Entry.IsSRet = true;
    Entry.IsNest = false;
    Entry.IsByVal = false;
    Entry.IsByRef = false;
    Entry.IsReturned = false;
    Entry.IsSwiftSelf = false;
    Entry.IsSwiftAsync = false;
    Entry.IsSwiftError = false;
    Entry.IsCFGuardTarget = false;
    Entry.Alignment = Alignment;
    CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
    CLI.NumFixedArgs += 1;
    CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());

    // sret demotion isn't compatible with tail-calls, since the sret argument
    // points into the callers stack frame.
    CLI.IsTailCall = false;
  } else {
    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
        CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
      ISD::ArgFlagsTy Flags;
      if (NeedsRegBlock) {
        Flags.setInConsecutiveRegs();
        if (I == RetTys.size() - 1)
          Flags.setInConsecutiveRegsLast();
      }
      EVT VT = RetTys[I];
      MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
                                                     CLI.CallConv, VT);
      unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
                                                       CLI.CallConv, VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags;
        MyFlags.Flags = Flags;
        MyFlags.VT = RegisterVT;
        MyFlags.ArgVT = VT;
        MyFlags.Used = CLI.IsReturnValueUsed;
        if (CLI.RetTy->isPointerTy()) {
          MyFlags.Flags.setPointer();
          MyFlags.Flags.setPointerAddrSpace(
              cast<PointerType>(CLI.RetTy)->getAddressSpace());
        }
        if (CLI.RetSExt)
          MyFlags.Flags.setSExt();
        if (CLI.RetZExt)
          MyFlags.Flags.setZExt();
        if (CLI.IsInReg)
          MyFlags.Flags.setInReg();
        CLI.Ins.push_back(MyFlags);
      }
    }
  }

  // We push in swifterror return as the last element of CLI.Ins.
  ArgListTy &Args = CLI.getArgs();
  if (supportSwiftError()) {
    for (const ArgListEntry &Arg : Args) {
      if (Arg.IsSwiftError) {
        ISD::InputArg MyFlags;
        MyFlags.VT = getPointerTy(DL);
        MyFlags.ArgVT = EVT(getPointerTy(DL));
        MyFlags.Flags.setSwiftError();
        CLI.Ins.push_back(MyFlags);
      }
    }
  }

  // Handle all of the outgoing arguments.
  CLI.Outs.clear();
  CLI.OutVals.clear();
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
    // FIXME: Split arguments if CLI.IsPostTypeLegalization
    Type *FinalType = Args[i].Ty;
    if (Args[i].IsByVal)
      FinalType = Args[i].IndirectType;
    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
        FinalType, CLI.CallConv, CLI.IsVarArg, DL);
    for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
         ++Value) {
      EVT VT = ValueVTs[Value];
      Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
      SDValue Op = SDValue(Args[i].Node.getNode(),
                           Args[i].Node.getResNo() + Value);
      ISD::ArgFlagsTy Flags;

      // Certain targets (such as MIPS), may have a different ABI alignment
      // for a type depending on the context. Give the target a chance to
      // specify the alignment it wants.
      const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
      Flags.setOrigAlign(OriginalAlignment);

      if (Args[i].Ty->isPointerTy()) {
        Flags.setPointer();
        Flags.setPointerAddrSpace(
            cast<PointerType>(Args[i].Ty)->getAddressSpace());
      }
      if (Args[i].IsZExt)
        Flags.setZExt();
      if (Args[i].IsSExt)
        Flags.setSExt();
      if (Args[i].IsInReg) {
        // If we are using vectorcall calling convention, a structure that is
        // passed InReg - is surely an HVA
        if (CLI.CallConv == CallingConv::X86_VectorCall &&
            isa<StructType>(FinalType)) {
          // The first value of a structure is marked
          if (0 == Value)
            Flags.setHvaStart();
          Flags.setHva();
        }
        // Set InReg Flag
        Flags.setInReg();
      }
      if (Args[i].IsSRet)
        Flags.setSRet();
      if (Args[i].IsSwiftSelf)
        Flags.setSwiftSelf();
      if (Args[i].IsSwiftAsync)
        Flags.setSwiftAsync();
      if (Args[i].IsSwiftError)
        Flags.setSwiftError();
      if (Args[i].IsCFGuardTarget)
        Flags.setCFGuardTarget();
      if (Args[i].IsByVal)
        Flags.setByVal();
      if (Args[i].IsByRef)
        Flags.setByRef();
      if (Args[i].IsPreallocated) {
        Flags.setPreallocated();
        // Set the byval flag for CCAssignFn callbacks that don't know about
        // preallocated. This way we can know how many bytes we should've
        // allocated and how many bytes a callee cleanup function will pop. If
        // we port preallocated to more targets, we'll have to add custom
        // preallocated handling in the various CC lowering callbacks.
        Flags.setByVal();
      }
      if (Args[i].IsInAlloca) {
        Flags.setInAlloca();
        // Set the byval flag for CCAssignFn callbacks that don't know about
        // inalloca. This way we can know how many bytes we should've allocated
        // and how many bytes a callee cleanup function will pop. If we port
        // inalloca to more targets, we'll have to add custom inalloca handling
        // in the various CC lowering callbacks.
        Flags.setByVal();
      }
      Align MemAlign;
      if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
        unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType);
        Flags.setByValSize(FrameSize);

        // For ByVal, alignment should come from FE. BE will guess if this
        // info is not there but there are cases it cannot get right.
        if (auto MA = Args[i].Alignment)
          MemAlign = *MA;
        else
          MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL));
      } else if (auto MA = Args[i].Alignment) {
        MemAlign = *MA;
      } else {
        MemAlign = OriginalAlignment;
      }
      Flags.setMemAlign(MemAlign);
      if (Args[i].IsNest)
        Flags.setNest();
      if (NeedsRegBlock)
        Flags.setInConsecutiveRegs();

      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
                                                 CLI.CallConv, VT);
      unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
                                                        CLI.CallConv, VT);
      SmallVector<SDValue, 4> Parts(NumParts);
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;

      if (Args[i].IsSExt)
        ExtendKind = ISD::SIGN_EXTEND;
      else if (Args[i].IsZExt)
        ExtendKind = ISD::ZERO_EXTEND;

      // Conservatively only handle 'returned' on non-vectors that can be lowered,
      // for now.
      if (Args[i].IsReturned && !Op.getValueType().isVector() &&
          CanLowerReturn) {
        assert((CLI.RetTy == Args[i].Ty ||
                (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
                 CLI.RetTy->getPointerAddressSpace() ==
                     Args[i].Ty->getPointerAddressSpace())) &&
               RetTys.size() == NumValues && "unexpected use of 'returned'");
        // Before passing 'returned' to the target lowering code, ensure that
        // either the register MVT and the actual EVT are the same size or that
        // the return value and argument are extended in the same way; in these
        // cases it's safe to pass the argument register value unchanged as the
        // return register value (although it's at the target's option whether
        // to do so)
        // TODO: allow code generation to take advantage of partially preserved
        // registers rather than clobbering the entire register when the
        // parameter extension method is not compatible with the return
        // extension method
        if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
            (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
             CLI.RetZExt == Args[i].IsZExt))
          Flags.setReturned();
      }

      getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
                     CLI.CallConv, ExtendKind);

      for (unsigned j = 0; j != NumParts; ++j) {
        // if it isn't first piece, alignment must be 1
        // For scalable vectors the scalable part is currently handled
        // by individual targets, so we just use the known minimum size here.
        ISD::OutputArg MyFlags(
            Flags, Parts[j].getValueType().getSimpleVT(), VT,
            i < CLI.NumFixedArgs, i,
            j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
        if (NumParts > 1 && j == 0)
          MyFlags.Flags.setSplit();
        else if (j != 0) {
          MyFlags.Flags.setOrigAlign(Align(1));
          if (j == NumParts - 1)
            MyFlags.Flags.setSplitEnd();
        }

        CLI.Outs.push_back(MyFlags);
        CLI.OutVals.push_back(Parts[j]);
      }

      if (NeedsRegBlock && Value == NumValues - 1)
        CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
    }
  }

  SmallVector<SDValue, 4> InVals;
  CLI.Chain = LowerCall(CLI, InVals);

  // Update CLI.InVals to use outside of this function.
  CLI.InVals = InVals;

  // Verify that the target's LowerCall behaved as expected.
  assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
         "LowerCall didn't return a valid chain!");
  assert((!CLI.IsTailCall || InVals.empty()) &&
         "LowerCall emitted a return value for a tail call!");
  assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
         "LowerCall didn't emit the correct number of values!");

  // For a tail call, the return value is merely live-out and there aren't
  // any nodes in the DAG representing it. Return a special value to
  // indicate that a tail call has been emitted and no more Instructions
  // should be processed in the current block.
  if (CLI.IsTailCall) {
    CLI.DAG.setRoot(CLI.Chain);
    return std::make_pair(SDValue(), SDValue());
  }

#ifndef NDEBUG
  for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
    assert(InVals[i].getNode() && "LowerCall emitted a null value!");
    assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
           "LowerCall emitted a value with the wrong type!");
  }
#endif

  SmallVector<SDValue, 4> ReturnValues;
  if (!CanLowerReturn) {
    // The instruction result is the result of loading from the
    // hidden sret parameter.
    SmallVector<EVT, 1> PVTs;
    Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());

    ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
    assert(PVTs.size() == 1 && "Pointers should fit in one register");
    EVT PtrVT = PVTs[0];

    unsigned NumValues = RetTys.size();
    ReturnValues.resize(NumValues);
    SmallVector<SDValue, 4> Chains(NumValues);

    // An aggregate return value cannot wrap around the address space, so
    // offsets to its parts don't wrap either.
    SDNodeFlags Flags;
    Flags.setNoUnsignedWrap(true);

    MachineFunction &MF = CLI.DAG.getMachineFunction();
    Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
    for (unsigned i = 0; i < NumValues; ++i) {
      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
                                    CLI.DAG.getConstant(Offsets[i], CLI.DL,
                                                        PtrVT), Flags);
      SDValue L = CLI.DAG.getLoad(
          RetTys[i], CLI.DL, CLI.Chain, Add,
          MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
                                            DemoteStackIdx, Offsets[i]),
          HiddenSRetAlign);
      ReturnValues[i] = L;
      Chains[i] = L.getValue(1);
    }

    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
  } else {
    // Collect the legal value parts into potentially illegal values
    // that correspond to the original function's return values.
    Optional<ISD::NodeType> AssertOp;
    if (CLI.RetSExt)
      AssertOp = ISD::AssertSext;
    else if (CLI.RetZExt)
      AssertOp = ISD::AssertZext;
    unsigned CurReg = 0;
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
      EVT VT = RetTys[I];
      MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
                                                     CLI.CallConv, VT);
      unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
                                                       CLI.CallConv, VT);

      ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
                                              NumRegs, RegisterVT, VT, nullptr,
                                              CLI.CallConv, AssertOp));
      CurReg += NumRegs;
    }

    // For a function returning void, there is no return value. We can't create
    // such a node, so we just return a null return value in that case. In
    // that case, nothing will actually look at the value.
    if (ReturnValues.empty())
      return std::make_pair(SDValue(), CLI.Chain);
  }

  SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
                                CLI.DAG.getVTList(RetTys), ReturnValues);
  return std::make_pair(Res, CLI.Chain);
}
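// A rough sketch of the sret demotion performed above when CanLowerReturn is
// false (types are only illustrative): a call
//   %agg = call { [8 x i64] } @f()
// is lowered as if it had been written
//   %tmp = alloca { [8 x i64] }                  ; the DemoteStackIdx object
//   call void @f({ [8 x i64] }* sret %tmp)       ; hidden first argument
//   %agg = load ...                              ; loads built from Offsets[]
// with the loads chained together by the TokenFactor created above.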
/// Places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
void TargetLowering::LowerOperationWrapper(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDValue Res = LowerOperation(SDValue(N, 0), DAG);

  if (!Res.getNode())
    return;

  // If the original node has one result, take the return value from
  // LowerOperation as is. It might not be result number 0.
  if (N->getNumValues() == 1) {
    Results.push_back(Res);
    return;
  }

  // If the original node has multiple results, then the return node should
  // have the same number of results.
  assert((N->getNumValues() == Res->getNumValues()) &&
         "Lowering returned the wrong number of results!");

  // Places new result values based on N's result number.
  for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
    Results.push_back(Res.getValue(I));
}
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  llvm_unreachable("LowerOperation not implemented for this target!");
}
void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
                                                     unsigned Reg,
                                                     ISD::NodeType ExtendType) {
  SDValue Op = getNonRegisterValue(V);
  assert((Op.getOpcode() != ISD::CopyFromReg ||
          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
         "Copy from a reg to the same reg!");
  assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // If this is an InlineAsm we have to match the registers required, not the
  // notional registers required by the type.

  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
                   None); // This is not an ABI copy.
  SDValue Chain = DAG.getEntryNode();

  if (ExtendType == ISD::ANY_EXTEND) {
    auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
    if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
      ExtendType = PreferredExtendIt->second;
  }
  RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
  PendingExports.push_back(Chain);
}
#include "llvm/CodeGen/SelectionDAGISel.h"
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
  // With FastISel active, we may be splitting blocks, so force creation
  // of virtual registers for all non-dead arguments.
  if (FastISel)
    return A->use_empty();

  const BasicBlock &Entry = A->getParent()->front();
  for (const User *U : A->users())
    if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
      return false; // Use not in entry block.

  return true;
}
using ArgCopyElisionMapTy =
    DenseMap<const Argument *,
             std::pair<const AllocaInst *, const StoreInst *>>;
/// Scan the entry block of the function in FuncInfo for arguments that look
/// like copies into a local alloca. Record any copied arguments in
/// ArgCopyElisionCandidates.
static void
findArgumentCopyElisionCandidates(const DataLayout &DL,
                                  FunctionLoweringInfo *FuncInfo,
                                  ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
  // Record the state of every static alloca used in the entry block. Argument
  // allocas are all used in the entry block, so we need approximately as many
  // entries as we have arguments.
  enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
  SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
  unsigned NumArgs = FuncInfo->Fn->arg_size();
  StaticAllocas.reserve(NumArgs * 2);

  auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
    if (!V)
      return nullptr;
    V = V->stripPointerCasts();
    const auto *AI = dyn_cast<AllocaInst>(V);
    if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
      return nullptr;
    auto Iter = StaticAllocas.insert({AI, Unknown});
    return &Iter.first->second;
  };

  // Look for stores of arguments to static allocas. Look through bitcasts and
  // GEPs to handle type coercions, as long as the alloca is fully initialized
  // by the store. Any non-store use of an alloca escapes it and any subsequent
  // unanalyzed store might write it.
  // FIXME: Handle structs initialized with multiple stores.
  for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
    // Look for stores, and handle non-store uses conservatively.
    const auto *SI = dyn_cast<StoreInst>(&I);
    if (!SI) {
      // We will look through cast uses, so ignore them completely.
      if (I.isCast())
        continue;
      // Ignore debug info and pseudo op intrinsics, they don't escape or store
      // to allocas.
      if (I.isDebugOrPseudoInst())
        continue;
      // This is an unknown instruction. Assume it escapes or writes to all
      // static alloca operands.
      for (const Use &U : I.operands()) {
        if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
          *Info = StaticAllocaInfo::Clobbered;
      }
      continue;
    }

    // If the stored value is a static alloca, mark it as escaped.
    if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
      *Info = StaticAllocaInfo::Clobbered;

    // Check if the destination is a static alloca.
    const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
    StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
    if (!Info)
      continue;
    const AllocaInst *AI = cast<AllocaInst>(Dst);

    // Skip allocas that have been initialized or clobbered.
    if (*Info != StaticAllocaInfo::Unknown)
      continue;

    // Check if the stored value is an argument, and that this store fully
    // initializes the alloca.
    // If the argument type has padding bits we can't directly forward a pointer
    // as the upper bits may contain garbage.
    // Don't elide copies from the same argument twice.
    const Value *Val = SI->getValueOperand()->stripPointerCasts();
    const auto *Arg = dyn_cast<Argument>(Val);
    if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
        Arg->getType()->isEmptyTy() ||
        DL.getTypeStoreSize(Arg->getType()) !=
            DL.getTypeAllocSize(AI->getAllocatedType()) ||
        !DL.typeSizeEqualsStoreSize(Arg->getType()) ||
        ArgCopyElisionCandidates.count(Arg)) {
      *Info = StaticAllocaInfo::Clobbered;
      continue;
    }

    LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
                      << '\n');

    // Mark this alloca and store for argument copy elision.
    *Info = StaticAllocaInfo::Elidable;
    ArgCopyElisionCandidates.insert({Arg, {AI, SI}});

    // Stop scanning if we've seen all arguments. This will happen early in -O0
    // builds, which is useful, because -O0 builds have large entry blocks and
    // many allocas.
    if (ArgCopyElisionCandidates.size() == NumArgs)
      break;
  }
}
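// The entry-block pattern recognized above looks roughly like this
// (illustrative IR only):
//   define void @f(i64 %x) {
//   entry:
//     %x.addr = alloca i64
//     store i64 %x, i64* %x.addr   ; fully initializes the alloca
//     ...
// If %x is later passed in a fixed stack slot, the alloca can reuse that slot
// and the store can be dropped; see tryToElideArgumentCopy below.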
/// Try to elide argument copies from memory into a local alloca. Succeeds if
/// ArgVal is a load from a suitable fixed stack object.
static void tryToElideArgumentCopy(
    FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
    DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
    SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
    ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
    SDValue ArgVal, bool &ArgHasUses) {
  // Check if this is a load from a fixed stack object.
  auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
  if (!LNode)
    return;
  auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
  if (!FINode)
    return;

  // Check that the fixed stack object is the right size and alignment.
  // Look at the alignment that the user wrote on the alloca instead of looking
  // at the stack object.
  auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
  assert(ArgCopyIter != ArgCopyElisionCandidates.end());
  const AllocaInst *AI = ArgCopyIter->second.first;
  int FixedIndex = FINode->getIndex();
  int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
  int OldIndex = AllocaIndex;
  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
  if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
    LLVM_DEBUG(
        dbgs() << "  argument copy elision failed due to bad fixed stack "
                  "object size\n");
    return;
  }
  Align RequiredAlignment = AI->getAlign();
  if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
    LLVM_DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca "
                         "greater than stack argument alignment ("
                      << DebugStr(RequiredAlignment) << " vs "
                      << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
    return;
  }

  // Perform the elision. Delete the old stack object and replace its only use
  // in the variable info map. Mark the stack object as mutable.
  LLVM_DEBUG({
    dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
           << "  Replacing frame index " << OldIndex << " with " << FixedIndex
           << '\n';
  });
  MFI.RemoveStackObject(OldIndex);
  MFI.setIsImmutableObjectIndex(FixedIndex, false);
  AllocaIndex = FixedIndex;
  ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
  Chains.push_back(ArgVal.getValue(1));

  // Avoid emitting code for the store implementing the copy.
  const StoreInst *SI = ArgCopyIter->second.second;
  ElidedArgCopyInstrs.insert(SI);

  // Check for uses of the argument again so that we can avoid exporting ArgVal
  // if it isn't used by anything other than the store.
  for (const Value *U : Arg.users()) {
    if (U != SI) {
      ArgHasUses = true;
      break;
    }
  }
}
void SelectionDAGISel::LowerArguments(const Function &F) {
  SelectionDAG &DAG = SDB->DAG;
  SDLoc dl = SDB->getCurSDLoc();
  const DataLayout &DL = DAG.getDataLayout();
  SmallVector<ISD::InputArg, 16> Ins;

  // In Naked functions we aren't going to save any registers.
  if (F.hasFnAttribute(Attribute::Naked))
    return;

  if (!FuncInfo->CanLowerReturn) {
    // Put in an sret pointer parameter before all the other parameters.
    SmallVector<EVT, 1> ValueVTs;
    ComputeValueVTs(*TLI, DAG.getDataLayout(),
                    F.getReturnType()->getPointerTo(
                        DAG.getDataLayout().getAllocaAddrSpace()),
                    ValueVTs);

    // NOTE: Assuming that a pointer will never break down to more than one VT
    // or one register.
    ISD::ArgFlagsTy Flags;
    Flags.setSRet();
    MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
    ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
                         ISD::InputArg::NoArgIndex, 0);
    Ins.push_back(RetArg);
  }

  // Look for stores of arguments to static allocas. Mark such arguments with a
  // flag to ask the target to give us the memory location of that argument if
  // available.
  ArgCopyElisionMapTy ArgCopyElisionCandidates;
  findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
                                    ArgCopyElisionCandidates);

  // Set up the incoming argument description vector.
  for (const Argument &Arg : F.args()) {
    unsigned ArgNo = Arg.getArgNo();
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
    bool isArgValueUsed = !Arg.use_empty();
    unsigned PartBase = 0;
    Type *FinalType = Arg.getType();
    if (Arg.hasAttribute(Attribute::ByVal))
      FinalType = Arg.getParamByValType();
    bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
        FinalType, F.getCallingConv(), F.isVarArg(), DL);
    for (unsigned Value = 0, NumValues = ValueVTs.size();
         Value != NumValues; ++Value) {
      EVT VT = ValueVTs[Value];
      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
      ISD::ArgFlagsTy Flags;

      if (Arg.getType()->isPointerTy()) {
        Flags.setPointer();
        Flags.setPointerAddrSpace(
            cast<PointerType>(Arg.getType())->getAddressSpace());
      }
      if (Arg.hasAttribute(Attribute::ZExt))
        Flags.setZExt();
      if (Arg.hasAttribute(Attribute::SExt))
        Flags.setSExt();
      if (Arg.hasAttribute(Attribute::InReg)) {
        // If we are using vectorcall calling convention, a structure that is
        // passed InReg - is surely an HVA
        if (F.getCallingConv() == CallingConv::X86_VectorCall &&
            isa<StructType>(Arg.getType())) {
          // The first value of a structure is marked
          if (0 == Value)
            Flags.setHvaStart();
          Flags.setHva();
        }
        // Set InReg Flag
        Flags.setInReg();
      }
      if (Arg.hasAttribute(Attribute::StructRet))
        Flags.setSRet();
      if (Arg.hasAttribute(Attribute::SwiftSelf))
        Flags.setSwiftSelf();
      if (Arg.hasAttribute(Attribute::SwiftAsync))
        Flags.setSwiftAsync();
      if (Arg.hasAttribute(Attribute::SwiftError))
        Flags.setSwiftError();
      if (Arg.hasAttribute(Attribute::ByVal))
        Flags.setByVal();
      if (Arg.hasAttribute(Attribute::ByRef))
        Flags.setByRef();
      if (Arg.hasAttribute(Attribute::InAlloca)) {
        Flags.setInAlloca();
        // Set the byval flag for CCAssignFn callbacks that don't know about
        // inalloca. This way we can know how many bytes we should've allocated
        // and how many bytes a callee cleanup function will pop. If we port
        // inalloca to more targets, we'll have to add custom inalloca handling
        // in the various CC lowering callbacks.
        Flags.setByVal();
      }
      if (Arg.hasAttribute(Attribute::Preallocated)) {
        Flags.setPreallocated();
        // Set the byval flag for CCAssignFn callbacks that don't know about
        // preallocated. This way we can know how many bytes we should've
        // allocated and how many bytes a callee cleanup function will pop. If
        // we port preallocated to more targets, we'll have to add custom
        // preallocated handling in the various CC lowering callbacks.
        Flags.setByVal();
      }

      // Certain targets (such as MIPS), may have a different ABI alignment
      // for a type depending on the context. Give the target a chance to
      // specify the alignment it wants.
      const Align OriginalAlignment(
          TLI->getABIAlignmentForCallingConv(ArgTy, DL));
      Flags.setOrigAlign(OriginalAlignment);

      Align MemAlign;
      Type *ArgMemTy = nullptr;
      if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
          Flags.isByRef()) {
        if (!ArgMemTy)
          ArgMemTy = Arg.getPointeeInMemoryValueType();

        uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);

        // For in-memory arguments, size and alignment should be passed from FE.
        // BE will guess if this info is not there but there are cases it cannot
        // get right.
        if (auto ParamAlign = Arg.getParamStackAlign())
          MemAlign = *ParamAlign;
        else if ((ParamAlign = Arg.getParamAlign()))
          MemAlign = *ParamAlign;
        else
          MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
        if (Flags.isByRef())
          Flags.setByRefSize(MemSize);
        else
          Flags.setByValSize(MemSize);
      } else if (auto ParamAlign = Arg.getParamStackAlign()) {
        MemAlign = *ParamAlign;
      } else {
        MemAlign = OriginalAlignment;
      }
      Flags.setMemAlign(MemAlign);

      if (Arg.hasAttribute(Attribute::Nest))
        Flags.setNest();
      if (NeedsRegBlock)
        Flags.setInConsecutiveRegs();
      if (ArgCopyElisionCandidates.count(&Arg))
        Flags.setCopyElisionCandidate();
      if (Arg.hasAttribute(Attribute::Returned))
        Flags.setReturned();

      MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
          *CurDAG->getContext(), F.getCallingConv(), VT);
      unsigned NumRegs = TLI->getNumRegistersForCallingConv(
          *CurDAG->getContext(), F.getCallingConv(), VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        // For scalable vectors, use the minimum size; individual targets
        // are responsible for handling scalable vector arguments and
        // return values.
        ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
                 ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
        if (NumRegs > 1 && i == 0)
          MyFlags.Flags.setSplit();
        // if it isn't first piece, alignment must be 1
        else if (i > 0) {
          MyFlags.Flags.setOrigAlign(Align(1));
          if (i == NumRegs - 1)
            MyFlags.Flags.setSplitEnd();
        }
        Ins.push_back(MyFlags);
      }
      if (NeedsRegBlock && Value == NumValues - 1)
        Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
      PartBase += VT.getStoreSize().getKnownMinSize();
    }
  }

  // Call the target to set up the argument values.
  SmallVector<SDValue, 8> InVals;
  SDValue NewRoot = TLI->LowerFormalArguments(
      DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);

  // Verify that the target's LowerFormalArguments behaved as expected.
  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
         "LowerFormalArguments didn't return a valid chain!");
  assert(InVals.size() == Ins.size() &&
         "LowerFormalArguments didn't emit the correct number of values!");
  LLVM_DEBUG({
    for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
      assert(InVals[i].getNode() &&
             "LowerFormalArguments emitted a null value!");
      assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
             "LowerFormalArguments emitted a value with the wrong type!");
    }
  });

  // Update the DAG with the new chain value resulting from argument lowering.
  DAG.setRoot(NewRoot);

  // Set up the argument values.
  unsigned i = 0;
  if (!FuncInfo->CanLowerReturn) {
    // Create a virtual register for the sret pointer, and put in a copy
    // from the sret argument into it.
    SmallVector<EVT, 1> ValueVTs;
    ComputeValueVTs(*TLI, DAG.getDataLayout(),
                    F.getReturnType()->getPointerTo(
                        DAG.getDataLayout().getAllocaAddrSpace()),
                    ValueVTs);
    MVT VT = ValueVTs[0].getSimpleVT();
    MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
    Optional<ISD::NodeType> AssertOp = None;
    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
                                        nullptr, F.getCallingConv(), AssertOp);

    MachineFunction& MF = SDB->DAG.getMachineFunction();
    MachineRegisterInfo& RegInfo = MF.getRegInfo();
    Register SRetReg =
        RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
    FuncInfo->DemoteRegister = SRetReg;
    NewRoot =
        SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
    DAG.setRoot(NewRoot);

    // i indexes lowered arguments. Bump it past the hidden sret argument.
    ++i;
  }

  SmallVector<SDValue, 4> Chains;
  DenseMap<int, int> ArgCopyElisionFrameIndexMap;
  for (const Argument &Arg : F.args()) {
    SmallVector<SDValue, 4> ArgValues;
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
    unsigned NumValues = ValueVTs.size();
    if (NumValues == 0)
      continue;

    bool ArgHasUses = !Arg.use_empty();

    // Elide the copying store if the target loaded this argument from a
    // suitable fixed stack object.
    if (Ins[i].Flags.isCopyElisionCandidate()) {
      tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
                             ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
                             InVals[i], ArgHasUses);
    }

    // If this argument is unused then remember its value. It is used to generate
    // debugging information.
    bool isSwiftErrorArg =
        TLI->supportSwiftError() &&
        Arg.hasAttribute(Attribute::SwiftError);
    if (!ArgHasUses && !isSwiftErrorArg) {
      SDB->setUnusedArgValue(&Arg, InVals[i]);

      // Also remember any frame index for use in FastISel.
      if (FrameIndexSDNode *FI =
          dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
        FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
    }

    for (unsigned Val = 0; Val != NumValues; ++Val) {
      EVT VT = ValueVTs[Val];
      MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
                                                      F.getCallingConv(), VT);
      unsigned NumParts = TLI->getNumRegistersForCallingConv(
          *CurDAG->getContext(), F.getCallingConv(), VT);

      // Even an apparent 'unused' swifterror argument needs to be returned. So
      // we do generate a copy for it that can be used on return from the
      // function.
      if (ArgHasUses || isSwiftErrorArg) {
        Optional<ISD::NodeType> AssertOp;
        if (Arg.hasAttribute(Attribute::SExt))
          AssertOp = ISD::AssertSext;
        else if (Arg.hasAttribute(Attribute::ZExt))
          AssertOp = ISD::AssertZext;

        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
                                             PartVT, VT, nullptr,
                                             F.getCallingConv(), AssertOp));
      }

      i += NumParts;
    }

    // We don't need to do anything else for unused arguments.
    if (ArgValues.empty())
      continue;

    // Note down frame index.
    if (FrameIndexSDNode *FI =
        dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
      FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());

    SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
                                     SDB->getCurSDLoc());

    SDB->setValue(&Arg, Res);
    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
      // We want to associate the argument with the frame index, among
      // involved operands, that correspond to the lowest address. The
      // getCopyFromParts function, called earlier, is swapping the order of
      // the operands to BUILD_PAIR depending on endianness. The result of
      // that swapping is that the least significant bits of the argument will
      // be in the first operand of the BUILD_PAIR node, and the most
      // significant bits will be in the second operand.
      unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
      if (LoadSDNode *LNode =
          dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
        if (FrameIndexSDNode *FI =
            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
          FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
    }

    // Analyses past this point are naive and don't expect an assertion.
    if (Res.getOpcode() == ISD::AssertZext)
      Res = Res.getOperand(0);

    // Update the SwiftErrorVRegDefMap.
    if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
      if (Register::isVirtualRegister(Reg))
        SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
                                   Reg);
    }

    // If this argument is live outside of the entry block, insert a copy from
    // wherever we got it to the vreg that other BB's will reference it as.
    if (Res.getOpcode() == ISD::CopyFromReg) {
      // If we can, though, try to skip creating an unnecessary vreg.
      // FIXME: This isn't very clean... it would be nice to make this more
      // general.
      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
      if (Register::isVirtualRegister(Reg)) {
        FuncInfo->ValueMap[&Arg] = Reg;
        continue;
      }
    }
    if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
      FuncInfo->InitializeRegForValue(&Arg);
      SDB->CopyToExportRegsIfNeeded(&Arg);
    }
  }

  if (!Chains.empty()) {
    Chains.push_back(NewRoot);
    NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
  }

  DAG.setRoot(NewRoot);

  assert(i == InVals.size() && "Argument register count mismatch!");

  // If any argument copy elisions occurred and we have debug info, update the
  // stale frame indices used in the dbg.declare variable info table.
  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
  if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
    for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
      auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
      if (I != ArgCopyElisionFrameIndexMap.end())
        VI.Slot = I->second;
    }
  }

  // Finally, if the target has anything special to do, allow it to do so.
  emitFunctionEntryCode();
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
/// ensure constants are generated when needed. Remember the virtual registers
/// that need to be added to the Machine PHI nodes as input. We cannot just
/// directly add them, because expansion might result in multiple MBB's for one
/// BB. As such, the start of the BB might correspond to a different MBB than
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Instruction *TI = LLVMBB->getTerminator();

  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;

  // Check PHI nodes in successors that expect a value to be available from this
  // block.
  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
    const BasicBlock *SuccBB = TI->getSuccessor(succ);
    if (!isa<PHINode>(SuccBB->begin())) continue;
    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];

    // If this terminator has multiple identical successors (common for
    // switches), only handle each succ once.
    if (!SuccsHandled.insert(SuccMBB).second)
      continue;

    MachineBasicBlock::iterator MBBI = SuccMBB->begin();

    // At this point we know that there is a 1-1 correspondence between LLVM PHI
    // nodes and Machine PHI nodes, but the incoming operands have not been
    // emitted yet.
    for (const PHINode &PN : SuccBB->phis()) {
      // Ignore dead phi's.
      if (PN.use_empty())
        continue;

      // Skip empty types
      if (PN.getType()->isEmptyTy())
        continue;

      unsigned Reg;
      const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);

      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
        unsigned &RegOut = ConstantsOut[C];
        if (RegOut == 0) {
          RegOut = FuncInfo.CreateRegs(C);
          // We need to zero/sign extend ConstantInt phi operands to match
          // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
          ISD::NodeType ExtendType = ISD::ANY_EXTEND;
          if (auto *CI = dyn_cast<ConstantInt>(C))
            ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND
                                                    : ISD::ZERO_EXTEND;
          CopyValueToVirtualRegister(C, RegOut, ExtendType);
        }
        Reg = RegOut;
      } else {
        DenseMap<const Value *, Register>::iterator I =
          FuncInfo.ValueMap.find(PHIOp);
        if (I != FuncInfo.ValueMap.end())
          Reg = I->second;
        else {
          assert(isa<AllocaInst>(PHIOp) &&
                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                 "Didn't codegen value into a register!??");
          Reg = FuncInfo.CreateRegs(PHIOp);
          CopyValueToVirtualRegister(PHIOp, Reg);
        }
      }

      // Remember that this register needs to be added to the machine PHI node as
      // the input for this MBB.
      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
        EVT VT = ValueVTs[vti];
        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
          FuncInfo.PHINodesToUpdate.push_back(
              std::make_pair(&*MBBI++, Reg + i));
        Reg += NumRegisters;
      }
    }
  }

  ConstantsOut.clear();
}
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
  MachineFunction::iterator I(MBB);
  if (++I == FuncInfo.MF->end())
    return nullptr;
  return &*I;
}
/// During lowering new call nodes can be created (such as memset, etc.).
/// Those will become new roots of the current DAG, but complications arise
/// when they are tail calls. In such cases, the call lowering will update
/// the root, but the builder still needs to know that a tail call has been
/// lowered in order to avoid generating an additional return.
void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
  // If the node is null, we do have a tail call.
  if (MaybeTC.getNode() != nullptr)
    DAG.setRoot(MaybeTC);
  else
    HasTailCall = true;
}
10814 void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W
, Value
*Cond
,
10815 MachineBasicBlock
*SwitchMBB
,
10816 MachineBasicBlock
*DefaultMBB
) {
10817 MachineFunction
*CurMF
= FuncInfo
.MF
;
10818 MachineBasicBlock
*NextMBB
= nullptr;
10819 MachineFunction::iterator
BBI(W
.MBB
);
10820 if (++BBI
!= FuncInfo
.MF
->end())
10823 unsigned Size
= W
.LastCluster
- W
.FirstCluster
+ 1;
10825 BranchProbabilityInfo
*BPI
= FuncInfo
.BPI
;
10827 if (Size
== 2 && W
.MBB
== SwitchMBB
) {
10828 // If any two of the cases has the same destination, and if one value
10829 // is the same as the other, but has one bit unset that the other has set,
10830 // use bit manipulation to do two compares at once. For example:
10831 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
10832 // TODO: This could be extended to merge any 2 cases in switches with 3
10834 // TODO: Handle cases where W.CaseBB != SwitchBB.
10835 CaseCluster
&Small
= *W
.FirstCluster
;
10836 CaseCluster
&Big
= *W
.LastCluster
;
10838 if (Small
.Low
== Small
.High
&& Big
.Low
== Big
.High
&&
10839 Small
.MBB
== Big
.MBB
) {
10840 const APInt
&SmallValue
= Small
.Low
->getValue();
10841 const APInt
&BigValue
= Big
.Low
->getValue();
10843 // Check that there is only one bit different.
10844 APInt CommonBit
= BigValue
^ SmallValue
;
10845 if (CommonBit
.isPowerOf2()) {
10846 SDValue CondLHS
= getValue(Cond
);
10847 EVT VT
= CondLHS
.getValueType();
10848 SDLoc DL
= getCurSDLoc();
10850 SDValue Or
= DAG
.getNode(ISD::OR
, DL
, VT
, CondLHS
,
10851 DAG
.getConstant(CommonBit
, DL
, VT
));
10852 SDValue Cond
= DAG
.getSetCC(
10853 DL
, MVT::i1
, Or
, DAG
.getConstant(BigValue
| SmallValue
, DL
, VT
),

        // Update successor info.
        // Both Small and Big will jump to Small.BB, so we sum up the
        // probabilities.
        addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
        if (BPI)
          addSuccessorWithProb(
              SwitchMBB, DefaultMBB,
              // The default destination is the first successor in IR.
              BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
        else
          addSuccessorWithProb(SwitchMBB, DefaultMBB);

        // Insert the true branch.
        SDValue BrCond =
            DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
                        DAG.getBasicBlock(Small.MBB));
        // Insert the false branch.
        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
                             DAG.getBasicBlock(DefaultMBB));

        DAG.setRoot(BrCond);
        return;
      }
    }
  }

  if (TM.getOptLevel() != CodeGenOpt::None) {
    // Here, we order cases by probability so the most likely case will be
    // checked first. However, two clusters can have the same probability in
    // which case their relative ordering is non-deterministic. So we use Low
    // as a tie-breaker as clusters are guaranteed to never overlap.
    llvm::sort(W.FirstCluster, W.LastCluster + 1,
               [](const CaseCluster &a, const CaseCluster &b) {
                 return a.Prob != b.Prob
                            ? a.Prob > b.Prob
                            : a.Low->getValue().slt(b.Low->getValue());
               });

    // Rearrange the case blocks so that the last one falls through if possible
    // without changing the order of probabilities.
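    // Note: after the sort above, every candidate the loop below examines has
    // the same probability as the last cluster (the loop breaks as soon as a
    // strictly larger probability is seen), so the swap cannot perturb the
    // probability ordering.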
    for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
      --I;
      if (I->Prob > W.LastCluster->Prob)
        break;
      if (I->Kind == CC_Range && I->MBB == NextMBB) {
        std::swap(*I, *W.LastCluster);
        break;
      }
    }
  }

  // Compute total probability.
  BranchProbability DefaultProb = W.DefaultProb;
  BranchProbability UnhandledProbs = DefaultProb;
  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
    UnhandledProbs += I->Prob;

  MachineBasicBlock *CurMBB = W.MBB;
  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
    bool FallthroughUnreachable = false;
    MachineBasicBlock *Fallthrough;
    if (I == W.LastCluster) {
      // For the last cluster, fall through to the default destination.
      Fallthrough = DefaultMBB;
      FallthroughUnreachable = isa<UnreachableInst>(
          DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
    } else {
      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
      CurMF->insert(BBI, Fallthrough);
      // Put Cond in a virtual register to make it available from the new blocks.
      ExportFromCurrentBlock(Cond);
    }
    UnhandledProbs -= I->Prob;
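
    // At this point UnhandledProbs is DefaultProb plus the probabilities of
    // all clusters after I, i.e. the likelihood that this cluster's check
    // fails and control falls through; it is used as the false-edge weight
    // for the checks emitted below.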
    switch (I->Kind) {
    case CC_JumpTable: {
      // FIXME: Optimize away range check based on pivot comparisons.
      JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
      SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;

      // The jump block hasn't been inserted yet; insert it here.
      MachineBasicBlock *JumpMBB = JT->MBB;
      CurMF->insert(BBI, JumpMBB);

      auto JumpProb = I->Prob;
      auto FallthroughProb = UnhandledProbs;

      // If the default statement is a target of the jump table, we evenly
      // distribute the default probability to successors of CurMBB. Also
      // update the probability on the edge from JumpMBB to Fallthrough.
      for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
                                            SE = JumpMBB->succ_end();
           SI != SE; ++SI) {
        if (*SI == DefaultMBB) {
          JumpProb += DefaultProb / 2;
          FallthroughProb -= DefaultProb / 2;
          JumpMBB->setSuccProbability(SI, DefaultProb / 2);
          JumpMBB->normalizeSuccProbs();
          break;
        }
      }

      if (FallthroughUnreachable)
        JTH->FallthroughUnreachable = true;

      if (!JTH->FallthroughUnreachable)
        addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
      addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
      CurMBB->normalizeSuccProbs();

      // The jump table header will be inserted in our current block, do the
      // range check, and fall through to our fallthrough block.
      JTH->HeaderBB = CurMBB;
      JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.

      // If we're in the right place, emit the jump table header right now.
      if (CurMBB == SwitchMBB) {
        visitJumpTableHeader(*JT, *JTH, SwitchMBB);
        JTH->Emitted = true;
      }
      break;
    }
    case CC_BitTests: {
      // FIXME: Optimize away range check based on pivot comparisons.
      BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];

      // The bit test blocks haven't been inserted yet; insert them here.
      for (BitTestCase &BTC : BTB->Cases)
        CurMF->insert(BBI, BTC.ThisBB);

      // Fill in fields of the BitTestBlock.
      BTB->Parent = CurMBB;
      BTB->Default = Fallthrough;

      BTB->DefaultProb = UnhandledProbs;
      // If the cases in bit test don't form a contiguous range, we evenly
      // distribute the probability on the edge to Fallthrough to two
      // successors of CurMBB.
      if (!BTB->ContiguousRange) {
        BTB->Prob += DefaultProb / 2;
        BTB->DefaultProb -= DefaultProb / 2;
      }

      if (FallthroughUnreachable)
        BTB->FallthroughUnreachable = true;

      // If we're in the right place, emit the bit test header right now.
      if (CurMBB == SwitchMBB) {
        visitBitTestHeader(*BTB, SwitchMBB);
        BTB->Emitted = true;
      }
      break;
    }
    case CC_Range: {
      const Value *RHS, *LHS, *MHS;
      ISD::CondCode CC;
      if (I->Low == I->High) {
        // Check Cond == I->Low.
        CC = ISD::SETEQ;
        LHS = Cond;
        RHS = I->Low;
        MHS = nullptr;
      } else {
        // Check I->Low <= Cond <= I->High.
        CC = ISD::SETLE;
        LHS = I->Low;
        MHS = Cond;
        RHS = I->High;
      }

      // If Fallthrough is unreachable, fold away the comparison.
      if (FallthroughUnreachable)
        CC = ISD::SETTRUE;

      // The false probability is the sum of all unhandled cases.
      CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
                   getCurSDLoc(), I->Prob, UnhandledProbs);

      if (CurMBB == SwitchMBB)
        visitSwitchCase(CB, SwitchMBB);
      else
        SL->SwitchCases.push_back(CB);
      break;
    }
    }
    CurMBB = Fallthrough;
  }
}
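
// Note on caseClusterRank() below: it counts how many clusters in
// [First, Last] would be visited before CC if the clusters were ordered by
// decreasing probability, with ties broken by the signed case value.
// splitWorkItem() uses this rank to check that moving a cluster across the
// pivot does not demote it.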
unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
                                              CaseClusterIt First,
                                              CaseClusterIt Last) {
  return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
    if (X.Prob != CC.Prob)
      return X.Prob > CC.Prob;

    // Ties are broken by comparing the case value.
    return X.Low->getValue().slt(CC.Low->getValue());
  });
}

void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
                                        const SwitchWorkListItem &W,
                                        Value *Cond,
                                        MachineBasicBlock *SwitchMBB) {
  assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
         "Clusters not sorted?");
  assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");

  // Balance the tree based on branch probabilities to create a near-optimal (in
  // terms of search time given key frequency) binary search tree. See e.g. Kurt
  // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
  CaseClusterIt LastLeft = W.FirstCluster;
  CaseClusterIt FirstRight = W.LastCluster;
  auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
  auto RightProb = FirstRight->Prob + W.DefaultProb / 2;

  // Move LastLeft and FirstRight towards each other from opposite directions to
  // find a partitioning of the clusters which balances the probability on both
  // sides. If LeftProb and RightProb are equal, alternate which side is
  // taken to ensure 0-probability nodes are distributed evenly.
  unsigned I = 0;
  while (LastLeft + 1 < FirstRight) {
    if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
      LeftProb += (++LastLeft)->Prob;
    else
      RightProb += (--FirstRight)->Prob;
    I++;
  }

  // Our binary search tree differs from a typical BST in that ours can have up
  // to three values in each leaf. The pivot selection above doesn't take that
  // into account, which means the tree might require more nodes and be less
  // efficient. We compensate for this here.

  unsigned NumLeft = LastLeft - W.FirstCluster + 1;
  unsigned NumRight = W.LastCluster - FirstRight + 1;

  if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
    // If one side has less than 3 clusters, and the other has more than 3,
    // consider taking a cluster from the other side.

    if (NumLeft < NumRight) {
      // Consider moving the first cluster on the right to the left side.
      CaseCluster &CC = *FirstRight;
      unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
      unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
      if (LeftSideRank <= RightSideRank) {
        // Moving the cluster to the left does not demote it.
        ++LastLeft;
        ++FirstRight;
      }
    } else {
      assert(NumRight < NumLeft);
      // Consider moving the last element on the left to the right side.
      CaseCluster &CC = *LastLeft;
      unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
      unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
      if (RightSideRank <= LeftSideRank) {
        // Moving the cluster to the right does not demote it.
        --LastLeft;
        --FirstRight;
      }
    }
  }

  assert(LastLeft + 1 == FirstRight);
  assert(LastLeft >= W.FirstCluster);
  assert(FirstRight <= W.LastCluster);

  // Use the first element on the right as pivot since we will make less-than
  // comparisons against it.
  CaseClusterIt PivotCluster = FirstRight;
  assert(PivotCluster > W.FirstCluster);
  assert(PivotCluster <= W.LastCluster);

  CaseClusterIt FirstLeft = W.FirstCluster;
  CaseClusterIt LastRight = W.LastCluster;

  const ConstantInt *Pivot = PivotCluster->Low;

  // New blocks will be inserted immediately after the current one.
  MachineFunction::iterator BBI(W.MBB);

  // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
  // we can branch to its destination directly if it's squeezed exactly in
  // between the known lower bound and Pivot - 1.
  MachineBasicBlock *LeftMBB;
  if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
      FirstLeft->Low == W.GE &&
      (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
    LeftMBB = FirstLeft->MBB;
  } else {
    LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
    FuncInfo.MF->insert(BBI, LeftMBB);
    WorkList.push_back(
        {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
    // Put Cond in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(Cond);
  }

  // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
  // single cluster, RHS.Low == Pivot, and we can branch to its destination
  // directly if RHS.High equals the current upper bound.
  MachineBasicBlock *RightMBB;
  if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
      W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
    RightMBB = FirstRight->MBB;
  } else {
    RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
    FuncInfo.MF->insert(BBI, RightMBB);
    WorkList.push_back(
        {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
    // Put Cond in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(Cond);
  }

  // Create the CaseBlock record that will be used to lower the branch.
  CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
               getCurSDLoc(), LeftProb, RightProb);

  if (W.MBB == SwitchMBB)
    visitSwitchCase(CB, SwitchMBB);
  else
    SL->SwitchCases.push_back(CB);
}

// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
// from the switch statement.
static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
                                            BranchProbability PeeledCaseProb) {
  if (PeeledCaseProb == BranchProbability::getOne())
    return BranchProbability::getZero();
  BranchProbability SwitchProb = PeeledCaseProb.getCompl();
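
  // In effect this computes CaseProb / (1 - PeeledCaseProb): the remaining
  // (non-peeled) probability mass is rescaled so the surviving cases sum back
  // up to one. The max() below guards against rounding producing a numerator
  // larger than the denominator.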
  uint32_t Numerator = CaseProb.getNumerator();
  uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
  return BranchProbability(Numerator, std::max(Numerator, Denominator));
}

// Try to peel the top probability case if it exceeds the threshold.
// Return current MachineBasicBlock for the switch statement if the peeling
// does not occur.
// If the peeling is performed, return the newly created MachineBasicBlock
// for the peeled switch statement. Also update Clusters to remove the peeled
// case. PeeledCaseProb is the BranchProbability for the peeled case.
MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
    const SwitchInst &SI, CaseClusterVector &Clusters,
    BranchProbability &PeeledCaseProb) {
  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
  // Don't perform if there is only one cluster or optimizing for size.
  if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
      TM.getOptLevel() == CodeGenOpt::None ||
      SwitchMBB->getParent()->getFunction().hasMinSize())
    return SwitchMBB;
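
  // Note: SwitchPeelThreshold is a percentage; a value above 100 makes the
  // early return above always fire, which is how peeling is disabled.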

  BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
  unsigned PeeledCaseIndex = 0;
  bool SwitchPeeled = false;
  for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
    CaseCluster &CC = Clusters[Index];
    if (CC.Prob < TopCaseProb)
      continue;
    TopCaseProb = CC.Prob;
    PeeledCaseIndex = Index;
    SwitchPeeled = true;
  }
  if (!SwitchPeeled)
    return SwitchMBB;

  LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
                    << TopCaseProb << "\n");

  // Record the MBB for the peeled switch statement.
  MachineFunction::iterator BBI(SwitchMBB);
  ++BBI;
  MachineBasicBlock *PeeledSwitchMBB =
      FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
  FuncInfo.MF->insert(BBI, PeeledSwitchMBB);

  ExportFromCurrentBlock(SI.getCondition());
  auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
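  // A single-cluster work item: lower just the peeled case out of SwitchMBB,
  // with PeeledSwitchMBB serving as the default destination (i.e. "all the
  // remaining cases") for that comparison.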
  SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
                          nullptr, nullptr, TopCaseProb.getCompl()};
  lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);

  Clusters.erase(PeeledCaseIt);
  for (CaseCluster &CC : Clusters) {
    LLVM_DEBUG(
        dbgs() << "Scale the probability for one cluster, before scaling: "
               << CC.Prob << "\n");
    CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
    LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
  }
  PeeledCaseProb = TopCaseProb;
  return PeeledSwitchMBB;
}

void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  // Extract cases from the switch.
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  CaseClusterVector Clusters;
  Clusters.reserve(SI.getNumCases());
  for (auto I : SI.cases()) {
    MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
    const ConstantInt *CaseVal = I.getCaseValue();
    BranchProbability Prob =
        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
            : BranchProbability(1, SI.getNumCases() + 1);
    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
  }

  MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];

  // Cluster adjacent cases with the same destination. We do this at all
  // optimization levels because it's cheap to do and will make codegen faster
  // if there are many clusters.
  sortAndRangeify(Clusters);

  // The branch probability of the peeled case.
  BranchProbability PeeledCaseProb = BranchProbability::getZero();
  MachineBasicBlock *PeeledSwitchMBB =
      peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);

  // If there is only the default destination, jump there directly.
  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
  if (Clusters.empty()) {
    assert(PeeledSwitchMBB == SwitchMBB);
    SwitchMBB->addSuccessor(DefaultMBB);
    if (DefaultMBB != NextBlock(SwitchMBB)) {
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
    }
    return;
  }

  SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
  SL->findBitTestClusters(Clusters, &SI);

  LLVM_DEBUG({
    dbgs() << "Case clusters: ";
    for (const CaseCluster &C : Clusters) {
      if (C.Kind == CC_JumpTable)
        dbgs() << "JT:";
      if (C.Kind == CC_BitTests)
        dbgs() << "BT:";

      C.Low->getValue().print(dbgs(), true);
      if (C.Low != C.High) {
        dbgs() << '-';
        C.High->getValue().print(dbgs(), true);
      }
      dbgs() << ' ';
    }
    dbgs() << '\n';
  });

  assert(!Clusters.empty());
  SwitchWorkList WorkList;
  CaseClusterIt First = Clusters.begin();
  CaseClusterIt Last = Clusters.end() - 1;
  auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
  // Scale the branch probability for DefaultMBB if the peel occurs and
  // DefaultMBB is not replaced.
  if (PeeledCaseProb != BranchProbability::getZero() &&
      DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
    DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
  WorkList.push_back(
      {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});

  while (!WorkList.empty()) {
    SwitchWorkListItem W = WorkList.pop_back_val();
    unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;

    if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
        !DefaultMBB->getParent()->getFunction().hasMinSize()) {
      // For optimized builds, lower large range as a balanced binary tree.
      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
      continue;
    }

    lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
  }
}
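
// visitStepVector lowers the experimental stepvector intrinsic: the result is
// the sequence <0, 1, 2, ...> of the requested vector type, built via
// DAG.getStepVector (a STEP_VECTOR node for scalable types; fixed-length
// types get an equivalent constant sequence).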
void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto DL = getCurSDLoc();
  EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  setValue(&I, DAG.getStepVector(DL, ResultVT));
}

void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());

  SDLoc DL = getCurSDLoc();
  SDValue V = getValue(I.getOperand(0));
  assert(VT == V.getValueType() && "Malformed vector.reverse!");

  if (VT.isScalableVector()) {
    setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
    return;
  }

  // Use VECTOR_SHUFFLE for the fixed-length vector
  // to maintain existing behavior.
  SmallVector<int, 8> Mask;
  unsigned NumElts = VT.getVectorMinNumElements();
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(NumElts - 1 - i);
  setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}

void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
                  ValueVTs);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0) return;
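
  // An aggregate operand may lower to more than one SDValue; freeze each
  // piece individually and reassemble the result with MERGE_VALUES below.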
  SmallVector<SDValue, 4> Values(NumValues);
  SDValue Op = getValue(I.getOperand(0));

  for (unsigned i = 0; i != NumValues; ++i)
    Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
                            SDValue(Op.getNode(), Op.getResNo() + i));

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(ValueVTs), Values));
}

void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());

  SDLoc DL = getCurSDLoc();
  SDValue V1 = getValue(I.getOperand(0));
  SDValue V2 = getValue(I.getOperand(1));
  int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();

  // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
  if (VT.isScalableVector()) {
    MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
    setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
                             DAG.getConstant(Imm, DL, IdxVT)));
    return;
  }

  unsigned NumElts = VT.getVectorNumElements();

  uint64_t Idx = (NumElts + Imm) % NumElts;
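
  // Idx is the starting element within the concatenation of V1 and V2. A
  // negative Imm counts back from the end of V1; e.g. with 4 elements,
  // Imm == 1 yields the mask <1, 2, 3, 4> and Imm == -1 yields <3, 4, 5, 6>.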
  // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
  SmallVector<int, 8> Mask;
  for (unsigned i = 0; i < NumElts; ++i)
    Mask.push_back(Idx + i);
  setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
}