//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLoweringBase class.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;

static cl::opt<bool> JumpIsExpensiveOverride(
    "jump-is-expensive", cl::init(false),
    cl::desc("Do not create extra branches to split comparison logic."),
    cl::Hidden);

static cl::opt<unsigned> MinimumJumpTableEntries
  ("min-jump-table-entries", cl::init(4), cl::Hidden,
   cl::desc("Set minimum number of entries to use a jump table."));

static cl::opt<unsigned> MaximumJumpTableSize
  ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden,
   cl::desc("Set maximum size of jump tables."));

/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
    JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
                     cl::desc("Minimum density for building a jump table in "
                              "a normal function"));

/// Minimum jump table density for -Os or -Oz functions.
static cl::opt<unsigned> OptsizeJumpTableDensity(
    "optsize-jump-table-density", cl::init(40), cl::Hidden,
    cl::desc("Minimum density for building a jump table in "
             "an optsize function"));

// FIXME: This option only exists to test whether strict fp operations are
// processed correctly, by preventing strict fp operations from being mutated
// into normal fp operations during development. Once the backend supports
// strict float operations, this option will be meaningless.
static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
       cl::desc("Don't mutate strict-float node to a legalize node"),
       cl::init(false), cl::Hidden);
/// GetFPLibCall - Helper to return the right libcall for the given floating
/// point type, or UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPLibCall(EVT VT,
                                   RTLIB::Libcall Call_F32,
                                   RTLIB::Libcall Call_F64,
                                   RTLIB::Libcall Call_F80,
                                   RTLIB::Libcall Call_F128,
                                   RTLIB::Libcall Call_PPCF128) {
  return
    VT == MVT::f32 ? Call_F32 :
    VT == MVT::f64 ? Call_F64 :
    VT == MVT::f80 ? Call_F80 :
    VT == MVT::f128 ? Call_F128 :
    VT == MVT::ppcf128 ? Call_PPCF128 :
    RTLIB::UNKNOWN_LIBCALL;
}
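// Added example (not in the original source): callers pass the five per-type
// libcalls and let the VT select one.  For instance, RTLIB::getPOWI below
// forwards POWI_F32..POWI_PPCF128, so a query for MVT::f64 resolves to
// POWI_F64, while any other VT yields UNKNOWN_LIBCALL.
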
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f16) {
    if (RetVT == MVT::f32)
      return FPEXT_F16_F32;
    if (RetVT == MVT::f64)
      return FPEXT_F16_F64;
    if (RetVT == MVT::f80)
      return FPEXT_F16_F80;
    if (RetVT == MVT::f128)
      return FPEXT_F16_F128;
  } else if (OpVT == MVT::f32) {
    if (RetVT == MVT::f64)
      return FPEXT_F32_F64;
    if (RetVT == MVT::f128)
      return FPEXT_F32_F128;
    if (RetVT == MVT::ppcf128)
      return FPEXT_F32_PPCF128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::f128)
      return FPEXT_F64_F128;
    else if (RetVT == MVT::ppcf128)
      return FPEXT_F64_PPCF128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::f128)
      return FPEXT_F80_F128;
  } else if (OpVT == MVT::bf16) {
    if (RetVT == MVT::f32)
      return FPEXT_BF16_F32;
  }

  return UNKNOWN_LIBCALL;
}
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
  if (RetVT == MVT::f16) {
    if (OpVT == MVT::f32)
      return FPROUND_F32_F16;
    if (OpVT == MVT::f64)
      return FPROUND_F64_F16;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F16;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F16;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F16;
  } else if (RetVT == MVT::bf16) {
    if (OpVT == MVT::f32)
      return FPROUND_F32_BF16;
    if (OpVT == MVT::f64)
      return FPROUND_F64_BF16;
  } else if (RetVT == MVT::f32) {
    if (OpVT == MVT::f64)
      return FPROUND_F64_F32;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F32;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F32;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F32;
  } else if (RetVT == MVT::f64) {
    if (OpVT == MVT::f80)
      return FPROUND_F80_F64;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F64;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F64;
  } else if (RetVT == MVT::f80) {
    if (OpVT == MVT::f128)
      return FPROUND_F128_F80;
  }

  return UNKNOWN_LIBCALL;
}
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f16) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F16_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F16_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F16_I128;
  } else if (OpVT == MVT::f32) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F80_I128;
  } else if (OpVT == MVT::f128) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F128_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F128_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F128_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOSINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_PPCF128_I128;
  }
  return UNKNOWN_LIBCALL;
}
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f16) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F16_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F16_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F16_I128;
  } else if (OpVT == MVT::f32) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F80_I128;
  } else if (OpVT == MVT::f128) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F128_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F128_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F128_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOUINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_PPCF128_I128;
  }
  return UNKNOWN_LIBCALL;
}
/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f16)
      return SINTTOFP_I32_F16;
    if (RetVT == MVT::f32)
      return SINTTOFP_I32_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I32_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I32_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I32_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f16)
      return SINTTOFP_I64_F16;
    if (RetVT == MVT::f32)
      return SINTTOFP_I64_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I64_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I64_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I64_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f16)
      return SINTTOFP_I128_F16;
    if (RetVT == MVT::f32)
      return SINTTOFP_I128_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I128_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I128_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I128_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I128_PPCF128;
  }
  return UNKNOWN_LIBCALL;
}
/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f16)
      return UINTTOFP_I32_F16;
    if (RetVT == MVT::f32)
      return UINTTOFP_I32_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I32_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I32_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I32_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f16)
      return UINTTOFP_I64_F16;
    if (RetVT == MVT::f32)
      return UINTTOFP_I64_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I64_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I64_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I64_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f16)
      return UINTTOFP_I128_F16;
    if (RetVT == MVT::f32)
      return UINTTOFP_I128_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I128_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I128_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I128_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I128_PPCF128;
  }
  return UNKNOWN_LIBCALL;
}
RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) {
  return getFPLibCall(RetVT, POWI_F32, POWI_F64, POWI_F80, POWI_F128,
                      POWI_PPCF128);
}

RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) {
  return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128,
                      LDEXP_PPCF128);
}

RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
  return getFPLibCall(RetVT, FREXP_F32, FREXP_F64, FREXP_F80, FREXP_F128,
                      FREXP_PPCF128);
}
RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4],
                                             AtomicOrdering Order,
                                             uint64_t MemSize) {
  unsigned ModeN, ModelN;
  switch (MemSize) {
  case 1:
    ModeN = 0;
    break;
  case 2:
    ModeN = 1;
    break;
  case 4:
    ModeN = 2;
    break;
  case 8:
    ModeN = 3;
    break;
  case 16:
    ModeN = 4;
    break;
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }

  switch (Order) {
  case AtomicOrdering::Monotonic:
    ModelN = 0;
    break;
  case AtomicOrdering::Acquire:
    ModelN = 1;
    break;
  case AtomicOrdering::Release:
    ModelN = 2;
    break;
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    ModelN = 3;
    break;
  default:
    return UNKNOWN_LIBCALL;
  }

  return LC[ModeN][ModelN];
}
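// Added note (not in the original source): ModeN indexes the access size
// (1, 2, 4, 8 or 16 bytes) and ModelN the memory ordering (relaxed, acquire,
// release, acq_rel/seq_cst), so for example an 8-byte acquire operation
// selects LC[3][1] from the table supplied by the caller.
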
RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
                                        MVT VT) {
  if (!VT.isScalarInteger())
    return UNKNOWN_LIBCALL;
  uint64_t MemSize = VT.getScalarSizeInBits() / 8;

#define LCALLS(A, B)                                                           \
  { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
#define LCALL5(A)                                                              \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case ISD::ATOMIC_CMP_SWAP: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_SWAP: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_LOAD_ADD: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_LOAD_OR: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_LOAD_CLR: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_LOAD_XOR: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  default:
    return UNKNOWN_LIBCALL;
  }
#undef LCALLS
#undef LCALL5
}
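// Added example (illustrative): getOUTLINE_ATOMIC(ISD::ATOMIC_LOAD_ADD,
// AtomicOrdering::Acquire, MVT::i32) computes MemSize = 4 and resolves, via
// the LCALLS/LCALL5 table and the helper above, to OUTLINE_ATOMIC_LDADD4_ACQ.
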
RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
#define OP_TO_LIBCALL(Name, Enum)                                              \
  case Name:                                                                   \
    switch (VT.SimpleTy) {                                                     \
    default:                                                                   \
      return UNKNOWN_LIBCALL;                                                  \
    case MVT::i8:                                                              \
      return Enum##_1;                                                         \
    case MVT::i16:                                                             \
      return Enum##_2;                                                         \
    case MVT::i32:                                                             \
      return Enum##_4;                                                         \
    case MVT::i64:                                                             \
      return Enum##_8;                                                         \
    case MVT::i128:                                                            \
      return Enum##_16;                                                        \
    }

  switch (Opc) {
    OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
    OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
  }

#undef OP_TO_LIBCALL

  return UNKNOWN_LIBCALL;
}
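// Added example (illustrative): getSYNC(ISD::ATOMIC_LOAD_ADD, MVT::i32)
// expands through the macro above to SYNC_FETCH_AND_ADD_4, matching the
// __sync_fetch_and_add_4 runtime routine.
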
RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}

RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}

RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}
void RTLIB::initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs) {
  std::fill(CmpLibcallCCs, CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL,
            ISD::SETCC_INVALID);
  CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ;
  CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ;
  CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ;
  CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
  CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE;
  CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE;
  CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE;
  CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
  CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE;
  CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE;
  CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE;
  CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
  CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT;
  CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT;
  CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT;
  CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
  CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE;
  CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE;
  CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE;
  CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
  CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT;
  CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT;
  CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT;
  CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
  CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE;
  CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE;
  CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE;
  CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE;
}
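// Added note (not in the original source): these condition codes are applied
// to the integer result of the comparison libcall; e.g. the OEQ_F32 routine is
// treated as true when its result compares SETEQ against zero.
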
/// NOTE: The TargetMachine owns TLOF.
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
    : TM(tm), Libcalls(TM.getTargetTriple()) {
  initActions();

  // Perform these initializations only once.
  MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove =
      MaxLoadsPerMemcmp = 8;
  MaxGluedStoresPerMemcpy = 0;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
      MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
  HasMultipleConditionRegisters = false;
  HasExtractBitsInsn = false;
  JumpIsExpensive = JumpIsExpensiveOverride;
  PredictableSelectIsExpensive = false;
  EnableExtLdPromotion = false;
  StackPointerRegisterToSaveRestore = 0;
  BooleanContents = UndefinedBooleanContent;
  BooleanFloatContents = UndefinedBooleanContent;
  BooleanVectorContents = UndefinedBooleanContent;
  SchedPreferenceInfo = Sched::ILP;
  GatherAllAliasesMaxDepth = 18;
  IsStrictFPEnabled = DisableStrictNodeMutation;
  MaxBytesForAlignment = 0;
  MaxAtomicSizeInBitsSupported = 0;

  // Assume that even with libcalls, no target supports wider than 128 bit
  // division.
  MaxDivRemBitWidthSupported = 128;

  MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;

  MinCmpXchgSizeInBits = 0;
  SupportsUnalignedAtomics = false;

  RTLIB::initCmpLibcallCCs(CmpLibcallCCs);
}
void TargetLoweringBase::initActions() {
  // All operations default to being supported.
  memset(OpActions, 0, sizeof(OpActions));
  memset(LoadExtActions, 0, sizeof(LoadExtActions));
  memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
  memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
  memset(CondCodeActions, 0, sizeof(CondCodeActions));
  std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
  std::fill(std::begin(TargetDAGCombineArray),
            std::end(TargetDAGCombineArray), 0);

  // Let extending atomic loads be unsupported by default.
  for (MVT ValVT : MVT::all_valuetypes())
    for (MVT MemVT : MVT::all_valuetypes())
      setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT, MemVT,
                             Expand);

  // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to
  // remove this and targets should individually set these types if not legal.
  for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
                                   force_iteration_on_noniterable_enum)) {
    for (MVT VT : {MVT::i2, MVT::i4})
      OpActions[(unsigned)VT.SimpleTy][NT] = Expand;
  }
  for (MVT AVT : MVT::all_valuetypes()) {
    for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) {
      setTruncStoreAction(AVT, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand);
    }
  }
  for (unsigned IM = (unsigned)ISD::PRE_INC;
       IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
    for (MVT VT : {MVT::i2, MVT::i4}) {
      setIndexedLoadAction(IM, VT, Expand);
      setIndexedStoreAction(IM, VT, Expand);
      setIndexedMaskedLoadAction(IM, VT, Expand);
      setIndexedMaskedStoreAction(IM, VT, Expand);
    }
  }

  for (MVT VT : MVT::fp_valuetypes()) {
    MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits());
    if (IntVT.isValid()) {
      setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
      AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
    }
  }
  // Set default actions for various operations.
  for (MVT VT : MVT::all_valuetypes()) {
    // Default all indexed load / store to expand.
    for (unsigned IM = (unsigned)ISD::PRE_INC;
         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
      setIndexedLoadAction(IM, VT, Expand);
      setIndexedStoreAction(IM, VT, Expand);
      setIndexedMaskedLoadAction(IM, VT, Expand);
      setIndexedMaskedStoreAction(IM, VT, Expand);
    }

    // Most backends expect to see the node which just returns the value loaded.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
    // These operations default to expand.
    setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS,
                        ISD::FMINNUM, ISD::FMAXNUM,
                        ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
                        ISD::FMINIMUM, ISD::FMAXIMUM,
                        ISD::FMAD, ISD::SMIN,
                        ISD::SMAX, ISD::UMIN,
                        ISD::UMAX, ISD::ABS,
                        ISD::FSHL, ISD::FSHR,
                        ISD::SADDSAT, ISD::UADDSAT,
                        ISD::SSUBSAT, ISD::USUBSAT,
                        ISD::SSHLSAT, ISD::USHLSAT,
                        ISD::SMULFIX, ISD::SMULFIXSAT,
                        ISD::UMULFIX, ISD::UMULFIXSAT,
                        ISD::SDIVFIX, ISD::SDIVFIXSAT,
                        ISD::UDIVFIX, ISD::UDIVFIXSAT,
                        ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
                        ISD::IS_FPCLASS},
                       VT, Expand);
    // Overflow operations default to expand
    setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, ISD::USUBO,
                        ISD::SMULO, ISD::UMULO},
                       VT, Expand);

    // Carry-using overflow operations default to expand.
    setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY, ISD::SETCCCARRY,
                        ISD::SADDO_CARRY, ISD::SSUBO_CARRY},
                       VT, Expand);

    // ADDC/ADDE/SUBC/SUBE default to expand.
    setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT,
                       Expand);

    // [US]CMP default to expand
    setOperationAction({ISD::UCMP, ISD::SCMP}, VT, Expand);

    setOperationAction(
        {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT,
        Expand);

    // Absolute difference
    setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);

    // These default to Expand so they will be expanded to CTLZ/CTTZ by default.
    setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                       Expand);

    setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
    // These library functions default to expand.
    setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT,
                       Expand);

    // These operations default to expand for vector types.
    if (VT.isVector())
      setOperationAction(
          {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
           ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
           ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN, ISD::FACOS,
           ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH},
          VT, Expand);

    // Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)              \
  setOperationAction(ISD::STRICT_##DAGN, VT, Expand);
#include "llvm/IR/ConstrainedOps.def"

    // For most targets @llvm.get.dynamic.area.offset just returns 0.
    setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
    // Vector reductions default to expand.
    setOperationAction(
        {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD,
         ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
         ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
         ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
         ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM,
         ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
        VT, Expand);

    // Named vector shuffles default to expand.
    setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);

    // Only some targets support this vector operation. Most need to expand it.
    setOperationAction(ISD::VECTOR_COMPRESS, VT, Expand);
    // VP operations default to expand.
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...)                                   \
  setOperationAction(ISD::SDOPC, VT, Expand);
#include "llvm/IR/VPIntrinsics.def"

    // FP environment operations default to expand.
    setOperationAction(ISD::GET_FPENV, VT, Expand);
    setOperationAction(ISD::SET_FPENV, VT, Expand);
    setOperationAction(ISD::RESET_FPENV, VT, Expand);
  }
  // Most targets ignore the @llvm.prefetch intrinsic.
  setOperationAction(ISD::PREFETCH, MVT::Other, Expand);

  // Most targets also ignore the @llvm.readcyclecounter intrinsic.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);

  // Most targets also ignore the @llvm.readsteadycounter intrinsic.
  setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand);

  // ConstantFP nodes default to expand. Targets can either change this to
  // Legal, in which case all fp constants are legal, or use isFPImmLegal()
  // to optimize expansions for certain constants.
  setOperationAction(ISD::ConstantFP,
                     {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
                     Expand);

  // These library functions default to expand.
  setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                      ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
                      ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
                      ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
                      ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
                      ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH},
                     {MVT::f32, MVT::f64, MVT::f128}, Expand);
  setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH,
                      ISD::FSINH, ISD::FTANH},
                     MVT::f16, Promote);

  // Default ISD::TRAP to expand (which turns it into abort).
  setOperationAction(ISD::TRAP, MVT::Other, Expand);

  // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
  // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);

  setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);

  setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand);
  setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand);

  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
    setOperationAction(ISD::GET_FPMODE, VT, Expand);
    setOperationAction(ISD::SET_FPMODE, VT, Expand);
  }
  setOperationAction(ISD::RESET_FPMODE, MVT::Other, Expand);

  // This one by default will call __clear_cache unless the target
  // wants something different.
  setOperationAction(ISD::CLEAR_CACHE, MVT::Other, LibCall);
}
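// Added sketch (hypothetical target, for illustration only): a backend
// overrides these defaults from its own TargetLowering constructor, e.g.
//   setOperationAction(ISD::FMINNUM, MVT::f32, Legal);  // native instruction
//   setOperationAction(ISD::CTPOP, MVT::i64, Custom);   // custom lowering hook
// Anything left untouched keeps the Expand/Promote/LibCall defaults set above.
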
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
                                               EVT) const {
  return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
}
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy,
                                         const DataLayout &DL) const {
  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
  if (LHSTy.isVector())
    return LHSTy;
  MVT ShiftVT = getScalarShiftAmountTy(DL, LHSTy);
  // If any possible shift value won't fit in the preferred type, just use
  // something safe. Assume it will be legalized when the shift is expanded.
  if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits()))
    ShiftVT = MVT::i32;
  assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) &&
         "ShiftVT is still too small!");
  return ShiftVT;
}
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
  assert(isTypeLegal(VT));
  switch (Op) {
  default: return false;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM: return true;
  }
}
bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS,
                                             unsigned DestAS) const {
  return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}
unsigned TargetLoweringBase::getBitWidthForCttzElements(
    Type *RetTy, ElementCount EC, bool ZeroIsPoison,
    const ConstantRange *VScaleRange) const {
  // Find the smallest "sensible" element type to use for the expansion.
  ConstantRange CR(APInt(64, EC.getKnownMinValue()));
  if (EC.isScalable())
    CR = CR.umul_sat(*VScaleRange);

  if (ZeroIsPoison)
    CR = CR.subtract(APInt(64, 1));

  unsigned EltWidth = RetTy->getScalarSizeInBits();
  EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits());
  EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8);

  return EltWidth;
}
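// Added worked example: for a fixed 4-element count with ZeroIsPoison, the
// range starts as the single value 4, becomes 3 after the subtraction, needs
// 2 active bits, and the bit_ceil/minimum-of-8 clamp settles on an 8-bit
// element width.
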
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
  // If the command-line option was specified, ignore this request.
  if (!JumpIsExpensiveOverride.getNumOccurrences())
    JumpIsExpensive = isExpensive;
}
TargetLoweringBase::LegalizeKind
TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
  // If this is a simple type, use the ComputeRegisterProp mechanism.
  if (VT.isSimple()) {
    MVT SVT = VT.getSimpleVT();
    assert((unsigned)SVT.SimpleTy < std::size(TransformToType));
    MVT NVT = TransformToType[SVT.SimpleTy];
    LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);

    assert((LA == TypeLegal || LA == TypeSoftenFloat ||
            LA == TypeSoftPromoteHalf ||
            (NVT.isVector() ||
             ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
           "Promote may not follow Expand or Promote");

    if (LA == TypeSplitVector)
      return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context));
    if (LA == TypeScalarizeVector)
      return LegalizeKind(LA, SVT.getVectorElementType());
    return LegalizeKind(LA, NVT);
  }

  // Handle Extended Scalar Types.
  if (!VT.isVector()) {
    assert(VT.isInteger() && "Float types must be simple");
    unsigned BitSize = VT.getSizeInBits();
    // First promote to a power-of-two size, then expand if necessary.
    if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
      EVT NVT = VT.getRoundIntegerType(Context);
      assert(NVT != VT && "Unable to round integer VT");
      LegalizeKind NextStep = getTypeConversion(Context, NVT);
      // Avoid multi-step promotion.
      if (NextStep.first == TypePromoteInteger)
        return NextStep;
      // Return rounded integer type.
      return LegalizeKind(TypePromoteInteger, NVT);
    }

    return LegalizeKind(TypeExpandInteger,
                        EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
  }
  // Handle vector types.
  ElementCount NumElts = VT.getVectorElementCount();
  EVT EltVT = VT.getVectorElementType();

  // Vectors with only one element are always scalarized.
  if (NumElts.isScalar())
    return LegalizeKind(TypeScalarizeVector, EltVT);

  // Try to widen vector elements until the element type is a power of two and
  // promote it to a legal type later on, for example:
  // <3 x i8> -> <4 x i8> -> <4 x i32>
  if (EltVT.isInteger()) {
    // Vectors with a number of elements that is not a power of two are always
    // widened, for example <3 x i8> -> <4 x i8>.
    if (!VT.isPow2VectorType()) {
      NumElts = NumElts.coefficientNextPowerOf2();
      EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
      return LegalizeKind(TypeWidenVector, NVT);
    }

    // Examine the element type.
    LegalizeKind LK = getTypeConversion(Context, EltVT);

    // If type is to be expanded, split the vector.
    // <4 x i140> -> <2 x i140>
    if (LK.first == TypeExpandInteger) {
      if (VT.getVectorElementCount().isScalable())
        return LegalizeKind(TypeScalarizeScalableVector, EltVT);
      return LegalizeKind(TypeSplitVector,
                          VT.getHalfNumVectorElementsVT(Context));
    }

    // Promote the integer element types until a legal vector type is found
    // or until the element integer type is too big. If a legal type was not
    // found, fallback to the usual mechanism of widening/splitting the
    // vector.
    EVT OldEltVT = EltVT;
    while (true) {
      // Increase the bitwidth of the element to the next pow-of-two
      // (which is greater than 8 bits).
      EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
                  .getRoundIntegerType(Context);

      // Stop trying when getting a non-simple element type.
      // Note that vector elements may be greater than legal vector element
      // types. Example: X86 XMM registers hold 64bit element on 32bit
      // systems.
      if (!EltVT.isSimple())
        break;

      // Build a new vector type and check if it is legal.
      MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
      // Found a legal promoted vector type.
      if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
        return LegalizeKind(TypePromoteInteger,
                            EVT::getVectorVT(Context, EltVT, NumElts));
    }

    // Reset the type to the unexpanded type if we did not find a legal vector
    // type with a promoted vector element type.
    EltVT = OldEltVT;
  }

  // Try to widen the vector until a legal type is found.
  // If there is no wider legal type, split the vector.
  while (true) {
    // Round up to the next power of 2.
    NumElts = NumElts.coefficientNextPowerOf2();

    // If there is no simple vector type with this many elements then there
    // cannot be a larger legal vector type. Note that this assumes that
    // there are no skipped intermediate vector types in the simple types.
    if (!EltVT.isSimple())
      break;
    MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
    if (LargerVector == MVT())
      break;

    // If this type is legal then widen the vector.
    if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
      return LegalizeKind(TypeWidenVector, LargerVector);
  }

  // Widen odd vectors to next power of two.
  if (!VT.isPow2VectorType()) {
    EVT NVT = VT.getPow2VectorType(Context);
    return LegalizeKind(TypeWidenVector, NVT);
  }

  if (VT.getVectorElementCount() == ElementCount::getScalable(1))
    return LegalizeKind(TypeScalarizeScalableVector, EltVT);

  // Vectors with illegal element types are expanded.
  EVT NVT = EVT::getVectorVT(Context, EltVT,
                             VT.getVectorElementCount().divideCoefficientBy(2));
  return LegalizeKind(TypeSplitVector, NVT);
}
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          MVT &RegisterVT,
                                          TargetLoweringBase *TLI) {
  // Figure out the right, legal destination reg to copy into.
  ElementCount EC = VT.getVectorElementCount();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // Scalable vectors cannot be scalarized, so splitting or widening is
  // required.
  if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue()))
    llvm_unreachable(
        "Splitting or widening of non-power-of-2 MVTs is not implemented.");

  // FIXME: We don't support non-power-of-2-sized vectors for now.
  // Ideally we could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(EC.getKnownMinValue())) {
    // Split EC to unit size (scalable property is preserved).
    NumVectorRegs = EC.getKnownMinValue();
    EC = ElementCount::getFixed(1);
  }

  // Divide the input until we get to a supported size. This will
  // always end up with an EC that represents a scalar or a scalable
  // scalar.
  while (EC.getKnownMinValue() > 1 &&
         !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
    EC = EC.divideCoefficientBy(2);
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, EC);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  unsigned LaneSizeInBits = NewVT.getScalarSizeInBits();

  // Convert sizes such as i33 to i64.
  LaneSizeInBits = llvm::bit_ceil(LaneSizeInBits);

  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
/// isLegalRC - Return true if the value types that can be represented by the
/// specified register class are all legal.
bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
                                   const TargetRegisterClass &RC) const {
  for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I)
    if (isTypeLegal(*I))
      return true;
  return false;
}
/// Replace/modify any TargetFrameIndex operands with a target-dependent
/// sequence of memory operands that is recognized by PrologEpilogInserter.
MachineInstr *
TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
                                   MachineBasicBlock *MBB) const {
  MachineInstr *MI = &InitialMI;
  MachineFunction &MF = *MI->getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // We're handling multiple types of operands here:
  // PATCHPOINT MetaArgs - live-in, read only, direct
  // STATEPOINT Deopt Spill - live-through, read only, indirect
  // STATEPOINT Deopt Alloca - live-through, read only, direct
  // (We're currently conservative and mark the deopt slots read/write in
  // practice.)
  // STATEPOINT GC Spill - live-through, read/write, indirect
  // STATEPOINT GC Alloca - live-through, read/write, direct
  // The live-in vs live-through is handled already (the live through ones are
  // all stack slots), but we need to handle the different type of stackmap
  // operands and memory effects here.

  if (llvm::none_of(MI->operands(),
                    [](MachineOperand &Operand) { return Operand.isFI(); }))
    return MI;

  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());

  // Inherit previous memory operands.
  MIB.cloneMemRefs(*MI);

  for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isFI()) {
      // Index of Def operand this Use is tied to.
      // Since Defs are coming before Uses, if Use is tied, then
      // index of Def must be smaller than index of that Use.
      // Also, Defs preserve their position in new MI.
      unsigned TiedTo = i;
      if (MO.isReg() && MO.isTied())
        TiedTo = MI->findTiedOperandIdx(i);
      MIB.add(MO);
      if (TiedTo < i)
        MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
      continue;
    }

    // foldMemoryOperand builds a new MI after replacing a single FI operand
    // with the canonical set of five x86 addressing-mode operands.
    int FI = MO.getIndex();

    // Add frame index operands recognized by stackmaps.cpp
    if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
      // indirect-mem-ref tag, size, #FI, offset.
      // Used for spills inserted by StatepointLowering. This codepath is not
      // used for patchpoints/stackmaps at all, for these spilling is done via
      // foldMemoryOperand callback only.
      assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
      MIB.addImm(StackMaps::IndirectMemRefOp);
      MIB.addImm(MFI.getObjectSize(FI));
      MIB.add(MO);
      MIB.addImm(0);
    } else {
      // direct-mem-ref tag, #FI, offset.
      // Used by patchpoint, and direct alloca arguments to statepoints
      MIB.addImm(StackMaps::DirectMemRefOp);
      MIB.add(MO);
      MIB.addImm(0);
    }

    assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");

    // Add a new memory operand for this FI.
    assert(MFI.getObjectOffset(FI) != -1);

    // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP, and
    // PATCHPOINT should be updated to do the same. (TODO)
    if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
      auto Flags = MachineMemOperand::MOLoad;
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FI), Flags,
          MF.getDataLayout().getPointerSize(), MFI.getObjectAlign(FI));
      MIB->addMemOperand(MF, MMO);
    }
  }
  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
  MI->eraseFromParent();
  return MIB;
}
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
// This function is in TargetLowering because it uses RegClassForVT which would
// need to be moved to TargetRegisterInfo and would necessitate moving
// isTypeLegal over as well - a massive change that would just require
// TargetLowering having a TargetRegisterInfo class member that it would use.
std::pair<const TargetRegisterClass *, uint8_t>
TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                            MVT VT) const {
  const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
  if (!RC)
    return std::make_pair(RC, 0);

  // Compute the set of all super-register classes.
  BitVector SuperRegRC(TRI->getNumRegClasses());
  for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
    SuperRegRC.setBitsInMask(RCI.getMask());

  // Find the first legal register class with the largest spill size.
  const TargetRegisterClass *BestRC = RC;
  for (unsigned i : SuperRegRC.set_bits()) {
    const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
    // We want the largest possible spill size.
    if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
      continue;
    if (!isLegalRC(*TRI, *SuperRC))
      continue;
    BestRC = SuperRC;
  }
  return std::make_pair(BestRC, 1);
}
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties(
    const TargetRegisterInfo *TRI) {
  // Everything defaults to needing one register.
  for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
    NumRegistersForVT[i] = 1;
    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
  }
  // ...except isVoid, which doesn't need any registers.
  NumRegistersForVT[MVT::isVoid] = 0;

  // Find the largest integer register class.
  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
  for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg)
    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");

  // Every integer value type larger than this largest register takes twice as
  // many registers to represent as the previous ValueType.
  for (unsigned ExpandedReg = LargestIntReg + 1;
       ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
    ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
                                   TypeExpandInteger);
  }

  // Inspect all of the ValueType's smaller than the largest integer
  // register to see which ones need promotion.
  unsigned LegalIntReg = LargestIntReg;
  for (unsigned IntReg = LargestIntReg - 1;
       IntReg >= (unsigned)MVT::i1; --IntReg) {
    MVT IVT = (MVT::SimpleValueType)IntReg;
    if (isTypeLegal(IVT)) {
      LegalIntReg = IntReg;
    } else {
      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
          (MVT::SimpleValueType)LegalIntReg;
      ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
    }
  }
  // ppcf128 type is really two f64's.
  if (!isTypeLegal(MVT::ppcf128)) {
    if (isTypeLegal(MVT::f64)) {
      NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
      RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
      TransformToType[MVT::ppcf128] = MVT::f64;
      ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
    } else {
      NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
      RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
      TransformToType[MVT::ppcf128] = MVT::i128;
      ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
    }
  }

  // Decide how to handle f128. If the target does not have native f128 support,
  // expand it to i128 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f128)) {
    NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
    RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
    TransformToType[MVT::f128] = MVT::i128;
    ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
  }

  // Decide how to handle f80. If the target does not have native f80 support,
  // expand it to i96 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f80)) {
    NumRegistersForVT[MVT::f80] = 3*NumRegistersForVT[MVT::i32];
    RegisterTypeForVT[MVT::f80] = RegisterTypeForVT[MVT::i32];
    TransformToType[MVT::f80] = MVT::i32;
    ValueTypeActions.setTypeAction(MVT::f80, TypeSoftenFloat);
  }

  // Decide how to handle f64. If the target does not have native f64 support,
  // expand it to i64 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f64)) {
    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
    TransformToType[MVT::f64] = MVT::i64;
    ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
  }

  // Decide how to handle f32. If the target does not have native f32 support,
  // expand it to i32 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f32)) {
    NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
    RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
    TransformToType[MVT::f32] = MVT::i32;
    ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
  }

  // Decide how to handle f16. If the target does not have native f16 support,
  // promote it to f32, because there are no f16 library calls (except for
  // conversions).
  if (!isTypeLegal(MVT::f16)) {
    // Allow targets to control how we legalize half.
    bool SoftPromoteHalfType = softPromoteHalfType();
    bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType();

    if (!UseFPRegsForHalfType) {
      NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
      RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
    } else {
      NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
      RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
    }
    TransformToType[MVT::f16] = MVT::f32;
    if (SoftPromoteHalfType) {
      ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf);
    } else {
      ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
    }
  }

  // Decide how to handle bf16. If the target does not have native bf16 support,
  // promote it to f32, because there are no bf16 library calls (except for
  // converting from f32 to bf16).
  if (!isTypeLegal(MVT::bf16)) {
    NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32];
    RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32];
    TransformToType[MVT::bf16] = MVT::f32;
    ValueTypeActions.setTypeAction(MVT::bf16, TypeSoftPromoteHalf);
  }
  // Loop over all of the vector value types to see which need transformations.
  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType) i;
    if (isTypeLegal(VT))
      continue;

    MVT EltVT = VT.getVectorElementType();
    ElementCount EC = VT.getVectorElementCount();
    bool IsLegalWiderType = false;
    bool IsScalable = VT.isScalableVector();
    LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
    switch (PreferredAction) {
    case TypePromoteInteger: {
      MVT::SimpleValueType EndVT = IsScalable ?
                                   MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE :
                                   MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE;
      // Try to promote the elements of integer vectors. If no legal
      // promotion was found, fall through to the widen-vector method.
      for (unsigned nVT = i + 1;
           (MVT::SimpleValueType)nVT <= EndVT; ++nVT) {
        MVT SVT = (MVT::SimpleValueType) nVT;
        // Promote vectors of integers to vectors with the same number
        // of elements, with a wider element type.
        if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() &&
            SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
          ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
          IsLegalWiderType = true;
          break;
        }
      }
      if (IsLegalWiderType)
        break;
      [[fallthrough]];
    }

    case TypeWidenVector:
      if (isPowerOf2_32(EC.getKnownMinValue())) {
        // Try to widen the vector.
        for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
          MVT SVT = (MVT::SimpleValueType) nVT;
          if (SVT.getVectorElementType() == EltVT &&
              SVT.isScalableVector() == IsScalable &&
              SVT.getVectorElementCount().getKnownMinValue() >
                  EC.getKnownMinValue() &&
              isTypeLegal(SVT)) {
            TransformToType[i] = SVT;
            RegisterTypeForVT[i] = SVT;
            NumRegistersForVT[i] = 1;
            ValueTypeActions.setTypeAction(VT, TypeWidenVector);
            IsLegalWiderType = true;
            break;
          }
        }
        if (IsLegalWiderType)
          break;
      } else {
        // Only widen to the next power of 2 to keep consistency with EVT.
        MVT NVT = VT.getPow2VectorType();
        if (isTypeLegal(NVT)) {
          TransformToType[i] = NVT;
          ValueTypeActions.setTypeAction(VT, TypeWidenVector);
          RegisterTypeForVT[i] = NVT;
          NumRegistersForVT[i] = 1;
          break;
        }
      }
      [[fallthrough]];

    case TypeSplitVector:
    case TypeScalarizeVector: {
      MVT IntermediateVT;
      MVT RegisterVT;
      unsigned NumIntermediates;
      unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT,
          NumIntermediates, RegisterVT, this);
      NumRegistersForVT[i] = NumRegisters;
      assert(NumRegistersForVT[i] == NumRegisters &&
             "NumRegistersForVT size cannot represent NumRegisters!");
      RegisterTypeForVT[i] = RegisterVT;

      MVT NVT = VT.getPow2VectorType();
      if (NVT == VT) {
        // Type is already a power of 2. The default action is to split.
        TransformToType[i] = MVT::Other;
        if (PreferredAction == TypeScalarizeVector)
          ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
        else if (PreferredAction == TypeSplitVector)
          ValueTypeActions.setTypeAction(VT, TypeSplitVector);
        else if (EC.getKnownMinValue() > 1)
          ValueTypeActions.setTypeAction(VT, TypeSplitVector);
        else
          ValueTypeActions.setTypeAction(VT, EC.isScalable()
                                                 ? TypeScalarizeScalableVector
                                                 : TypeScalarizeVector);
      } else {
        TransformToType[i] = NVT;
        ValueTypeActions.setTypeAction(VT, TypeWidenVector);
      }
      break;
    }
    default:
      llvm_unreachable("Unknown vector legalization action!");
    }
  }

  // Determine the 'representative' register class for each value type.
  // A representative register class is the largest (meaning one which is
  // not a sub-register class / subreg register class) legal register class for
  // a group of value types. For example, on i386, i8, i16, and i32
  // representative would be GR32; while on x86_64 it's GR64.
  for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
    const TargetRegisterClass* RRC;
    uint8_t Cost;
    std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
    RepRegClassForVT[i] = RRC;
    RepRegClassCostForVT[i] = Cost;
  }
}
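// Added note: the doubling rule above means that on a target whose widest
// legal integer register is i64, an i128 value reports two i64 registers and
// i256 reports four, while smaller illegal integer types are promoted to the
// nearest legal one instead.
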
EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  assert(!VT.isVector() && "No default SetCC type for vectors!");
  return getPointerTy(DL).SimpleTy;
}
MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
  return MVT::i32; // return the default value
}
/// getVectorTypeBreakdown - Vector types are broken down into some number of
/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
///
/// This method returns the number of registers needed, and the VT for each
/// register. It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
                                                    EVT VT, EVT &IntermediateVT,
                                                    unsigned &NumIntermediates,
                                                    MVT &RegisterVT) const {
  ElementCount EltCnt = VT.getVectorElementCount();

  // If there is a wider vector type with the same element type as this one,
  // or a promoted vector type that has the same number of elements which
  // are wider, then we should convert to that legal vector type.
  // This handles things like <2 x float> -> <4 x float> and
  // <4 x i1> -> <4 x i32>.
  LegalizeTypeAction TA = getTypeAction(Context, VT);
  if (!EltCnt.isScalar() &&
      (TA == TypeWidenVector || TA == TypePromoteInteger)) {
    EVT RegisterEVT = getTypeToTransformTo(Context, VT);
    if (isTypeLegal(RegisterEVT)) {
      IntermediateVT = RegisterEVT;
      RegisterVT = RegisterEVT.getSimpleVT();
      NumIntermediates = 1;
      return 1;
    }
  }

  // Figure out the right, legal destination reg to copy into.
  EVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // Scalable vectors cannot be scalarized, so handle the legalisation of the
  // types like done elsewhere in SelectionDAG.
  if (EltCnt.isScalable()) {
    LegalizeKind LK;
    EVT PartVT = VT;
    do {
      // Iterate until we've found a legal (part) type to hold VT.
      LK = getTypeConversion(Context, PartVT);
      PartVT = LK.second;
    } while (LK.first != TypeLegal);

    if (!PartVT.isVector())
      report_fatal_error(
          "Don't know how to legalize this scalable vector type");

    NumIntermediates =
        divideCeil(VT.getVectorElementCount().getKnownMinValue(),
                   PartVT.getVectorElementCount().getKnownMinValue());
    IntermediateVT = PartVT;
    RegisterVT = getRegisterType(Context, IntermediateVT);
    return NumIntermediates;
  }

  // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
  // we could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(EltCnt.getKnownMinValue())) {
    NumVectorRegs = EltCnt.getKnownMinValue();
    EltCnt = ElementCount::getFixed(1);
  }

  // Divide the input until we get to a supported size. This will always
  // end with a scalar if the target doesn't support vectors.
  while (EltCnt.getKnownMinValue() > 1 &&
         !isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
    EltCnt = EltCnt.divideCoefficientBy(2);
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  EVT NewVT = EVT::getVectorVT(Context, EltTy, EltCnt);
  if (!isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  MVT DestVT = getRegisterType(Context, NewVT);
  RegisterVT = DestVT;

  if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16.
    TypeSize NewVTSize = NewVT.getSizeInBits();
    // Convert sizes such as i33 to i64.
    if (!llvm::has_single_bit<uint32_t>(NewVTSize.getKnownMinValue()))
      NewVTSize = NewVTSize.coefficientNextPowerOf2();
    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
  }

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
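// Added worked example (mirrors the doc comment above): with only v4f32 legal,
// querying MVT::v8f32 halves the element count once, so the function returns 2
// with IntermediateVT == RegisterVT == v4f32 and NumIntermediates == 2.
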
bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
                                                uint64_t NumCases,
                                                uint64_t Range,
                                                ProfileSummaryInfo *PSI,
                                                BlockFrequencyInfo *BFI) const {
  // FIXME: This function checks the maximum table size and density, but the
  // minimum size is not checked. It would be nice if the minimum size is
  // also combined within this function. Currently, the minimum size check is
  // performed in findJumpTable() in SelectionDAGBuilder and
  // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
  const bool OptForSize =
      SI->getParent()->getParent()->hasOptSize() ||
      llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI);
  const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
  const unsigned MaxJumpTableSize = getMaximumJumpTableSize();

  // Check whether the number of cases is small enough and
  // the range is dense enough for a jump table.
  return (OptForSize || Range <= MaxJumpTableSize) &&
         (NumCases * 100 >= Range * MinDensity);
}
1629 MVT
TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext
&Context
,
1630 EVT ConditionVT
) const {
1631 return getRegisterType(Context
, ConditionVT
);

/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
                         AttributeList attr,
                         SmallVectorImpl<ISD::OutputArg> &Outs,
                         const TargetLowering &TLI, const DataLayout &DL) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0) return;

  for (unsigned j = 0, f = NumValues; j != f; ++j) {
    EVT VT = ValueVTs[j];
    ISD::NodeType ExtendKind = ISD::ANY_EXTEND;

    if (attr.hasRetAttr(Attribute::SExt))
      ExtendKind = ISD::SIGN_EXTEND;
    else if (attr.hasRetAttr(Attribute::ZExt))
      ExtendKind = ISD::ZERO_EXTEND;

    if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
      VT = TLI.getTypeForExtReturn(ReturnType->getContext(), VT, ExtendKind);

    unsigned NumParts =
        TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);
    MVT PartVT =
        TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT);

    // 'inreg' on function refers to return value
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
    if (attr.hasRetAttr(Attribute::InReg))
      Flags.setInReg();

    // Propagate extension type if any
    if (attr.hasRetAttr(Attribute::SExt))
      Flags.setSExt();
    else if (attr.hasRetAttr(Attribute::ZExt))
      Flags.setZExt();

    for (unsigned i = 0; i < NumParts; ++i) {
      ISD::ArgFlagsTy OutFlags = Flags;
      if (NumParts > 1 && i == 0)
        OutFlags.setSplit();
      else if (i == NumParts - 1 && i != 0)
        OutFlags.setSplitEnd();
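      // For illustration (assuming a hypothetical target with 64-bit GPRs): an
      // i128 return value lowered as two i64 parts gets the Split flag on part
      // 0 and the SplitEnd flag on part 1.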
      Outs.push_back(
          ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0));
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,
                                                   const DataLayout &DL) const {
  return DL.getABITypeAlign(Ty).value();
}

bool TargetLoweringBase::allowsMemoryAccessForAlignment(
    LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
    Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
  // Check if the specified alignment is sufficient based on the data layout.
  // TODO: While using the data layout works in practice, a better solution
  // would be to implement this check directly (make this a virtual function).
  // For example, the ABI alignment may change based on software platform while
  // this function should only be affected by hardware implementation.
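  // For example, under a typical data layout an i32 access with Align(4) meets
  // the ABI alignment and is reported fast below, while an Align(1) access
  // falls through to the target's allowsMisalignedMemoryAccesses() hook.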
  Type *Ty = VT.getTypeForEVT(Context);
  if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) {
    // Assume that an access that meets the ABI-specified alignment is fast.
    if (Fast != nullptr)
      *Fast = 1;
    return true;
  }

  // This is a misaligned access.
  return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
}

bool TargetLoweringBase::allowsMemoryAccessForAlignment(
    LLVMContext &Context, const DataLayout &DL, EVT VT,
    const MachineMemOperand &MMO, unsigned *Fast) const {
  return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
                                        MMO.getAlign(), MMO.getFlags(), Fast);
}

bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, EVT VT,
                                            unsigned AddrSpace, Align Alignment,
                                            MachineMemOperand::Flags Flags,
                                            unsigned *Fast) const {
  return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
                                        Flags, Fast);
}

bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, EVT VT,
                                            const MachineMemOperand &MMO,
                                            unsigned *Fast) const {
  return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
                            MMO.getFlags(), Fast);
}

bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, LLT Ty,
                                            const MachineMemOperand &MMO,
                                            unsigned *Fast) const {
  EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
  return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
                            MMO.getFlags(), Fast);
}

//===----------------------------------------------------------------------===//
//  TargetTransformInfo Helpers
//===----------------------------------------------------------------------===//

int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
  enum InstructionOpcodes {
#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
#include "llvm/IR/Instruction.def"
  };
  switch (static_cast<InstructionOpcodes>(Opcode)) {
  case Ret:            return 0;
  case Br:             return 0;
  case Switch:         return 0;
  case IndirectBr:     return 0;
  case Invoke:         return 0;
  case CallBr:         return 0;
  case Resume:         return 0;
  case Unreachable:    return 0;
  case CleanupRet:     return 0;
  case CatchRet:       return 0;
  case CatchPad:       return 0;
  case CatchSwitch:    return 0;
  case CleanupPad:     return 0;
  case FNeg:           return ISD::FNEG;
  case Add:            return ISD::ADD;
  case FAdd:           return ISD::FADD;
  case Sub:            return ISD::SUB;
  case FSub:           return ISD::FSUB;
  case Mul:            return ISD::MUL;
  case FMul:           return ISD::FMUL;
  case UDiv:           return ISD::UDIV;
  case SDiv:           return ISD::SDIV;
  case FDiv:           return ISD::FDIV;
  case URem:           return ISD::UREM;
  case SRem:           return ISD::SREM;
  case FRem:           return ISD::FREM;
  case Shl:            return ISD::SHL;
  case LShr:           return ISD::SRL;
  case AShr:           return ISD::SRA;
  case And:            return ISD::AND;
  case Or:             return ISD::OR;
  case Xor:            return ISD::XOR;
  case Alloca:         return 0;
  case Load:           return ISD::LOAD;
  case Store:          return ISD::STORE;
  case GetElementPtr:  return 0;
  case Fence:          return 0;
  case AtomicCmpXchg:  return 0;
  case AtomicRMW:      return 0;
  case Trunc:          return ISD::TRUNCATE;
  case ZExt:           return ISD::ZERO_EXTEND;
  case SExt:           return ISD::SIGN_EXTEND;
  case FPToUI:         return ISD::FP_TO_UINT;
  case FPToSI:         return ISD::FP_TO_SINT;
  case UIToFP:         return ISD::UINT_TO_FP;
  case SIToFP:         return ISD::SINT_TO_FP;
  case FPTrunc:        return ISD::FP_ROUND;
  case FPExt:          return ISD::FP_EXTEND;
  case PtrToInt:       return ISD::BITCAST;
  case IntToPtr:       return ISD::BITCAST;
  case BitCast:        return ISD::BITCAST;
  case AddrSpaceCast:  return ISD::ADDRSPACECAST;
  case ICmp:           return ISD::SETCC;
  case FCmp:           return ISD::SETCC;
  case PHI:            return 0;
  case Call:           return 0;
  case Select:         return ISD::SELECT;
  case UserOp1:        return 0;
  case UserOp2:        return 0;
  case VAArg:          return 0;
  case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
  case InsertElement:  return ISD::INSERT_VECTOR_ELT;
  case ShuffleVector:  return ISD::VECTOR_SHUFFLE;
  case ExtractValue:   return ISD::MERGE_VALUES;
  case InsertValue:    return ISD::MERGE_VALUES;
  case LandingPad:     return 0;
  case Freeze:         return ISD::FREEZE;
  }

  llvm_unreachable("Unknown instruction type encountered!");
}

Value *
TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
                                                       bool UseTLS) const {
  // compiler-rt provides a variable with a magic name. Targets that do not
  // link with compiler-rt may also provide such a variable.
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
  auto UnsafeStackPtr =
      dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));

  Type *StackPtrTy = PointerType::getUnqual(M->getContext());

  if (!UnsafeStackPtr) {
    auto TLSModel = UseTLS ?
        GlobalValue::InitialExecTLSModel :
        GlobalValue::NotThreadLocal;
    // The global variable is not defined yet, define it ourselves.
    // We use the initial-exec TLS model because we do not support the
    // variable living anywhere other than in the main executable.
    UnsafeStackPtr = new GlobalVariable(
        *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
        UnsafeStackPtrVar, nullptr, TLSModel);
  } else {
    // The variable exists, check its type and attributes.
    if (UnsafeStackPtr->getValueType() != StackPtrTy)
      report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
    if (UseTLS != UnsafeStackPtr->isThreadLocal())
      report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
                         (UseTLS ? "" : "not ") + "be thread-local");
  }
  return UnsafeStackPtr;
}

Value *
TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  if (!TM.getTargetTriple().isAndroid())
    return getDefaultSafeStackPointerLocation(IRB, true);

  // Android provides a libc function to retrieve the address of the current
  // thread's unsafe stack pointer.
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  auto *PtrTy = PointerType::getUnqual(M->getContext());
  FunctionCallee Fn =
      M->getOrInsertFunction("__safestack_pointer_address", PtrTy);
  return IRB.CreateCall(Fn);
}

//===----------------------------------------------------------------------===//
//  Loop Strength Reduction hooks
//===----------------------------------------------------------------------===//

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
                                               unsigned AS, Instruction *I) const {
  // The default implementation of this implements a conservative RISCy, r+r
  // and r+i addr mode.
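  // In other words, this default accepts "r", "i", "r+i" (with a 16-bit signed
  // offset), "r+r", and "2*r"; anything using a global base, a larger scale,
  // or a bigger offset is rejected by the checks below.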

  // Scalable offsets not supported
  if (AM.ScalableOffset)
    return false;

  // Allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Only support r+r,
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default: // Don't allow n * r
    return false;
  }

  return true;
}

//===----------------------------------------------------------------------===//
//  Stack Protector
//===----------------------------------------------------------------------===//

// For OpenBSD return its special guard variable. Otherwise return nullptr,
// so that SelectionDAG handles SSP.
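// On OpenBSD the effect is roughly equivalent to declaring, at module scope:
//   @__guard_local = external hidden global ptr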
Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const {
  if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
    Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
    PointerType *PtrTy = PointerType::getUnqual(M.getContext());
    Constant *C = M.getOrInsertGlobal("__guard_local", PtrTy);
    if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(C))
      G->setVisibility(GlobalValue::HiddenVisibility);
    return C;
  }
  return nullptr;
}

// Currently only support "standard" __stack_chk_guard.
// TODO: add LOAD_STACK_GUARD support.
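// Roughly speaking, this inserts the equivalent of the following declaration
// if it is not already present:
//   @__stack_chk_guard = external global ptr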
void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
  if (!M.getNamedValue("__stack_chk_guard")) {
    auto *GV = new GlobalVariable(M, PointerType::getUnqual(M.getContext()),
                                  false, GlobalVariable::ExternalLinkage,
                                  nullptr, "__stack_chk_guard");

    // FreeBSD has "__stack_chk_guard" defined externally on libc.so
    if (M.getDirectAccessExternalData() &&
        !TM.getTargetTriple().isWindowsGNUEnvironment() &&
        !(TM.getTargetTriple().isPPC64() &&
          TM.getTargetTriple().isOSFreeBSD()) &&
        (!TM.getTargetTriple().isOSDarwin() ||
         TM.getRelocationModel() == Reloc::Static))
      GV->setDSOLocal(true);
  }
}

// Currently only support "standard" __stack_chk_guard.
// TODO: add LOAD_STACK_GUARD support.
Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
  return M.getNamedValue("__stack_chk_guard");
}

Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
  return nullptr;
}

unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
  return MinimumJumpTableEntries;
}

void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
  MinimumJumpTableEntries = Val;
}

unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const {
  return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
}

unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
  return MaximumJumpTableSize;
}

void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
  MaximumJumpTableSize = Val;
}

bool TargetLoweringBase::isJumpTableRelative() const {
  return getTargetMachine().isPositionIndependent();
}

Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
  if (TM.Options.LoopAlignment)
    return Align(TM.Options.LoopAlignment);
  return PrefLoopAlignment;
}

unsigned TargetLoweringBase::getMaxPermittedBytesForAlignment(
    MachineBasicBlock *MBB) const {
  return MaxBytesForAlignment;
}

//===----------------------------------------------------------------------===//
//  Reciprocal Estimates
//===----------------------------------------------------------------------===//

/// Get the reciprocal estimate attribute string for a function that will
/// override the target defaults.
static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return F.getFnAttribute("reciprocal-estimates").getValueAsString();
}

/// Construct a string for the given reciprocal operation of the given type.
/// This string should match the corresponding option to the front-end's
/// "-mrecip" flag assuming those strings have been passed through in an
/// attribute string. For example, "vec-divf" for a division of a vXf32.
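/// A full "reciprocal-estimates" attribute value is a comma-separated list of
/// such op names, where a '!' prefix disables the op and a ":<N>" suffix sets
/// the refinement step count, e.g. "vec-divf:2,!sqrtd" (illustrative only).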
static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
  std::string Name = VT.isVector() ? "vec-" : "";

  Name += IsSqrt ? "sqrt" : "div";

  // TODO: Handle other float types?
  if (VT.getScalarType() == MVT::f64) {
    Name += "d";
  } else if (VT.getScalarType() == MVT::f16) {
    Name += "h";
  } else {
    assert(VT.getScalarType() == MVT::f32 &&
           "Unexpected FP type for reciprocal estimate");
    Name += "f";
  }

  return Name;
}

/// Return the character position and value (a single numeric character) of a
/// customized refinement operation in the input string if it exists. Return
/// false if there is no customized refinement step count.
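/// For example, given "divf:2", Position is set to the index of ':' and Value
/// to 2.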
static bool parseRefinementStep(StringRef In, size_t &Position,
                                uint8_t &Value) {
  const char RefStepToken = ':';
  Position = In.find(RefStepToken);
  if (Position == StringRef::npos)
    return false;

  StringRef RefStepString = In.substr(Position + 1);
  // Allow exactly one numeric character for the additional refinement
  // step parameter.
  if (RefStepString.size() == 1) {
    char RefStepChar = RefStepString[0];
    if (isDigit(RefStepChar)) {
      Value = RefStepChar - '0';
      return true;
    }
  }
  report_fatal_error("Invalid refinement step for -recip.");
}

/// For the input attribute string, return one of the ReciprocalEstimate enum
/// status values (enabled, disabled, or not specified) for this operation on
/// the specified data type.
static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
  if (Override.empty())
    return TargetLoweringBase::ReciprocalEstimate::Unspecified;

  SmallVector<StringRef, 4> OverrideVector;
  Override.split(OverrideVector, ',');
  unsigned NumArgs = OverrideVector.size();

  // Check if "all", "none", or "default" was specified.
  if (NumArgs == 1) {
    // Look for an optional setting of the number of refinement steps needed
    // for this type of reciprocal operation.
    size_t RefPos;
    uint8_t RefSteps;
    if (parseRefinementStep(Override, RefPos, RefSteps)) {
      // Split the string for further processing.
      Override = Override.substr(0, RefPos);
    }

    // All reciprocal types are enabled.
    if (Override == "all")
      return TargetLoweringBase::ReciprocalEstimate::Enabled;

    // All reciprocal types are disabled.
    if (Override == "none")
      return TargetLoweringBase::ReciprocalEstimate::Disabled;

    // Target defaults for enablement are used.
    if (Override == "default")
      return TargetLoweringBase::ReciprocalEstimate::Unspecified;
  }

  // The attribute string may omit the size suffix ('f'/'d').
  std::string VTName = getReciprocalOpName(IsSqrt, VT);
  std::string VTNameNoSize = VTName;
  VTNameNoSize.pop_back();
  static const char DisabledPrefix = '!';
  for (StringRef RecipType : OverrideVector) {
    size_t RefPos;
    uint8_t RefSteps;
    if (parseRefinementStep(RecipType, RefPos, RefSteps))
      RecipType = RecipType.substr(0, RefPos);

    // Ignore the disablement token for string matching.
    bool IsDisabled = RecipType[0] == DisabledPrefix;
    if (IsDisabled)
      RecipType = RecipType.substr(1);

    if (RecipType == VTName || RecipType == VTNameNoSize)
      return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
                        : TargetLoweringBase::ReciprocalEstimate::Enabled;
  }

  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
}

/// For the input attribute string, return the customized refinement step count
/// for this operation on the specified data type. If the step count does not
/// exist, return the ReciprocalEstimate enum value for unspecified.
static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
  if (Override.empty())
    return TargetLoweringBase::ReciprocalEstimate::Unspecified;

  SmallVector<StringRef, 4> OverrideVector;
  Override.split(OverrideVector, ',');
  unsigned NumArgs = OverrideVector.size();

  // Check if "all", "default", or "none" was specified.
  if (NumArgs == 1) {
    // Look for an optional setting of the number of refinement steps needed
    // for this type of reciprocal operation.
    size_t RefPos;
    uint8_t RefSteps;
    if (!parseRefinementStep(Override, RefPos, RefSteps))
      return TargetLoweringBase::ReciprocalEstimate::Unspecified;

    // Split the string for further processing.
    Override = Override.substr(0, RefPos);
    assert(Override != "none" &&
           "Disabled reciprocals, but specified refinement steps?");

    // If this is a general override, return the specified number of steps.
    if (Override == "all" || Override == "default")
      return RefSteps;
  }

  // The attribute string may omit the size suffix ('f'/'d').
  std::string VTName = getReciprocalOpName(IsSqrt, VT);
  std::string VTNameNoSize = VTName;
  VTNameNoSize.pop_back();

  for (StringRef RecipType : OverrideVector) {
    size_t RefPos;
    uint8_t RefSteps;
    if (!parseRefinementStep(RecipType, RefPos, RefSteps))
      continue;

    RecipType = RecipType.substr(0, RefPos);
    if (RecipType == VTName || RecipType == VTNameNoSize)
      return RefSteps;
  }

  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
}

int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
                                                    MachineFunction &MF) const {
  return getOpEnabled(true, VT, getRecipEstimateForFunc(MF));
}

int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
                                                   MachineFunction &MF) const {
  return getOpEnabled(false, VT, getRecipEstimateForFunc(MF));
}

int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
                                               MachineFunction &MF) const {
  return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF));
}

int TargetLoweringBase::getDivRefinementSteps(EVT VT,
                                              MachineFunction &MF) const {
  return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
}

bool TargetLoweringBase::isLoadBitCastBeneficial(
    EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG,
    const MachineMemOperand &MMO) const {
  // Single-element vectors are scalarized, so we should generally avoid having
  // any memory operations on such types, as they would get scalarized too.
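  // For example, bitcasting a loaded v4i8 to v1i32 is rejected by the check
  // below because the destination is a single-element fixed vector.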
  if (LoadVT.isFixedLengthVector() && BitcastVT.isFixedLengthVector() &&
      BitcastVT.getVectorNumElements() == 1)
    return false;

  // Don't do if we could do an indexed load on the original type, but not on
  // the new one.
  if (!LoadVT.isSimple() || !BitcastVT.isSimple())
    return false;

  MVT LoadMVT = LoadVT.getSimpleVT();

  // Don't bother doing this if it's just going to be promoted again later, as
  // doing so might interfere with other combines.
  if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
      getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
    return false;

  unsigned Fast = 0;
  return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
                            MMO, &Fast) &&
         Fast;
}

void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
  MF.getRegInfo().freezeReservedRegs();
}

MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags(
    const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC,
    const TargetLibraryInfo *LibInfo) const {
  MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
  if (LI.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;

  if (LI.hasMetadata(LLVMContext::MD_nontemporal))
    Flags |= MachineMemOperand::MONonTemporal;

  if (LI.hasMetadata(LLVMContext::MD_invariant_load))
    Flags |= MachineMemOperand::MOInvariant;

  if (isDereferenceableAndAlignedPointer(LI.getPointerOperand(), LI.getType(),
                                         LI.getAlign(), DL, &LI, AC,
                                         /*DT=*/nullptr, LibInfo))
    Flags |= MachineMemOperand::MODereferenceable;

  Flags |= getTargetMMOFlags(LI);
  return Flags;
}

MachineMemOperand::Flags
TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI,
                                            const DataLayout &DL) const {
  MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;

  if (SI.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;

  if (SI.hasMetadata(LLVMContext::MD_nontemporal))
    Flags |= MachineMemOperand::MONonTemporal;

  // FIXME: Not preserving dereferenceable
  Flags |= getTargetMMOFlags(SI);
  return Flags;
}

MachineMemOperand::Flags
TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI,
                                             const DataLayout &DL) const {
  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;

  if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(&AI)) {
    if (RMW->isVolatile())
      Flags |= MachineMemOperand::MOVolatile;
  } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(&AI)) {
    if (CmpX->isVolatile())
      Flags |= MachineMemOperand::MOVolatile;
  } else
    llvm_unreachable("not an atomic instruction");

  // FIXME: Not preserving dereferenceable
  Flags |= getTargetMMOFlags(AI);
  return Flags;
}

Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
    return Builder.CreateFence(Ord);
  else
    return nullptr;
}

Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isAcquireOrStronger(Ord))
    return Builder.CreateFence(Ord);
  else
    return nullptr;
}

//===----------------------------------------------------------------------===//
//  GlobalISel Hooks
//===----------------------------------------------------------------------===//

bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI,
                                        const TargetTransformInfo *TTI) const {
  auto &MF = *MI.getMF();
  auto &MRI = MF.getRegInfo();
  // Assuming a spill and reload of a value has a cost of 1 instruction each,
  // this helper function computes the maximum number of uses we should consider
  // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
  // break even in terms of code size when the original MI has 2 users vs
  // choosing to potentially spill. Any more than 2 users and we have a net code
  // size increase. This doesn't take into account register pressure though.
  auto maxUses = [](unsigned RematCost) {
    // A cost of 1 means remats are basically free.
    if (RematCost == 1)
      return std::numeric_limits<unsigned>::max();
    if (RematCost == 2)
      return 2U;

    // Remat is too expensive, only sink if there's one user.
    if (RematCost > 2)
      return 1U;
    llvm_unreachable("Unexpected remat cost");
  };
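  // For instance, with getGISelRematGlobalCost() == 2 (the arm64-style case
  // described above), a G_GLOBAL_VALUE is localized only if it has at most two
  // using instructions.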

  switch (MI.getOpcode()) {
  default:
    return false;
  // Constant-like instructions should be close to their users.
  // We don't want long live-ranges for them.
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FRAME_INDEX:
  case TargetOpcode::G_INTTOPTR:
    return true;
  case TargetOpcode::G_GLOBAL_VALUE: {
    unsigned RematCost = TTI->getGISelRematGlobalCost();
    Register Reg = MI.getOperand(0).getReg();
    unsigned MaxUses = maxUses(RematCost);
    if (MaxUses == UINT_MAX)
      return true; // Remats are "free" so always localize.
    return MRI.hasAtMostUserInstrs(Reg, MaxUses);