//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
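  // Shorthand LLTs used by the rules below: sN is an N-bit scalar, vNsM is a
  // vector of N elements of M bits each, and p0 is a 64-bit pointer in
  // address space 0.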
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);
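
  // The scalar 32-bit shift case is marked custom so that constant shift
  // amounts can be promoted to s64 in legalizeShlAshrLshr, letting the
  // imported SelectionDAG immediate-shift patterns match.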
  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor(
          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      //       how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();
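
  // Vectors of pointers (address space 0) get custom-legalized loads and
  // stores so that the p0 elements can be bitcast to s64; see
  // legalizeLoadStore below.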
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      //       how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      //       how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 is not legal.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_ANYEXT}).legalIf(ExtLegalFunc);
  getActionDefinitionsBuilder(G_SEXT)
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  computeTables();
  verify(*ST.getInstrInfo());
}
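
// Entry point for anything the rules above marked Custom; dispatch to the
// opcode-specific helpers below.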
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}
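
// Lower the mem* intrinsics to calls to the corresponding library routines;
// all other intrinsics are left untouched.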
bool AArch64LegalizerInfo::legalizeIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}
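
// For example (a sketch; register names are illustrative), a shift such as
//   %amt:_(s32) = G_CONSTANT i32 3
//   %res:_(s32) = G_LSHR %val:_(s32), %amt:_(s32)
// has its amount operand replaced with
//   %ext:_(s64) = G_ZEXT %amt:_(s32)
// so the imported immediate-shift patterns can match it.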
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}
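
// For example (a sketch; register names are illustrative), a store of a
// pointer vector
//   G_STORE %val:_(<2 x p0>), %addr:_(p0)
// is rewritten as
//   %cast:_(<2 x s64>) = G_BITCAST %val:_(<2 x p0>)
//   G_STORE %cast:_(<2 x s64>), %addr:_(p0)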
bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires the instruction, if not deleted, must be fully
  // legalized. In order to allow further legalization of the inst, we create
  // a new instruction and erase the existing one.

  unsigned ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}
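
// Lower G_VAARG using the standard AArch64 va_list scheme: load the current
// list pointer, realign it if the requested alignment exceeds the slot size,
// load the value, then store back the list pointer bumped past the slot.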
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}