//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor(
          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);
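
  // There is no AArch64 remainder instruction; lower() expands these via the
  // LegalizerHelper into (roughly) rem = a - (a / b) * b, i.e. a G_[SU]DIV
  // followed by G_MUL and G_SUB.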
  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
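
  // These have no AArch64 instructions at all; the rules below scalarize and
  // widen first so that each element becomes a plain libm call (e.g. G_FSIN
  // at s32/s64 ends up calling sinf/sin).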
  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      //       how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();
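
  // Predicate matching vector-of-pointer values in address space 0. G_LOAD
  // and G_STORE below send such values to custom legalization
  // (legalizeLoadStore), which bitcasts them to integer vectors.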
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 needs narrowing.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };

  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
      .lowerIf(all(
          typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

  getActionDefinitionsBuilder(
      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
       G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
       G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
      .legalIf(all(
          typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
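    // For instance, all of the following are currently expressed with these
    // same two opcodes (illustrative MIR):
    //   %w:_(s64)       = G_MERGE_VALUES %lo:_(s32), %hi:_(s32)
    //   %v:_(<4 x s32>) = G_MERGE_VALUES %a:_(<2 x s32>), %b:_(<2 x s32>)
    //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %w:_(s64)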
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
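              // e.g. an s65 widens to s128 (next power of 2), while an s257
              // widens to s320, since alignTo<64>(258) = 320 < 512.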
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits =
                  1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
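  // For example (illustrative MIR):
  //   %amt:_(s32) = G_CONSTANT i32 3
  //   %dst:_(s32) = G_LSHR %src:_(s32), %amt:_(s32)
  // becomes:
  //   %ext:_(s64) = G_ZEXT %amt:_(s32)
  //   %dst:_(s32) = G_LSHR %src:_(s32), %ext:_(s64)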
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires that the instruction, if not deleted, be
  // fully legalized. In order to allow further legalization of the inst, we
  // create a new instruction and erase the existing one.
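  // For example (illustrative MIR):
  //   %val:_(<2 x p0>) = G_LOAD %addr:_(p0)
  // becomes:
  //   %tmp:_(<2 x s64>) = G_LOAD %addr:_(p0)
  //   %val:_(<2 x p0>) = G_BITCAST %tmp:_(<2 x s64>)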

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
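    // That is, DstPtr = (List + Align - 1) & ~(Align - 1): the G_GEP below
    // adds Align - 1 and the G_PTR_MASK clears the low Log2_64(Align) bits.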
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}