//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
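
// Everything in the constructor below builds the legalization rule table:
// each getActionDefinitionsBuilder() call declares, for one or more generic
// opcodes, which type combinations are legal and how the remaining ones
// should be mutated (widened, clamped, scalarized, lowered, or turned into
// libcalls).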
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);
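
  // Shorthand used throughout: sN is an N-bit scalar, vMsN is a vector of
  // M N-bit elements, and p0 is a 64-bit pointer in address space 0.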

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);
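
  // Integer arithmetic and logic are legal on 32/64-bit scalars and on the
  // usual 64/128-bit NEON vectors; narrower scalars get widened into range
  // and odd vector lengths are padded to the next power of two.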
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);
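
  // G_GEP only does pointer arithmetic with an s64 offset; any narrower
  // offset is extended to s64 by the clamp below.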
  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);
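
  // Scalar 32-bit right-shifts by a 32-bit amount take the custom path so
  // that legalizeShlAshrLshr() below can zero-extend constant shift amounts
  // to s64, letting the imported SelectionDAG patterns select them.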
  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
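
  // The carry/overflow-producing ops return a second s1-typed result holding
  // the carry or overflow bit, hence the {sN, s1} pairs below.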
  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 8},
                                 {s64, p0, 16, 8},
                                 {s64, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();
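
  // Predicate matching vectors of address-space-0 pointers; loads and stores
  // of those are sent to the custom legalizeLoadStore() below, which bitcasts
  // the value to an equivalent integer vector.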
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 needs narrowing.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };

  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
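
  // There is no native G_SEXT_INREG instruction; let the generic
  // LegalizerHelper expand it (into a shift-left/arithmetic-shift-right pair).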
  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks all the tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
      .lowerIf(all(
          typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

  getActionDefinitionsBuilder(
      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
       G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
       G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
      .legalIf(all(
          typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
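
  // Merge/Unmerge: for G_MERGE_VALUES the "big" type is the destination
  // (type index 0), for G_UNMERGE_VALUES it is the source (index 1); the
  // same rule set is instantiated for both orientations below.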
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits =
                  1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }
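
  // Vector element accesses: the scalar result of G_EXTRACT_VECTOR_ELT must
  // match the vector's element type exactly; mismatches are rejected rather
  // than implicitly truncated.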
  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
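
  // All rules have been declared; freeze them into the lookup tables and
  // sanity-check the coverage of the rule set.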
  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires the instruction, if not deleted, must be fully
  // legalized. In order to allow further legalization of the inst, we create
  // a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}
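
// G_VAARG expansion: load the current va_list pointer, realign it if the
// requested alignment exceeds the slot size, load the argument value, and
// store the advanced pointer back to the list.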
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}