//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
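
// The constructor below populates the per-opcode legalization rule tables that
// the GlobalISel Legalizer queries; computeTables() at the end freezes the
// tables and verify() sanity-checks them against the target's instruction info.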
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

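  // Each getActionDefinitionsBuilder() call below declares, for one opcode (or
  // a group of opcodes), the type combinations that are directly legal plus
  // the mutations (clamp, widen, scalarize, lower, ...) used to funnel every
  // other type toward a legal one.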
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor(
          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      //       how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();

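  // Shared predicate for the load/store rules below: it matches vectors whose
  // elements are pointers in address space 0. Those cases are sent to the
  // custom handling in legalizeLoadStore().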
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      //       how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      //       how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

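  // Extensions: G_ZEXT, G_SEXT and G_ANYEXT share the legality predicate
  // defined next. It accepts power-of-2 destination sizes from 8 to 128 bits
  // (128 only for vectors), with an s1 source or a power-of-2 source of at
  // least 8 bits.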
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 is not legal.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_ANYEXT}).legalIf(ExtLegalFunc);
  getActionDefinitionsBuilder(G_SEXT)
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it was too much trouble.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
      .lowerIf(all(
          typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

  getActionDefinitionsBuilder(
      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
       G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
       G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
      .legalIf(all(
          typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

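  // Merge/Unmerge: G_MERGE_VALUES and G_UNMERGE_VALUES share one rule set.
  // Only the operand roles differ, so BigTyIdx/LitTyIdx below select the wide
  // and narrow type indices for whichever opcode is being defined.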
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits =
                  1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  computeTables();
  verify(*ST.getInstrInfo());
}

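// Entry point for instructions marked Custom by the rules above (G_VAARG,
// G_LOAD/G_STORE of pointer-element vectors, and the 32-bit shift cases);
// each is dispatched to one of the helper functions below.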
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

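// Intrinsic legalization: the memory intrinsics are rewritten into libcalls
// via createMemLibcall(); all other intrinsics are accepted as-is.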
bool AArch64LegalizerInfo::legalizeIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

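// Shift legalization: if the shift amount is a G_CONSTANT that fits the
// immediate form, zero-extend it to 64 bits so the imported SelectionDAG
// patterns can select the immediate variant; otherwise keep the register form.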
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

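// Load/store legalization for vectors of pointers: bitcast the value to the
// equivalent vector of integers so the existing s64-based patterns can match,
// then rebuild the memory operation and erase the original instruction.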
bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires the instruction, if not deleted, must be fully
  // legalized. In order to allow further legalization of the inst, we create
  // a new instruction and erase the existing one.

  unsigned ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

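// G_VAARG lowering: load the current va_list pointer, realign it when the
// requested alignment exceeds the slot size, load the argument value, then
// advance the list pointer by the slot-aligned value size and store it back.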
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}