[InstCombine] Signed saturation patterns
[llvm-core.git] / lib / Target / X86 / X86LegalizerInfo.cpp
blob04121f863c898dd3f093e5ba84ecd37e665539b7
//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
13 #include "X86LegalizerInfo.h"
14 #include "X86Subtarget.h"
15 #include "X86TargetMachine.h"
16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17 #include "llvm/CodeGen/TargetOpcodes.h"
18 #include "llvm/CodeGen/ValueTypes.h"
19 #include "llvm/IR/DerivedTypes.h"
20 #include "llvm/IR/Type.h"
22 using namespace llvm;
23 using namespace TargetOpcode;
24 using namespace LegalizeActions;
26 /// FIXME: The following static functions are SizeChangeStrategy functions
27 /// that are meant to temporarily mimic the behaviour of the old legalization
28 /// based on doubling/halving non-legal types as closely as possible. This is
29 /// not entirly possible as only legalizing the types that are exactly a power
30 /// of 2 times the size of the legal types would require specifying all those
31 /// sizes explicitly.
32 /// In practice, not specifying those isn't a problem, and the below functions
33 /// should disappear quickly as we add support for legalizing non-power-of-2
34 /// sized types further.
35 static void
36 addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result,
37 const LegalizerInfo::SizeAndActionsVec &v) {
38 for (unsigned i = 0; i < v.size(); ++i) {
39 result.push_back(v[i]);
40 if (i + 1 < v[i].first && i + 1 < v.size() &&
41 v[i + 1].first != v[i].first + 1)
42 result.push_back({v[i].first + 1, Unsupported});
46 static LegalizerInfo::SizeAndActionsVec
47 widen_1(const LegalizerInfo::SizeAndActionsVec &v) {
48 assert(v.size() >= 1);
49 assert(v[0].first > 1);
50 LegalizerInfo::SizeAndActionsVec result = {{1, WidenScalar},
51 {2, Unsupported}};
52 addAndInterleaveWithUnsupported(result, v);
53 auto Largest = result.back().first;
54 result.push_back({Largest + 1, Unsupported});
55 return result;
58 X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
59 const X86TargetMachine &TM)
60 : Subtarget(STI), TM(TM) {
62 setLegalizerInfo32bit();
63 setLegalizerInfo64bit();
64 setLegalizerInfoSSE1();
65 setLegalizerInfoSSE2();
66 setLegalizerInfoSSE41();
67 setLegalizerInfoAVX();
68 setLegalizerInfoAVX2();
69 setLegalizerInfoAVX512();
70 setLegalizerInfoAVX512DQ();
71 setLegalizerInfoAVX512BW();
73 setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1);
74 for (unsigned BinOp : {G_SUB, G_MUL, G_AND, G_OR, G_XOR})
75 setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1);
76 for (unsigned MemOp : {G_LOAD, G_STORE})
77 setLegalizeScalarToDifferentSizeStrategy(MemOp, 0,
78 narrowToSmallerAndWidenToSmallest);
79 setLegalizeScalarToDifferentSizeStrategy(
80 G_GEP, 1, widenToLargerTypesUnsupportedOtherwise);
81 setLegalizeScalarToDifferentSizeStrategy(
82 G_CONSTANT, 0, widenToLargerTypesAndNarrowToLargest);
84 computeTables();
85 verify(*STI.getInstrInfo());
88 bool X86LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
89 MachineRegisterInfo &MRI,
90 MachineIRBuilder &MIRBuilder) const {
91 switch (MI.getIntrinsicID()) {
92 case Intrinsic::memcpy:
93 case Intrinsic::memset:
94 case Intrinsic::memmove:
95 if (createMemLibcall(MIRBuilder, MRI, MI) ==
96 LegalizerHelper::UnableToLegalize)
97 return false;
98 MI.eraseFromParent();
99 return true;
100 default:
101 break;
103 return true;
106 void X86LegalizerInfo::setLegalizerInfo32bit() {
108 const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
109 const LLT s1 = LLT::scalar(1);
110 const LLT s8 = LLT::scalar(8);
111 const LLT s16 = LLT::scalar(16);
112 const LLT s32 = LLT::scalar(32);
113 const LLT s64 = LLT::scalar(64);
114 const LLT s128 = LLT::scalar(128);
116 for (auto Ty : {p0, s1, s8, s16, s32})
117 setAction({G_IMPLICIT_DEF, Ty}, Legal);
119 for (auto Ty : {s8, s16, s32, p0})
120 setAction({G_PHI, Ty}, Legal);
122 for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
123 for (auto Ty : {s8, s16, s32})
124 setAction({BinOp, Ty}, Legal);
126 for (unsigned Op : {G_UADDE}) {
127 setAction({Op, s32}, Legal);
128 setAction({Op, 1, s1}, Legal);
131 for (unsigned MemOp : {G_LOAD, G_STORE}) {
132 for (auto Ty : {s8, s16, s32, p0})
133 setAction({MemOp, Ty}, Legal);
135 // And everything's fine in addrspace 0.
136 setAction({MemOp, 1, p0}, Legal);
139 // Pointer-handling
140 setAction({G_FRAME_INDEX, p0}, Legal);
141 setAction({G_GLOBAL_VALUE, p0}, Legal);
143 setAction({G_GEP, p0}, Legal);
144 setAction({G_GEP, 1, s32}, Legal);
146 if (!Subtarget.is64Bit()) {
147 getActionDefinitionsBuilder(G_PTRTOINT)
148 .legalForCartesianProduct({s1, s8, s16, s32}, {p0})
149 .maxScalar(0, s32)
150 .widenScalarToNextPow2(0, /*Min*/ 8);
151 getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
153 // Shifts and SDIV
154 getActionDefinitionsBuilder(
155 {G_SDIV, G_SREM, G_UDIV, G_UREM})
156 .legalFor({s8, s16, s32})
157 .clampScalar(0, s8, s32);
159 getActionDefinitionsBuilder(
160 {G_SHL, G_LSHR, G_ASHR})
161 .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
162 .clampScalar(0, s8, s32)
163 .clampScalar(1, s8, s8);
166 // Control-flow
167 setAction({G_BRCOND, s1}, Legal);
169 // Constants
170 for (auto Ty : {s8, s16, s32, p0})
171 setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
173 // Extensions
174 for (auto Ty : {s8, s16, s32}) {
175 setAction({G_ZEXT, Ty}, Legal);
176 setAction({G_SEXT, Ty}, Legal);
177 setAction({G_ANYEXT, Ty}, Legal);
179 setAction({G_ANYEXT, s128}, Legal);
180 getActionDefinitionsBuilder(G_SEXT_INREG).lower();
182 // Comparison
183 setAction({G_ICMP, s1}, Legal);
185 for (auto Ty : {s8, s16, s32, p0})
186 setAction({G_ICMP, 1, Ty}, Legal);
188 // Merge/Unmerge
189 for (const auto &Ty : {s16, s32, s64}) {
190 setAction({G_MERGE_VALUES, Ty}, Legal);
191 setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
193 for (const auto &Ty : {s8, s16, s32}) {
194 setAction({G_MERGE_VALUES, 1, Ty}, Legal);
195 setAction({G_UNMERGE_VALUES, Ty}, Legal);
199 void X86LegalizerInfo::setLegalizerInfo64bit() {
201 if (!Subtarget.is64Bit())
202 return;
204 const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
205 const LLT s1 = LLT::scalar(1);
206 const LLT s8 = LLT::scalar(8);
207 const LLT s16 = LLT::scalar(16);
208 const LLT s32 = LLT::scalar(32);
209 const LLT s64 = LLT::scalar(64);
210 const LLT s128 = LLT::scalar(128);
212 setAction({G_IMPLICIT_DEF, s64}, Legal);
213 // Need to have that, as tryFoldImplicitDef will create this pattern:
214 // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
215 setAction({G_IMPLICIT_DEF, s128}, Legal);
217 setAction({G_PHI, s64}, Legal);
219 for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
220 setAction({BinOp, s64}, Legal);
222 for (unsigned MemOp : {G_LOAD, G_STORE})
223 setAction({MemOp, s64}, Legal);
225 // Pointer-handling
226 setAction({G_GEP, 1, s64}, Legal);
227 getActionDefinitionsBuilder(G_PTRTOINT)
228 .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
229 .maxScalar(0, s64)
230 .widenScalarToNextPow2(0, /*Min*/ 8);
231 getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s64}});
233 // Constants
234 setAction({TargetOpcode::G_CONSTANT, s64}, Legal);
236 // Extensions
237 for (unsigned extOp : {G_ZEXT, G_SEXT, G_ANYEXT}) {
238 setAction({extOp, s64}, Legal);
241 getActionDefinitionsBuilder(G_SITOFP)
242 .legalForCartesianProduct({s32, s64})
243 .clampScalar(1, s32, s64)
244 .widenScalarToNextPow2(1)
245 .clampScalar(0, s32, s64)
246 .widenScalarToNextPow2(0);
248 getActionDefinitionsBuilder(G_FPTOSI)
249 .legalForCartesianProduct({s32, s64})
250 .clampScalar(1, s32, s64)
251 .widenScalarToNextPow2(0)
252 .clampScalar(0, s32, s64)
253 .widenScalarToNextPow2(1);
255 // Comparison
256 setAction({G_ICMP, 1, s64}, Legal);
258 getActionDefinitionsBuilder(G_FCMP)
259 .legalForCartesianProduct({s8}, {s32, s64})
260 .clampScalar(0, s8, s8)
261 .clampScalar(1, s32, s64)
262 .widenScalarToNextPow2(1);
264 // Divisions
265 getActionDefinitionsBuilder(
266 {G_SDIV, G_SREM, G_UDIV, G_UREM})
267 .legalFor({s8, s16, s32, s64})
268 .clampScalar(0, s8, s64);
270 // Shifts
271 getActionDefinitionsBuilder(
272 {G_SHL, G_LSHR, G_ASHR})
273 .legalFor({{s8, s8}, {s16, s8}, {s32, s8}, {s64, s8}})
274 .clampScalar(0, s8, s64)
275 .clampScalar(1, s8, s8);
277 // Merge/Unmerge
278 setAction({G_MERGE_VALUES, s128}, Legal);
279 setAction({G_UNMERGE_VALUES, 1, s128}, Legal);
280 setAction({G_MERGE_VALUES, 1, s128}, Legal);
281 setAction({G_UNMERGE_VALUES, s128}, Legal);
284 void X86LegalizerInfo::setLegalizerInfoSSE1() {
285 if (!Subtarget.hasSSE1())
286 return;
288 const LLT s32 = LLT::scalar(32);
289 const LLT s64 = LLT::scalar(64);
290 const LLT v4s32 = LLT::vector(4, 32);
291 const LLT v2s64 = LLT::vector(2, 64);
293 for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
294 for (auto Ty : {s32, v4s32})
295 setAction({BinOp, Ty}, Legal);
297 for (unsigned MemOp : {G_LOAD, G_STORE})
298 for (auto Ty : {v4s32, v2s64})
299 setAction({MemOp, Ty}, Legal);
301 // Constants
302 setAction({TargetOpcode::G_FCONSTANT, s32}, Legal);
304 // Merge/Unmerge
305 for (const auto &Ty : {v4s32, v2s64}) {
306 setAction({G_CONCAT_VECTORS, Ty}, Legal);
307 setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
309 setAction({G_MERGE_VALUES, 1, s64}, Legal);
310 setAction({G_UNMERGE_VALUES, s64}, Legal);
313 void X86LegalizerInfo::setLegalizerInfoSSE2() {
314 if (!Subtarget.hasSSE2())
315 return;
317 const LLT s32 = LLT::scalar(32);
318 const LLT s64 = LLT::scalar(64);
319 const LLT v16s8 = LLT::vector(16, 8);
320 const LLT v8s16 = LLT::vector(8, 16);
321 const LLT v4s32 = LLT::vector(4, 32);
322 const LLT v2s64 = LLT::vector(2, 64);
324 const LLT v32s8 = LLT::vector(32, 8);
325 const LLT v16s16 = LLT::vector(16, 16);
326 const LLT v8s32 = LLT::vector(8, 32);
327 const LLT v4s64 = LLT::vector(4, 64);
329 for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
330 for (auto Ty : {s64, v2s64})
331 setAction({BinOp, Ty}, Legal);
333 for (unsigned BinOp : {G_ADD, G_SUB})
334 for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
335 setAction({BinOp, Ty}, Legal);
337 setAction({G_MUL, v8s16}, Legal);
339 setAction({G_FPEXT, s64}, Legal);
340 setAction({G_FPEXT, 1, s32}, Legal);
342 setAction({G_FPTRUNC, s32}, Legal);
343 setAction({G_FPTRUNC, 1, s64}, Legal);
345 // Constants
346 setAction({TargetOpcode::G_FCONSTANT, s64}, Legal);
348 // Merge/Unmerge
349 for (const auto &Ty :
350 {v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
351 setAction({G_CONCAT_VECTORS, Ty}, Legal);
352 setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
354 for (const auto &Ty : {v16s8, v8s16, v4s32, v2s64}) {
355 setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
356 setAction({G_UNMERGE_VALUES, Ty}, Legal);
360 void X86LegalizerInfo::setLegalizerInfoSSE41() {
361 if (!Subtarget.hasSSE41())
362 return;
364 const LLT v4s32 = LLT::vector(4, 32);
366 setAction({G_MUL, v4s32}, Legal);
369 void X86LegalizerInfo::setLegalizerInfoAVX() {
370 if (!Subtarget.hasAVX())
371 return;
373 const LLT v16s8 = LLT::vector(16, 8);
374 const LLT v8s16 = LLT::vector(8, 16);
375 const LLT v4s32 = LLT::vector(4, 32);
376 const LLT v2s64 = LLT::vector(2, 64);
378 const LLT v32s8 = LLT::vector(32, 8);
379 const LLT v64s8 = LLT::vector(64, 8);
380 const LLT v16s16 = LLT::vector(16, 16);
381 const LLT v32s16 = LLT::vector(32, 16);
382 const LLT v8s32 = LLT::vector(8, 32);
383 const LLT v16s32 = LLT::vector(16, 32);
384 const LLT v4s64 = LLT::vector(4, 64);
385 const LLT v8s64 = LLT::vector(8, 64);
387 for (unsigned MemOp : {G_LOAD, G_STORE})
388 for (auto Ty : {v8s32, v4s64})
389 setAction({MemOp, Ty}, Legal);
391 for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) {
392 setAction({G_INSERT, Ty}, Legal);
393 setAction({G_EXTRACT, 1, Ty}, Legal);
395 for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) {
396 setAction({G_INSERT, 1, Ty}, Legal);
397 setAction({G_EXTRACT, Ty}, Legal);
399 // Merge/Unmerge
400 for (const auto &Ty :
401 {v32s8, v64s8, v16s16, v32s16, v8s32, v16s32, v4s64, v8s64}) {
402 setAction({G_CONCAT_VECTORS, Ty}, Legal);
403 setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
405 for (const auto &Ty :
406 {v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
407 setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
408 setAction({G_UNMERGE_VALUES, Ty}, Legal);
412 void X86LegalizerInfo::setLegalizerInfoAVX2() {
413 if (!Subtarget.hasAVX2())
414 return;
416 const LLT v32s8 = LLT::vector(32, 8);
417 const LLT v16s16 = LLT::vector(16, 16);
418 const LLT v8s32 = LLT::vector(8, 32);
419 const LLT v4s64 = LLT::vector(4, 64);
421 const LLT v64s8 = LLT::vector(64, 8);
422 const LLT v32s16 = LLT::vector(32, 16);
423 const LLT v16s32 = LLT::vector(16, 32);
424 const LLT v8s64 = LLT::vector(8, 64);
426 for (unsigned BinOp : {G_ADD, G_SUB})
427 for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
428 setAction({BinOp, Ty}, Legal);
430 for (auto Ty : {v16s16, v8s32})
431 setAction({G_MUL, Ty}, Legal);
433 // Merge/Unmerge
434 for (const auto &Ty : {v64s8, v32s16, v16s32, v8s64}) {
435 setAction({G_CONCAT_VECTORS, Ty}, Legal);
436 setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
438 for (const auto &Ty : {v32s8, v16s16, v8s32, v4s64}) {
439 setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
440 setAction({G_UNMERGE_VALUES, Ty}, Legal);
444 void X86LegalizerInfo::setLegalizerInfoAVX512() {
445 if (!Subtarget.hasAVX512())
446 return;
448 const LLT v16s8 = LLT::vector(16, 8);
449 const LLT v8s16 = LLT::vector(8, 16);
450 const LLT v4s32 = LLT::vector(4, 32);
451 const LLT v2s64 = LLT::vector(2, 64);
453 const LLT v32s8 = LLT::vector(32, 8);
454 const LLT v16s16 = LLT::vector(16, 16);
455 const LLT v8s32 = LLT::vector(8, 32);
456 const LLT v4s64 = LLT::vector(4, 64);
458 const LLT v64s8 = LLT::vector(64, 8);
459 const LLT v32s16 = LLT::vector(32, 16);
460 const LLT v16s32 = LLT::vector(16, 32);
461 const LLT v8s64 = LLT::vector(8, 64);
463 for (unsigned BinOp : {G_ADD, G_SUB})
464 for (auto Ty : {v16s32, v8s64})
465 setAction({BinOp, Ty}, Legal);
467 setAction({G_MUL, v16s32}, Legal);
469 for (unsigned MemOp : {G_LOAD, G_STORE})
470 for (auto Ty : {v16s32, v8s64})
471 setAction({MemOp, Ty}, Legal);
473 for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) {
474 setAction({G_INSERT, Ty}, Legal);
475 setAction({G_EXTRACT, 1, Ty}, Legal);
477 for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) {
478 setAction({G_INSERT, 1, Ty}, Legal);
479 setAction({G_EXTRACT, Ty}, Legal);
482 /************ VLX *******************/
483 if (!Subtarget.hasVLX())
484 return;
486 for (auto Ty : {v4s32, v8s32})
487 setAction({G_MUL, Ty}, Legal);
490 void X86LegalizerInfo::setLegalizerInfoAVX512DQ() {
491 if (!(Subtarget.hasAVX512() && Subtarget.hasDQI()))
492 return;
494 const LLT v8s64 = LLT::vector(8, 64);
496 setAction({G_MUL, v8s64}, Legal);
498 /************ VLX *******************/
499 if (!Subtarget.hasVLX())
500 return;
502 const LLT v2s64 = LLT::vector(2, 64);
503 const LLT v4s64 = LLT::vector(4, 64);
505 for (auto Ty : {v2s64, v4s64})
506 setAction({G_MUL, Ty}, Legal);
509 void X86LegalizerInfo::setLegalizerInfoAVX512BW() {
510 if (!(Subtarget.hasAVX512() && Subtarget.hasBWI()))
511 return;
513 const LLT v64s8 = LLT::vector(64, 8);
514 const LLT v32s16 = LLT::vector(32, 16);
516 for (unsigned BinOp : {G_ADD, G_SUB})
517 for (auto Ty : {v64s8, v32s16})
518 setAction({BinOp, Ty}, Legal);
520 setAction({G_MUL, v32s16}, Legal);
522 /************ VLX *******************/
523 if (!Subtarget.hasVLX())
524 return;
526 const LLT v8s16 = LLT::vector(8, 16);
527 const LLT v16s16 = LLT::vector(16, 16);
529 for (auto Ty : {v8s16, v16s16})
530 setAction({G_MUL, Ty}, Legal);