//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
    : AArch64GenRegisterBankInfo() {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the floating-point and
    // vector registers: FPR8-FPR128 plus the D and Q register tuples.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.getSize() == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
    // Now, the content.
    // Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

    // Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

    // Check the value mapping for 3-operand instructions where all the
    // operands map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);   \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
                                        " Src is incorrectly initialized");    \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
                                        " Src is incorrectly initialized");    \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
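
// Copies between GPR and FPR go through FMOV. The asymmetric costs below are
// hand-tuned heuristics, not values from the scheduling model (see the FIXME
// in the function body).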
unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           unsigned Size) const {
  // What do we do with different sizes? For now, copies are assumed to be of
  // the same size. We will introduce other hooks for the different-size cases:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // A 32-bit or 64-bit G_OR can be mapped to either FPR or GPR for the
    // same cost.
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AArch64RegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  switch (OpdMapper.getMI().getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
/// having only floating-point operands.
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return true;
  }
  return false;
}
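
/// Compute a mapping where all operands share one bank and size: FPR if the
/// destination type is a vector or the opcode is a floating-point operation,
/// GPR otherwise.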
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
static bool isFPIntrinsic(unsigned ID) {
  // TODO: Add more intrinsics.
  switch (ID) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
    return true;
  }
}
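
/// Check whether \p MI is known to carry floating-point values: true for FP
/// intrinsics and generic FP opcodes. For copy-like instructions (COPY, PHI,
/// optimization hints) the already-assigned register bank decides; unassigned
/// PHIs recurse into their incoming defs, up to MaxFPRSearchDepth.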
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.

  // If we have a phi, we may be able to infer that it will be assigned an FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}
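
/// Check whether \p MI only consumes floating-point values, either directly
/// (FP-to-integer conversions, FP compares, lround/llround) or via the
/// copy-chain analysis in hasFPConstraints.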
bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}
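
/// Check whether \p MI only produces floating-point values, either directly
/// (integer-to-FP conversions, vector element/build operations, G_DUP) or via
/// the copy-chain analysis in hasFPConstraints.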
bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((Register::isPhysicalRegister(DstReg) ||
         !MRI.getType(DstReg).isValid()) ||
        (Register::isPhysicalRegister(SrcReg) ||
         !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null that means both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      unsigned Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    LLVM_FALLTHROUGH;
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // s8 is an exception for G_DUP, which we always want on GPR.
    if (ScalarTy.getSizeInBits() != 8 &&
        (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
         onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          OpSize[0]);
    break;
  case TargetOpcode::G_LOAD:
    // Loading in vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct use in an FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to an FPR register bank, then make sure
    // that we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use FPR unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to an FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have an exact-size GPR register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // GPR without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to an FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned ID = MI.getIntrinsicID();
    unsigned Idx = 0;
    if (!isFPIntrinsic(ID))
      break;
    for (const auto &Op : MI.explicit_operands()) {
      if (Op.isReg())
        OpRegBankIdx[Idx] = PMI_FirstFPR;
      ++Idx;
    }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}