//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;
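
// Mapping ID reserved for instructions whose mapping needs the custom
// rewriting in applyMappingImpl (currently G_DUP and G_INSERT_VECTOR_ELT with
// small scalar GPR operands); must stay distinct from DefaultMappingID.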
static const unsigned CustomMappingID = 1;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // QQ + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operand instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID,    \
                                             AArch64::RBNameSrc##RegBankID,    \
                                             TypeSize::getFixed(Size));        \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           #RBNameDst #Size " Dst is incorrectly initialized");                \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           #RBNameSrc #Size " Src is incorrectly initialized");                \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
           " Src is incorrectly initialized");                                 \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
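
/// Cross-bank copies between GPR and FPR require an FMOV, so they are priced
/// higher than same-bank copies, which fall back to the generic cost.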
unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           const TypeSize Size) const {
  // What do we do with different size?
  // Copies are assumed to be the same size.
  // Will introduce other hooks for different size:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}
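
/// GPR64sponly (the class containing only SP/WSP) is special-cased to the GPR
/// bank; all other classes defer to the TableGen'ed mapping.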
const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT Ty) const {
  switch (RC.getID()) {
  case AArch64::GPR64sponlyRegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  default:
    return AArch64GenRegisterBankInfo::getRegBankFromRegClass(RC, Ty);
  }
}
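
/// The alternative mappings let RegBankSelect's greedy mode weigh a GPR
/// mapping against an FPR one (and, for G_BITCAST, the cross-bank variants
/// with their copy cost) for the opcodes handled below.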
RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32 and 64-bit or can be mapped on either FPR or
    // GPR for the same cost.
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstGPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstFPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
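
/// Applies the mappings chosen above: the alternative mappings for G_OR,
/// G_BITCAST and G_LOAD reduce to the default behaviour, while the
/// CustomMappingID cases first widen sub-32-bit scalar operands of
/// G_INSERT_VECTOR_ELT and G_DUP to 32-bit GPR values.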
void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // Extend smaller gpr operands to 32 bit.
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
    auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
    MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(2).setReg(Ext.getReg(0));
    return applyDefaultMapping(OpdMapper);
  }
  case AArch64::G_DUP: {
    // Extend smaller gpr to 32-bits.
    assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
           "Expected sources smaller than 32-bits");
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());

    Register ConstReg;
    auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
    if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
      ConstReg =
          Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
    } else {
      ConstReg =
          Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
              .getReg(0);
    }
    MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(1).setReg(ConstReg);
    return applyDefaultMapping(OpdMapper);
  }
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}
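
/// Builds a mapping where every operand lives in the same bank: FPR when the
/// result type is a vector or the opcode is a floating-point operation, GPR
/// otherwise.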
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  TypeSize Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}
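
/// \returns true if \p MI is a G_PHI whose result is (transitively) used by an
/// instruction that only takes FPR inputs; the recursive walk is bounded by
/// MaxFPRSearchDepth.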
bool AArch64RegisterBankInfo::isPHIWithFPContraints(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI, const unsigned Depth) const {
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
                  if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
                    return true;
                  return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
                });
}
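
/// \returns true if \p MI is known to carry a floating-point value: either it
/// is an FP instruction or intrinsic, or it is a copy-like instruction (COPY,
/// PHI, optimization hint) whose bank or inputs are already known to be FPR.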
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned a FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}
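
/// Guesses whether a load produces a floating-point value by inspecting the IR
/// value attached to its memory operand: the value type of a global, or the
/// type seen by other load/store users of the same pointer.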
bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading.
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type.
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}
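
/// Computes the final mapping for \p MI: a first pass guesses GPR or FPR per
/// operand from its type (vectors and types wider than 64 bits go to FPR),
/// then the switch below fine-tunes opcodes with mixed GPR/FPR operands.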
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RB are null that means both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    TypeSize Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();
  unsigned MappingID = DefaultMappingID;

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();

    // As a top-level guess, vectors including both scalable and non-scalable
    // ones go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector())
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else if (isPreISelGenericFloatingPointOpcode(Opc) ||
             Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on gpr.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else {
      if (ScalarTy.getSizeInBits() < 32 &&
          getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
        // Calls applyMappingImpl().
        MappingID = CustomMappingID;
      }
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use a FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading in vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct or indirect use
                 // in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().
                 if (isPHIWithFPContraints(UseMI, MRI, TRI))
                   return true;
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else {
      // If the type is i8/i16, and the regbank will be GPR, then we change the
      // type to i32 in applyMappingImpl.
      LLT Ty = MRI.getType(MI.getOperand(2).getReg());
      if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
        // Calls applyMappingImpl().
        MappingID = CustomMappingID;
      }
      OpRegBankIdx[2] = PMI_FirstGPR;
    }

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have an exact size gpr register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector, the scalar should be on
    // FPR.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned Idx = 0;
    if (onlyDefinesFP(MI, MRI, TRI))
      for (const auto &Op : MI.defs()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    else
      Idx += MI.getNumExplicitDefs();

    if (onlyUsesFP(MI, MRI, TRI))
      for (const auto &Op : MI.explicit_uses()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping =
          getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
                               NumOperands);
}