lib/Target/PowerPC/PPCTargetMachine.cpp

   1 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Top-level implementation for the PowerPC target.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "PPCTargetMachine.h"
  14 #include "MCTargetDesc/PPCMCTargetDesc.h"
  15 #include "PPC.h"
  16 #include "PPCMachineScheduler.h"
  17 #include "PPCSubtarget.h"
  18 #include "PPCTargetObjectFile.h"
  19 #include "PPCTargetTransformInfo.h"
  20 #include "TargetInfo/PowerPCTargetInfo.h"
  21 #include "llvm/ADT/Optional.h"
  22 #include "llvm/ADT/STLExtras.h"
  23 #include "llvm/ADT/StringRef.h"
  24 #include "llvm/ADT/Triple.h"
  25 #include "llvm/Analysis/TargetTransformInfo.h"
  26 #include "llvm/CodeGen/Passes.h"
  27 #include "llvm/CodeGen/TargetPassConfig.h"
  28 #include "llvm/CodeGen/MachineScheduler.h"
  29 #include "llvm/IR/Attributes.h"
  30 #include "llvm/IR/DataLayout.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/Pass.h"
  33 #include "llvm/Support/CodeGen.h"
  34 #include "llvm/Support/CommandLine.h"
  35 #include "llvm/Support/TargetRegistry.h"
  36 #include "llvm/Target/TargetLoweringObjectFile.h"
  37 #include "llvm/Target/TargetOptions.h"
  38 #include "llvm/Transforms/Scalar.h"
  39 #include <cassert>
  40 #include <memory>
  41 #include <string>
  42
  43 using namespace llvm;
  44
  45
  46 static cl::opt<bool>
  47     EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden,
  48                            cl::desc("enable coalescing of duplicate branches for PPC"));
  49 static cl::
  50 opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
  51                         cl::desc("Disable CTR loops for PPC"));
  52
  53 static cl::
  54 opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
  55                             cl::desc("Disable PPC loop preinc prep"));
  56
  57 static cl::opt<bool>
  58 VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
  59   cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
  60
  61 static cl::
  62 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
  63                                 cl::desc("Disable VSX Swap Removal for PPC"));
  64
  65 static cl::
  66 opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
  67                               cl::desc("Disable QPX load splat simplification"));
  68
  69 static cl::
  70 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
  71                             cl::desc("Disable machine peepholes for PPC"));
  72
  73 static cl::opt<bool>
  74 EnableGEPOpt("ppc-gep-opt", cl::Hidden,
  75              cl::desc("Enable optimizations on complex GEPs"),
  76              cl::init(true));
  77
  78 static cl::opt<bool>
  79 EnablePrefetch("enable-ppc-prefetching",
  80                   cl::desc("disable software prefetching on PPC"),
  81                   cl::init(false), cl::Hidden);
  82
  83 static cl::opt<bool>
  84 EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
  85                       cl::desc("Add extra TOC register dependencies"),
  86                       cl::init(true), cl::Hidden);
  87
  88 static cl::opt<bool>
  89 EnableMachineCombinerPass("ppc-machine-combiner",
  90                           cl::desc("Enable the machine combiner pass"),
  91                           cl::init(true), cl::Hidden);
  92
  93 static cl::opt<bool>
  94   ReduceCRLogical("ppc-reduce-cr-logicals",
  95                   cl::desc("Expand eligible cr-logical binary ops to branches"),
  96                   cl::init(false), cl::Hidden);
  97 extern "C" void LLVMInitializePowerPCTarget() {
  98   // Register the targets
  99   RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
 100   RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
 101   RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
 102
 103   PassRegistry &PR = *PassRegistry::getPassRegistry();
 104 #ifndef NDEBUG
 105   initializePPCCTRLoopsVerifyPass(PR);
 106 #endif
 107   initializePPCLoopPreIncPrepPass(PR);
 108   initializePPCTOCRegDepsPass(PR);
 109   initializePPCEarlyReturnPass(PR);
 110   initializePPCVSXCopyPass(PR);
 111   initializePPCVSXFMAMutatePass(PR);
 112   initializePPCVSXSwapRemovalPass(PR);
 113   initializePPCReduceCRLogicalsPass(PR);
 114   initializePPCBSelPass(PR);
 115   initializePPCBranchCoalescingPass(PR);
 116   initializePPCQPXLoadSplatPass(PR);
 117   initializePPCBoolRetToIntPass(PR);
 118   initializePPCExpandISELPass(PR);
 119   initializePPCPreEmitPeepholePass(PR);
 120   initializePPCTLSDynamicCallPass(PR);
 121   initializePPCMIPeepholePass(PR);
 122 }
 123
 124 /// Return the datalayout string of a subtarget.
 125 static std::string getDataLayoutString(const Triple &T) {
 126   bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
 127   std::string Ret;
 128
 129   // Most PPC* platforms are big endian, PPC64LE is little endian.
 130   if (T.getArch() == Triple::ppc64le)
 131     Ret = "e";
 132   else
 133     Ret = "E";
 134
 135   Ret += DataLayout::getManglingComponent(T);
 136
 137   // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
 138   // pointers.
 139   if (!is64Bit || T.getOS() == Triple::Lv2)
 140     Ret += "-p:32:32";
 141
 142   // Note, the alignment values for f64 and i64 on ppc64 in Darwin
 143   // documentation are wrong; these are correct (i.e. "what gcc does").
 144   if (is64Bit || !T.isOSDarwin())
 145     Ret += "-i64:64";
 146   else
 147     Ret += "-f64:32:64";
 148
 149   // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
 150   if (is64Bit)
 151     Ret += "-n32:64";
 152   else
 153     Ret += "-n32";
 154
 155   return Ret;
 156 }
 157
 158 static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
 159                                       const Triple &TT) {
 160   std::string FullFS = FS;
 161
 162   // Make sure 64-bit features are available when CPUname is generic
 163   if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
 164     if (!FullFS.empty())
 165       FullFS = "+64bit," + FullFS;
 166     else
 167       FullFS = "+64bit";
 168   }
 169
 170   if (OL >= CodeGenOpt::Default) {
 171     if (!FullFS.empty())
 172       FullFS = "+crbits," + FullFS;
 173     else
 174       FullFS = "+crbits";
 175   }
 176
 177   if (OL != CodeGenOpt::None) {
 178     if (!FullFS.empty())
 179       FullFS = "+invariant-function-descriptors," + FullFS;
 180     else
 181       FullFS = "+invariant-function-descriptors";
 182   }
 183
 184   return FullFS;
 185 }
 186
 187 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
 188   // If it isn't a Mach-O file then it's going to be a linux ELF
 189   // object file.
 190   if (TT.isOSDarwin())
 191     return llvm::make_unique<TargetLoweringObjectFileMachO>();
 192
 193   return llvm::make_unique<PPC64LinuxTargetObjectFile>();
 194 }
 195
 196 static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
 197                                                  const TargetOptions &Options) {
 198   if (TT.isOSDarwin())
 199     report_fatal_error("Darwin is no longer supported for PowerPC");
 200
 201   if (Options.MCOptions.getABIName().startswith("elfv1"))
 202     return PPCTargetMachine::PPC_ABI_ELFv1;
 203   else if (Options.MCOptions.getABIName().startswith("elfv2"))
 204     return PPCTargetMachine::PPC_ABI_ELFv2;
 205
 206   assert(Options.MCOptions.getABIName().empty() &&
 207          "Unknown target-abi option!");
 208
 209   if (TT.isMacOSX())
 210     return PPCTargetMachine::PPC_ABI_UNKNOWN;
 211
 212   switch (TT.getArch()) {
 213   case Triple::ppc64le:
 214     return PPCTargetMachine::PPC_ABI_ELFv2;
 215   case Triple::ppc64:
 216     if (TT.getEnvironment() == llvm::Triple::ELFv2)
 217       return PPCTargetMachine::PPC_ABI_ELFv2;
 218     return PPCTargetMachine::PPC_ABI_ELFv1;
 219   default:
 220     return PPCTargetMachine::PPC_ABI_UNKNOWN;
 221   }
 222 }
 223
 224 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
 225                                            Optional<Reloc::Model> RM) {
 226   if (RM.hasValue())
 227     return *RM;
 228
 229   // Darwin defaults to dynamic-no-pic.
 230   if (TT.isOSDarwin())
 231     return Reloc::DynamicNoPIC;
 232
 233   // Big Endian PPC is PIC by default.
 234   if (TT.getArch() == Triple::ppc64)
 235     return Reloc::PIC_;
 236
 237   // Rest are static by default.
 238   return Reloc::Static;
 239 }
 240
 241 static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
 242                                                  Optional<CodeModel::Model> CM,
 243                                                  bool JIT) {
 244   if (CM) {
 245     if (*CM == CodeModel::Tiny)
 246       report_fatal_error("Target does not support the tiny CodeModel", false);
 247     if (*CM == CodeModel::Kernel)
 248       report_fatal_error("Target does not support the kernel CodeModel", false);
 249     return *CM;
 250   }
 251   if (!TT.isOSDarwin() && !JIT &&
 252       (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
 253     return CodeModel::Medium;
 254   return CodeModel::Small;
 255 }
 256
 257
 258 static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
 259   const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
 260   ScheduleDAGMILive *DAG =
 261     new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
 262                           llvm::make_unique<PPCPreRASchedStrategy>(C) :
 263                           llvm::make_unique<GenericScheduler>(C));
 264   // add DAG Mutations here.
 265   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
 266   return DAG;
 267 }
 268
 269 static ScheduleDAGInstrs *createPPCPostMachineScheduler(
 270   MachineSchedContext *C) {
 271   const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
 272   ScheduleDAGMI *DAG =
 273     new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
 274                       llvm::make_unique<PPCPostRASchedStrategy>(C) :
 275                       llvm::make_unique<PostGenericScheduler>(C), true);
 276   // add DAG Mutations here.
 277   return DAG;
 278 }
 279
 280 // The FeatureString here is a little subtle. We are modifying the feature
 281 // string with what are (currently) non-function specific overrides as it goes
 282 // into the LLVMTargetMachine constructor and then using the stored value in the
 283 // Subtarget constructor below it.
 284 PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
 285                                    StringRef CPU, StringRef FS,
 286                                    const TargetOptions &Options,
 287                                    Optional<Reloc::Model> RM,
 288                                    Optional<CodeModel::Model> CM,
 289                                    CodeGenOpt::Level OL, bool JIT)
 290     : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
 291                         computeFSAdditions(FS, OL, TT), Options,
 292                         getEffectiveRelocModel(TT, RM),
 293                         getEffectivePPCCodeModel(TT, CM, JIT), OL),
 294       TLOF(createTLOF(getTargetTriple())),
 295       TargetABI(computeTargetABI(TT, Options)) {
 296   initAsmInfo();
 297 }
 298
 299 PPCTargetMachine::~PPCTargetMachine() = default;
 300
 301 const PPCSubtarget *
 302 PPCTargetMachine::getSubtargetImpl(const Function &F) const {
 303   Attribute CPUAttr = F.getFnAttribute("target-cpu");
 304   Attribute FSAttr = F.getFnAttribute("target-features");
 305
 306   std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
 307                         ? CPUAttr.getValueAsString().str()
 308                         : TargetCPU;
 309   std::string FS = !FSAttr.hasAttribute(Attribute::None)
 310                        ? FSAttr.getValueAsString().str()
 311                        : TargetFS;
 312
 313   // FIXME: This is related to the code below to reset the target options,
 314   // we need to know whether or not the soft float flag is set on the
 315   // function before we can generate a subtarget. We also need to use
 316   // it as a key for the subtarget since that can be the only difference
 317   // between two functions.
 318   bool SoftFloat =
 319       F.getFnAttribute("use-soft-float").getValueAsString() == "true";
 320   // If the soft float attribute is set on the function turn on the soft float
 321   // subtarget feature.
 322   if (SoftFloat)
 323     FS += FS.empty() ? "-hard-float" : ",-hard-float";
 324
 325   auto &I = SubtargetMap[CPU + FS];
 326   if (!I) {
 327     // This needs to be done before we create a new subtarget since any
 328     // creation will depend on the TM and the code generation flags on the
 329     // function that reside in TargetOptions.
 330     resetTargetOptions(F);
 331     I = llvm::make_unique<PPCSubtarget>(
 332         TargetTriple, CPU,
 333         // FIXME: It would be good to have the subtarget additions here
 334         // not necessary. Anything that turns them on/off (overrides) ends
 335         // up being put at the end of the feature string, but the defaults
 336         // shouldn't require adding them. Fixing this means pulling Feature64Bit
 337         // out of most of the target cpus in the .td file and making it set only
 338         // as part of initialization via the TargetTriple.
 339         computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
 340   }
 341   return I.get();
 342 }
 343
 344 //===----------------------------------------------------------------------===//
 345 // Pass Pipeline Configuration
 346 //===----------------------------------------------------------------------===//
 347
 348 namespace {
 349
 350 /// PPC Code Generator Pass Configuration Options.
 351 class PPCPassConfig : public TargetPassConfig {
 352 public:
 353   PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM)
 354     : TargetPassConfig(TM, PM) {
 355     // At any optimization level above -O0 we use the Machine Scheduler and not
 356     // the default Post RA List Scheduler.
 357     if (TM.getOptLevel() != CodeGenOpt::None)
 358       substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
 359   }
 360
 361   PPCTargetMachine &getPPCTargetMachine() const {
 362     return getTM<PPCTargetMachine>();
 363   }
 364
 365   void addIRPasses() override;
 366   bool addPreISel() override;
 367   bool addILPOpts() override;
 368   bool addInstSelector() override;
 369   void addMachineSSAOptimization() override;
 370   void addPreRegAlloc() override;
 371   void addPreSched2() override;
 372   void addPreEmitPass() override;
 373   ScheduleDAGInstrs *
 374   createMachineScheduler(MachineSchedContext *C) const override {
 375     return createPPCMachineScheduler(C);
 376   }
 377   ScheduleDAGInstrs *
 378   createPostMachineScheduler(MachineSchedContext *C) const override {
 379     return createPPCPostMachineScheduler(C);
 380   }
 381 };
 382
 383 } // end anonymous namespace
 384
 385 TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
 386   return new PPCPassConfig(*this, PM);
 387 }
 388
 389 void PPCPassConfig::addIRPasses() {
 390   if (TM->getOptLevel() != CodeGenOpt::None)
 391     addPass(createPPCBoolRetToIntPass());
 392   addPass(createAtomicExpandPass());
 393
 394   // For the BG/Q (or if explicitly requested), add explicit data prefetch
 395   // intrinsics.
 396   bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
 397                         getOptLevel() != CodeGenOpt::None;
 398   if (EnablePrefetch.getNumOccurrences() > 0)
 399     UsePrefetching = EnablePrefetch;
 400   if (UsePrefetching)
 401     addPass(createLoopDataPrefetchPass());
 402
 403   if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
 404     // Call SeparateConstOffsetFromGEP pass to extract constants within indices
 405     // and lower a GEP with multiple indices to either arithmetic operations or
 406     // multiple GEPs with single index.
 407     addPass(createSeparateConstOffsetFromGEPPass(true));
 408     // Call EarlyCSE pass to find and remove subexpressions in the lowered
 409     // result.
 410     addPass(createEarlyCSEPass());
 411     // Do loop invariant code motion in case part of the lowered result is
 412     // invariant.
 413     addPass(createLICMPass());
 414   }
 415
 416   TargetPassConfig::addIRPasses();
 417 }
 418
 419 bool PPCPassConfig::addPreISel() {
 420   if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
 421     addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
 422
 423   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
 424     addPass(createHardwareLoopsPass());
 425
 426   return false;
 427 }
 428
 429 bool PPCPassConfig::addILPOpts() {
 430   addPass(&EarlyIfConverterID);
 431
 432   if (EnableMachineCombinerPass)
 433     addPass(&MachineCombinerID);
 434
 435   return true;
 436 }
 437
 438 bool PPCPassConfig::addInstSelector() {
 439   // Install an instruction selector.
 440   addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
 441
 442 #ifndef NDEBUG
 443   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
 444     addPass(createPPCCTRLoopsVerify());
 445 #endif
 446
 447   addPass(createPPCVSXCopyPass());
 448   return false;
 449 }
 450
 451 void PPCPassConfig::addMachineSSAOptimization() {
 452   // PPCBranchCoalescingPass need to be done before machine sinking
 453   // since it merges empty blocks.
 454   if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None)
 455     addPass(createPPCBranchCoalescingPass());
 456   TargetPassConfig::addMachineSSAOptimization();
 457   // For little endian, remove where possible the vector swap instructions
 458   // introduced at code generation to normalize vector element order.
 459   if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
 460       !DisableVSXSwapRemoval)
 461     addPass(createPPCVSXSwapRemovalPass());
 462   // Reduce the number of cr-logical ops.
 463   if (ReduceCRLogical && getOptLevel() != CodeGenOpt::None)
 464     addPass(createPPCReduceCRLogicalsPass());
 465   // Target-specific peephole cleanups performed after instruction
 466   // selection.
 467   if (!DisableMIPeephole) {
 468     addPass(createPPCMIPeepholePass());
 469     addPass(&DeadMachineInstructionElimID);
 470   }
 471 }
 472
 473 void PPCPassConfig::addPreRegAlloc() {
 474   if (getOptLevel() != CodeGenOpt::None) {
 475     initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
 476     insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
 477                &PPCVSXFMAMutateID);
 478   }
 479
 480   // FIXME: We probably don't need to run these for -fPIE.
 481   if (getPPCTargetMachine().isPositionIndependent()) {
 482     // FIXME: LiveVariables should not be necessary here!
 483     // PPCTLSDynamicCallPass uses LiveIntervals which previously dependent on
 484     // LiveVariables. This (unnecessary) dependency has been removed now,
 485     // however a stage-2 clang build fails without LiveVariables computed here.
 486     addPass(&LiveVariablesID, false);
 487     addPass(createPPCTLSDynamicCallPass());
 488   }
 489   if (EnableExtraTOCRegDeps)
 490     addPass(createPPCTOCRegDepsPass());
 491
 492   if (getOptLevel() != CodeGenOpt::None)
 493     addPass(&MachinePipelinerID);
 494 }
 495
 496 void PPCPassConfig::addPreSched2() {
 497   if (getOptLevel() != CodeGenOpt::None) {
 498     addPass(&IfConverterID);
 499
 500     // This optimization must happen after anything that might do store-to-load
 501     // forwarding. Here we're after RA (and, thus, when spills are inserted)
 502     // but before post-RA scheduling.
 503     if (!DisableQPXLoadSplat)
 504       addPass(createPPCQPXLoadSplatPass());
 505   }
 506 }
 507
 508 void PPCPassConfig::addPreEmitPass() {
 509   addPass(createPPCPreEmitPeepholePass());
 510   addPass(createPPCExpandISELPass());
 511
 512   if (getOptLevel() != CodeGenOpt::None)
 513     addPass(createPPCEarlyReturnPass(), false);
 514   // Must run branch selection immediately preceding the asm printer.
 515   addPass(createPPCBranchSelectionPass(), false);
 516 }
 517
 518 TargetTransformInfo
 519 PPCTargetMachine::getTargetTransformInfo(const Function &F) {
 520   return TargetTransformInfo(PPCTTIImpl(this, F));
 521 }
 522
 523 static MachineSchedRegistry
 524 PPCPreRASchedRegistry("ppc-prera",
 525                       "Run PowerPC PreRA specific scheduler",
 526                       createPPCMachineScheduler);
 527
 528 static MachineSchedRegistry
 529 PPCPostRASchedRegistry("ppc-postra",
 530                        "Run PowerPC PostRA specific scheduler",
 531                        createPPCPostMachineScheduler);