llvm/lib/Target/X86/X86TargetMachine.cpp

   1 //===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the X86 specific subclass of TargetMachine.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "X86TargetMachine.h"
  14 #include "MCTargetDesc/X86MCTargetDesc.h"
  15 #include "TargetInfo/X86TargetInfo.h"
  16 #include "X86.h"
  17 #include "X86MachineFunctionInfo.h"
  18 #include "X86MacroFusion.h"
  19 #include "X86Subtarget.h"
  20 #include "X86TargetObjectFile.h"
  21 #include "X86TargetTransformInfo.h"
  22 #include "llvm/ADT/SmallString.h"
  23 #include "llvm/ADT/StringRef.h"
  24 #include "llvm/Analysis/TargetTransformInfo.h"
  25 #include "llvm/CodeGen/ExecutionDomainFix.h"
  26 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
  27 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
  28 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
  29 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
  30 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  31 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
  32 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
  33 #include "llvm/CodeGen/MIRParser/MIParser.h"
  34 #include "llvm/CodeGen/MIRYamlMapping.h"
  35 #include "llvm/CodeGen/MachineScheduler.h"
  36 #include "llvm/CodeGen/Passes.h"
  37 #include "llvm/CodeGen/TargetPassConfig.h"
  38 #include "llvm/IR/Attributes.h"
  39 #include "llvm/IR/DataLayout.h"
  40 #include "llvm/IR/Function.h"
  41 #include "llvm/MC/MCAsmInfo.h"
  42 #include "llvm/MC/TargetRegistry.h"
  43 #include "llvm/Pass.h"
  44 #include "llvm/Support/CodeGen.h"
  45 #include "llvm/Support/CommandLine.h"
  46 #include "llvm/Support/ErrorHandling.h"
  47 #include "llvm/Target/TargetLoweringObjectFile.h"
  48 #include "llvm/Target/TargetOptions.h"
  49 #include "llvm/TargetParser/Triple.h"
  50 #include "llvm/Transforms/CFGuard.h"
  51 #include <memory>
  52 #include <optional>
  53 #include <string>
  54
  55 using namespace llvm;
  56
  57 static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
  58                                cl::desc("Enable the machine combiner pass"),
  59                                cl::init(true), cl::Hidden);
  60
  61 static cl::opt<bool>
  62     EnableTileRAPass("x86-tile-ra",
  63                      cl::desc("Enable the tile register allocation pass"),
  64                      cl::init(true), cl::Hidden);
  65
  66 extern "C" LLVM_C_ABI void LLVMInitializeX86Target() {
  67   // Register the target.
  68   RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target());
  69   RegisterTargetMachine<X86TargetMachine> Y(getTheX86_64Target());
  70
  71   PassRegistry &PR = *PassRegistry::getPassRegistry();
  72   initializeX86LowerAMXIntrinsicsLegacyPassPass(PR);
  73   initializeX86LowerAMXTypeLegacyPassPass(PR);
  74   initializeX86PreTileConfigPass(PR);
  75   initializeGlobalISel(PR);
  76   initializeWinEHStatePassPass(PR);
  77   initializeFixupBWInstPassPass(PR);
  78   initializeCompressEVEXPassPass(PR);
  79   initializeFixupLEAPassPass(PR);
  80   initializeFPSPass(PR);
  81   initializeX86FixupSetCCPassPass(PR);
  82   initializeX86CallFrameOptimizationPass(PR);
  83   initializeX86CmovConverterPassPass(PR);
  84   initializeX86TileConfigPass(PR);
  85   initializeX86FastPreTileConfigPass(PR);
  86   initializeX86FastTileConfigPass(PR);
  87   initializeKCFIPass(PR);
  88   initializeX86LowerTileCopyPass(PR);
  89   initializeX86ExpandPseudoPass(PR);
  90   initializeX86ExecutionDomainFixPass(PR);
  91   initializeX86DomainReassignmentPass(PR);
  92   initializeX86AvoidSFBPassPass(PR);
  93   initializeX86AvoidTrailingCallPassPass(PR);
  94   initializeX86SpeculativeLoadHardeningPassPass(PR);
  95   initializeX86SpeculativeExecutionSideEffectSuppressionPass(PR);
  96   initializeX86FlagsCopyLoweringPassPass(PR);
  97   initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
  98   initializeX86LoadValueInjectionRetHardeningPassPass(PR);
  99   initializeX86OptimizeLEAPassPass(PR);
 100   initializeX86PartialReductionPass(PR);
 101   initializePseudoProbeInserterPass(PR);
 102   initializeX86ReturnThunksPass(PR);
 103   initializeX86DAGToDAGISelLegacyPass(PR);
 104   initializeX86ArgumentStackSlotPassPass(PR);
 105   initializeX86FixupInstTuningPassPass(PR);
 106   initializeX86FixupVectorConstantsPassPass(PR);
 107   initializeX86DynAllocaExpanderPass(PR);
 108 }
 109
 110 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
 111   if (TT.isOSBinFormatMachO()) {
 112     if (TT.getArch() == Triple::x86_64)
 113       return std::make_unique<X86_64MachoTargetObjectFile>();
 114     return std::make_unique<TargetLoweringObjectFileMachO>();
 115   }
 116
 117   if (TT.isOSBinFormatCOFF())
 118     return std::make_unique<TargetLoweringObjectFileCOFF>();
 119
 120   if (TT.getArch() == Triple::x86_64)
 121     return std::make_unique<X86_64ELFTargetObjectFile>();
 122   return std::make_unique<X86ELFTargetObjectFile>();
 123 }
 124
 125 static std::string computeDataLayout(const Triple &TT) {
 126   // X86 is little endian
 127   std::string Ret = "e";
 128
 129   Ret += DataLayout::getManglingComponent(TT);
 130   // X86 and x32 have 32 bit pointers.
 131   if (!TT.isArch64Bit() || TT.isX32() || TT.isOSNaCl())
 132     Ret += "-p:32:32";
 133
 134   // Address spaces for 32 bit signed, 32 bit unsigned, and 64 bit pointers.
 135   Ret += "-p270:32:32-p271:32:32-p272:64:64";
 136
 137   // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
 138   // 128 bit integers are not specified in the 32-bit ABIs but are used
 139   // internally for lowering f128, so we match the alignment to that.
 140   if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
 141     Ret += "-i64:64-i128:128";
 142   else if (TT.isOSIAMCU())
 143     Ret += "-i64:32-f64:32";
 144   else
 145     Ret += "-i128:128-f64:32:64";
 146
 147   // Some ABIs align long double to 128 bits, others to 32.
 148   if (TT.isOSNaCl() || TT.isOSIAMCU())
 149     ; // No f80
 150   else if (TT.isArch64Bit() || TT.isOSDarwin() || TT.isWindowsMSVCEnvironment())
 151     Ret += "-f80:128";
 152   else
 153     Ret += "-f80:32";
 154
 155   if (TT.isOSIAMCU())
 156     Ret += "-f128:32";
 157
 158   // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
 159   if (TT.isArch64Bit())
 160     Ret += "-n8:16:32:64";
 161   else
 162     Ret += "-n8:16:32";
 163
 164   // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
 165   if ((!TT.isArch64Bit() && TT.isOSWindows()) || TT.isOSIAMCU())
 166     Ret += "-a:0:32-S32";
 167   else
 168     Ret += "-S128";
 169
 170   return Ret;
 171 }
 172
 173 static Reloc::Model getEffectiveRelocModel(const Triple &TT, bool JIT,
 174                                            std::optional<Reloc::Model> RM) {
 175   bool is64Bit = TT.getArch() == Triple::x86_64;
 176   if (!RM) {
 177     // JIT codegen should use static relocations by default, since it's
 178     // typically executed in process and not relocatable.
 179     if (JIT)
 180       return Reloc::Static;
 181
 182     // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
 183     // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
 184     // use static relocation model by default.
 185     if (TT.isOSDarwin()) {
 186       if (is64Bit)
 187         return Reloc::PIC_;
 188       return Reloc::DynamicNoPIC;
 189     }
 190     if (TT.isOSWindows() && is64Bit)
 191       return Reloc::PIC_;
 192     return Reloc::Static;
 193   }
 194
 195   // ELF and X86-64 don't have a distinct DynamicNoPIC model.  DynamicNoPIC
 196   // is defined as a model for code which may be used in static or dynamic
 197   // executables but not necessarily a shared library. On X86-32 we just
 198   // compile in -static mode, in x86-64 we use PIC.
 199   if (*RM == Reloc::DynamicNoPIC) {
 200     if (is64Bit)
 201       return Reloc::PIC_;
 202     if (!TT.isOSDarwin())
 203       return Reloc::Static;
 204   }
 205
 206   // If we are on Darwin, disallow static relocation model in X86-64 mode, since
 207   // the Mach-O file format doesn't support it.
 208   if (*RM == Reloc::Static && TT.isOSDarwin() && is64Bit)
 209     return Reloc::PIC_;
 210
 211   return *RM;
 212 }
 213
 214 static CodeModel::Model
 215 getEffectiveX86CodeModel(const Triple &TT, std::optional<CodeModel::Model> CM,
 216                          bool JIT) {
 217   bool Is64Bit = TT.getArch() == Triple::x86_64;
 218   if (CM) {
 219     if (*CM == CodeModel::Tiny)
 220       report_fatal_error("Target does not support the tiny CodeModel", false);
 221     return *CM;
 222   }
 223   if (JIT)
 224     return Is64Bit ? CodeModel::Large : CodeModel::Small;
 225   return CodeModel::Small;
 226 }
 227
 228 /// Create an X86 target.
 229 ///
 230 X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
 231                                    StringRef CPU, StringRef FS,
 232                                    const TargetOptions &Options,
 233                                    std::optional<Reloc::Model> RM,
 234                                    std::optional<CodeModel::Model> CM,
 235                                    CodeGenOptLevel OL, bool JIT)
 236     : CodeGenTargetMachineImpl(T, computeDataLayout(TT), TT, CPU, FS, Options,
 237                                getEffectiveRelocModel(TT, JIT, RM),
 238                                getEffectiveX86CodeModel(TT, CM, JIT), OL),
 239       TLOF(createTLOF(getTargetTriple())), IsJIT(JIT) {
 240   // On PS4/PS5, the "return address" of a 'noreturn' call must still be within
 241   // the calling function. Note that this also includes __stack_chk_fail,
 242   // so there was some target-specific logic in the instruction selectors
 243   // to handle that. That code has since been generalized, so the only thing
 244   // needed is to set TrapUnreachable here.
 245   if (TT.isPS() || TT.isOSBinFormatMachO()) {
 246     this->Options.TrapUnreachable = true;
 247     this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO();
 248   }
 249
 250   setMachineOutliner(true);
 251
 252   // x86 supports the debug entry values.
 253   setSupportsDebugEntryValues(true);
 254
 255   initAsmInfo();
 256 }
 257
 258 X86TargetMachine::~X86TargetMachine() = default;
 259
 260 const X86Subtarget *
 261 X86TargetMachine::getSubtargetImpl(const Function &F) const {
 262   Attribute CPUAttr = F.getFnAttribute("target-cpu");
 263   Attribute TuneAttr = F.getFnAttribute("tune-cpu");
 264   Attribute FSAttr = F.getFnAttribute("target-features");
 265
 266   StringRef CPU =
 267       CPUAttr.isValid() ? CPUAttr.getValueAsString() : (StringRef)TargetCPU;
 268   // "x86-64" is a default target setting for many front ends. In these cases,
 269   // they actually request for "generic" tuning unless the "tune-cpu" was
 270   // specified.
 271   StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString()
 272                       : CPU == "x86-64"  ? "generic"
 273                                          : (StringRef)CPU;
 274   StringRef FS =
 275       FSAttr.isValid() ? FSAttr.getValueAsString() : (StringRef)TargetFS;
 276
 277   SmallString<512> Key;
 278   // The additions here are ordered so that the definitely short strings are
 279   // added first so we won't exceed the small size. We append the
 280   // much longer FS string at the end so that we only heap allocate at most
 281   // one time.
 282
 283   // Extract prefer-vector-width attribute.
 284   unsigned PreferVectorWidthOverride = 0;
 285   Attribute PreferVecWidthAttr = F.getFnAttribute("prefer-vector-width");
 286   if (PreferVecWidthAttr.isValid()) {
 287     StringRef Val = PreferVecWidthAttr.getValueAsString();
 288     unsigned Width;
 289     if (!Val.getAsInteger(0, Width)) {
 290       Key += 'p';
 291       Key += Val;
 292       PreferVectorWidthOverride = Width;
 293     }
 294   }
 295
 296   // Extract min-legal-vector-width attribute.
 297   unsigned RequiredVectorWidth = UINT32_MAX;
 298   Attribute MinLegalVecWidthAttr = F.getFnAttribute("min-legal-vector-width");
 299   if (MinLegalVecWidthAttr.isValid()) {
 300     StringRef Val = MinLegalVecWidthAttr.getValueAsString();
 301     unsigned Width;
 302     if (!Val.getAsInteger(0, Width)) {
 303       Key += 'm';
 304       Key += Val;
 305       RequiredVectorWidth = Width;
 306     }
 307   }
 308
 309   // Add CPU to the Key.
 310   Key += CPU;
 311
 312   // Add tune CPU to the Key.
 313   Key += TuneCPU;
 314
 315   // Keep track of the start of the feature portion of the string.
 316   unsigned FSStart = Key.size();
 317
 318   // FIXME: This is related to the code below to reset the target options,
 319   // we need to know whether or not the soft float flag is set on the
 320   // function before we can generate a subtarget. We also need to use
 321   // it as a key for the subtarget since that can be the only difference
 322   // between two functions.
 323   bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
 324   // If the soft float attribute is set on the function turn on the soft float
 325   // subtarget feature.
 326   if (SoftFloat)
 327     Key += FS.empty() ? "+soft-float" : "+soft-float,";
 328
 329   Key += FS;
 330
 331   // We may have added +soft-float to the features so move the StringRef to
 332   // point to the full string in the Key.
 333   FS = Key.substr(FSStart);
 334
 335   auto &I = SubtargetMap[Key];
 336   if (!I) {
 337     // This needs to be done before we create a new subtarget since any
 338     // creation will depend on the TM and the code generation flags on the
 339     // function that reside in TargetOptions.
 340     resetTargetOptions(F);
 341     I = std::make_unique<X86Subtarget>(
 342         TargetTriple, CPU, TuneCPU, FS, *this,
 343         MaybeAlign(F.getParent()->getOverrideStackAlignment()),
 344         PreferVectorWidthOverride, RequiredVectorWidth);
 345   }
 346   return I.get();
 347 }
 348
 349 yaml::MachineFunctionInfo *X86TargetMachine::createDefaultFuncInfoYAML() const {
 350   return new yaml::X86MachineFunctionInfo();
 351 }
 352
 353 yaml::MachineFunctionInfo *
 354 X86TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
 355   const auto *MFI = MF.getInfo<X86MachineFunctionInfo>();
 356   return new yaml::X86MachineFunctionInfo(*MFI);
 357 }
 358
 359 bool X86TargetMachine::parseMachineFunctionInfo(
 360     const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
 361     SMDiagnostic &Error, SMRange &SourceRange) const {
 362   const auto &YamlMFI = static_cast<const yaml::X86MachineFunctionInfo &>(MFI);
 363   PFS.MF.getInfo<X86MachineFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
 364   return false;
 365 }
 366
 367 bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
 368                                            unsigned DestAS) const {
 369   assert(SrcAS != DestAS && "Expected different address spaces!");
 370   if (getPointerSize(SrcAS) != getPointerSize(DestAS))
 371     return false;
 372   return SrcAS < 256 && DestAS < 256;
 373 }
 374
 375 void X86TargetMachine::reset() { SubtargetMap.clear(); }
 376
 377 //===----------------------------------------------------------------------===//
 378 // X86 TTI query.
 379 //===----------------------------------------------------------------------===//
 380
 381 TargetTransformInfo
 382 X86TargetMachine::getTargetTransformInfo(const Function &F) const {
 383   return TargetTransformInfo(X86TTIImpl(this, F));
 384 }
 385
 386 //===----------------------------------------------------------------------===//
 387 // Pass Pipeline Configuration
 388 //===----------------------------------------------------------------------===//
 389
 390 namespace {
 391
 392 /// X86 Code Generator Pass Configuration Options.
 393 class X86PassConfig : public TargetPassConfig {
 394 public:
 395   X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM)
 396     : TargetPassConfig(TM, PM) {}
 397
 398   X86TargetMachine &getX86TargetMachine() const {
 399     return getTM<X86TargetMachine>();
 400   }
 401
 402   ScheduleDAGInstrs *
 403   createMachineScheduler(MachineSchedContext *C) const override {
 404     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
 405     DAG->addMutation(createX86MacroFusionDAGMutation());
 406     return DAG;
 407   }
 408
 409   ScheduleDAGInstrs *
 410   createPostMachineScheduler(MachineSchedContext *C) const override {
 411     ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
 412     DAG->addMutation(createX86MacroFusionDAGMutation());
 413     return DAG;
 414   }
 415
 416   void addIRPasses() override;
 417   bool addInstSelector() override;
 418   bool addIRTranslator() override;
 419   bool addLegalizeMachineIR() override;
 420   bool addRegBankSelect() override;
 421   bool addGlobalInstructionSelect() override;
 422   bool addILPOpts() override;
 423   bool addPreISel() override;
 424   void addMachineSSAOptimization() override;
 425   void addPreRegAlloc() override;
 426   bool addPostFastRegAllocRewrite() override;
 427   void addPostRegAlloc() override;
 428   void addPreEmitPass() override;
 429   void addPreEmitPass2() override;
 430   void addPreSched2() override;
 431   bool addRegAssignAndRewriteOptimized() override;
 432
 433   std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 434 };
 435
 436 class X86ExecutionDomainFix : public ExecutionDomainFix {
 437 public:
 438   static char ID;
 439   X86ExecutionDomainFix() : ExecutionDomainFix(ID, X86::VR128XRegClass) {}
 440   StringRef getPassName() const override {
 441     return "X86 Execution Dependency Fix";
 442   }
 443 };
 444 char X86ExecutionDomainFix::ID;
 445
 446 } // end anonymous namespace
 447
 448 INITIALIZE_PASS_BEGIN(X86ExecutionDomainFix, "x86-execution-domain-fix",
 449   "X86 Execution Domain Fix", false, false)
 450 INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
 451 INITIALIZE_PASS_END(X86ExecutionDomainFix, "x86-execution-domain-fix",
 452   "X86 Execution Domain Fix", false, false)
 453
 454 TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
 455   return new X86PassConfig(*this, PM);
 456 }
 457
 458 MachineFunctionInfo *X86TargetMachine::createMachineFunctionInfo(
 459     BumpPtrAllocator &Allocator, const Function &F,
 460     const TargetSubtargetInfo *STI) const {
 461   return X86MachineFunctionInfo::create<X86MachineFunctionInfo>(Allocator, F,
 462                                                                 STI);
 463 }
 464
 465 void X86PassConfig::addIRPasses() {
 466   addPass(createAtomicExpandLegacyPass());
 467
 468   // We add both pass anyway and when these two passes run, we skip the pass
 469   // based on the option level and option attribute.
 470   addPass(createX86LowerAMXIntrinsicsPass());
 471   addPass(createX86LowerAMXTypePass());
 472
 473   TargetPassConfig::addIRPasses();
 474
 475   if (TM->getOptLevel() != CodeGenOptLevel::None) {
 476     addPass(createInterleavedAccessPass());
 477     addPass(createX86PartialReductionPass());
 478   }
 479
 480   // Add passes that handle indirect branch removal and insertion of a retpoline
 481   // thunk. These will be a no-op unless a function subtarget has the retpoline
 482   // feature enabled.
 483   addPass(createIndirectBrExpandPass());
 484
 485   // Add Control Flow Guard checks.
 486   const Triple &TT = TM->getTargetTriple();
 487   if (TT.isOSWindows()) {
 488     if (TT.getArch() == Triple::x86_64) {
 489       addPass(createCFGuardDispatchPass());
 490     } else {
 491       addPass(createCFGuardCheckPass());
 492     }
 493   }
 494
 495   if (TM->Options.JMCInstrument)
 496     addPass(createJMCInstrumenterPass());
 497 }
 498
 499 bool X86PassConfig::addInstSelector() {
 500   // Install an instruction selector.
 501   addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
 502
 503   // For ELF, cleanup any local-dynamic TLS accesses.
 504   if (TM->getTargetTriple().isOSBinFormatELF() &&
 505       getOptLevel() != CodeGenOptLevel::None)
 506     addPass(createCleanupLocalDynamicTLSPass());
 507
 508   addPass(createX86GlobalBaseRegPass());
 509   addPass(createX86ArgumentStackSlotPass());
 510   return false;
 511 }
 512
 513 bool X86PassConfig::addIRTranslator() {
 514   addPass(new IRTranslator(getOptLevel()));
 515   return false;
 516 }
 517
 518 bool X86PassConfig::addLegalizeMachineIR() {
 519   addPass(new Legalizer());
 520   return false;
 521 }
 522
 523 bool X86PassConfig::addRegBankSelect() {
 524   addPass(new RegBankSelect());
 525   return false;
 526 }
 527
 528 bool X86PassConfig::addGlobalInstructionSelect() {
 529   addPass(new InstructionSelect(getOptLevel()));
 530   // Add GlobalBaseReg in case there is no SelectionDAG passes afterwards
 531   if (isGlobalISelAbortEnabled())
 532     addPass(createX86GlobalBaseRegPass());
 533   return false;
 534 }
 535
 536 bool X86PassConfig::addILPOpts() {
 537   addPass(&EarlyIfConverterLegacyID);
 538   if (EnableMachineCombinerPass)
 539     addPass(&MachineCombinerID);
 540   addPass(createX86CmovConverterPass());
 541   return true;
 542 }
 543
 544 bool X86PassConfig::addPreISel() {
 545   // Only add this pass for 32-bit x86 Windows.
 546   const Triple &TT = TM->getTargetTriple();
 547   if (TT.isOSWindows() && TT.getArch() == Triple::x86)
 548     addPass(createX86WinEHStatePass());
 549   return true;
 550 }
 551
 552 void X86PassConfig::addPreRegAlloc() {
 553   if (getOptLevel() != CodeGenOptLevel::None) {
 554     addPass(&LiveRangeShrinkID);
 555     addPass(createX86WinFixupBufferSecurityCheckPass());
 556     addPass(createX86FixupSetCC());
 557     addPass(createX86OptimizeLEAs());
 558     addPass(createX86CallFrameOptimization());
 559     addPass(createX86AvoidStoreForwardingBlocks());
 560   }
 561
 562   addPass(createX86SpeculativeLoadHardeningPass());
 563   addPass(createX86FlagsCopyLoweringPass());
 564   addPass(createX86DynAllocaExpander());
 565
 566   if (getOptLevel() != CodeGenOptLevel::None)
 567     addPass(createX86PreTileConfigPass());
 568   else
 569     addPass(createX86FastPreTileConfigPass());
 570 }
 571
 572 void X86PassConfig::addMachineSSAOptimization() {
 573   addPass(createX86DomainReassignmentPass());
 574   TargetPassConfig::addMachineSSAOptimization();
 575 }
 576
 577 void X86PassConfig::addPostRegAlloc() {
 578   addPass(createX86LowerTileCopyPass());
 579   addPass(createX86FloatingPointStackifierPass());
 580   // When -O0 is enabled, the Load Value Injection Hardening pass will fall back
 581   // to using the Speculative Execution Side Effect Suppression pass for
 582   // mitigation. This is to prevent slow downs due to
 583   // analyses needed by the LVIHardening pass when compiling at -O0.
 584   if (getOptLevel() != CodeGenOptLevel::None)
 585     addPass(createX86LoadValueInjectionLoadHardeningPass());
 586 }
 587
 588 void X86PassConfig::addPreSched2() {
 589   addPass(createX86ExpandPseudoPass());
 590   addPass(createKCFIPass());
 591 }
 592
 593 void X86PassConfig::addPreEmitPass() {
 594   if (getOptLevel() != CodeGenOptLevel::None) {
 595     addPass(new X86ExecutionDomainFix());
 596     addPass(createBreakFalseDeps());
 597   }
 598
 599   addPass(createX86IndirectBranchTrackingPass());
 600
 601   addPass(createX86IssueVZeroUpperPass());
 602
 603   if (getOptLevel() != CodeGenOptLevel::None) {
 604     addPass(createX86FixupBWInsts());
 605     addPass(createX86PadShortFunctions());
 606     addPass(createX86FixupLEAs());
 607     addPass(createX86FixupInstTuning());
 608     addPass(createX86FixupVectorConstants());
 609   }
 610   addPass(createX86CompressEVEXPass());
 611   addPass(createX86DiscriminateMemOpsPass());
 612   addPass(createX86InsertPrefetchPass());
 613   addPass(createX86InsertX87waitPass());
 614 }
 615
 616 void X86PassConfig::addPreEmitPass2() {
 617   const Triple &TT = TM->getTargetTriple();
 618   const MCAsmInfo *MAI = TM->getMCAsmInfo();
 619
 620   // The X86 Speculative Execution Pass must run after all control
 621   // flow graph modifying passes. As a result it was listed to run right before
 622   // the X86 Retpoline Thunks pass. The reason it must run after control flow
 623   // graph modifications is that the model of LFENCE in LLVM has to be updated
 624   // (FIXME: https://bugs.llvm.org/show_bug.cgi?id=45167). Currently the
 625   // placement of this pass was hand checked to ensure that the subsequent
 626   // passes don't move the code around the LFENCEs in a way that will hurt the
 627   // correctness of this pass. This placement has been shown to work based on
 628   // hand inspection of the codegen output.
 629   addPass(createX86SpeculativeExecutionSideEffectSuppression());
 630   addPass(createX86IndirectThunksPass());
 631   addPass(createX86ReturnThunksPass());
 632
 633   // Insert extra int3 instructions after trailing call instructions to avoid
 634   // issues in the unwinder.
 635   if (TT.isOSWindows() && TT.getArch() == Triple::x86_64)
 636     addPass(createX86AvoidTrailingCallPass());
 637
 638   // Verify basic block incoming and outgoing cfa offset and register values and
 639   // correct CFA calculation rule where needed by inserting appropriate CFI
 640   // instructions.
 641   if (!TT.isOSDarwin() &&
 642       (!TT.isOSWindows() ||
 643        MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
 644     addPass(createCFIInstrInserter());
 645
 646   if (TT.isOSWindows()) {
 647     // Identify valid longjmp targets for Windows Control Flow Guard.
 648     addPass(createCFGuardLongjmpPass());
 649     // Identify valid eh continuation targets for Windows EHCont Guard.
 650     addPass(createEHContGuardCatchretPass());
 651   }
 652   addPass(createX86LoadValueInjectionRetHardeningPass());
 653
 654   // Insert pseudo probe annotation for callsite profiling
 655   addPass(createPseudoProbeInserter());
 656
 657   // KCFI indirect call checks are lowered to a bundle, and on Darwin platforms,
 658   // also CALL_RVMARKER.
 659   addPass(createUnpackMachineBundles([&TT](const MachineFunction &MF) {
 660     // Only run bundle expansion if the module uses kcfi, or there are relevant
 661     // ObjC runtime functions present in the module.
 662     const Function &F = MF.getFunction();
 663     const Module *M = F.getParent();
 664     return M->getModuleFlag("kcfi") ||
 665            (TT.isOSDarwin() &&
 666             (M->getFunction("objc_retainAutoreleasedReturnValue") ||
 667              M->getFunction("objc_unsafeClaimAutoreleasedReturnValue")));
 668   }));
 669 }
 670
 671 bool X86PassConfig::addPostFastRegAllocRewrite() {
 672   addPass(createX86FastTileConfigPass());
 673   return true;
 674 }
 675
 676 std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
 677   return getStandardCSEConfigForOpt(TM->getOptLevel());
 678 }
 679
 680 static bool onlyAllocateTileRegisters(const TargetRegisterInfo &TRI,
 681                                       const MachineRegisterInfo &MRI,
 682                                       const Register Reg) {
 683   const TargetRegisterClass *RC = MRI.getRegClass(Reg);
 684   return static_cast<const X86RegisterInfo &>(TRI).isTileRegisterClass(RC);
 685 }
 686
 687 bool X86PassConfig::addRegAssignAndRewriteOptimized() {
 688   // Don't support tile RA when RA is specified by command line "-regalloc".
 689   if (!isCustomizedRegAlloc() && EnableTileRAPass) {
 690     // Allocate tile register first.
 691     addPass(createGreedyRegisterAllocator(onlyAllocateTileRegisters));
 692     addPass(createX86TileConfigPass());
 693   }
 694   return TargetPassConfig::addRegAssignAndRewriteOptimized();
 695 }