//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}
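
// Note on the out-parameters: NonKernelOnly marks the x ids, which are always
// initialized for kernels and so only need noting on non-kernel functions;
// IsQueuePtr is set for intrinsics that additionally require the queue
// pointer (wired to NeedsQueuePtr at the call site in updateImpl below).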

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    // The bitmap of a constant is the union of its operands' bitmaps.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;
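
// Propagates "uniform-work-group-size" from callers to callees: a function may
// only keep the attribute assumed "true" while every known caller has it too,
// and it falls to the pessimistic state when not all call sites are known.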

struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDWorkGroupSize &
AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}
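
// Deduces the implicit-argument attributes bottom-up: a function needs an
// implicit argument if it uses the corresponding intrinsic itself or may reach
// a callee (including an unknown one) that needs it.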

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // Assume all implicit arguments are needed if the function's address is
    // taken, since it may be called indirectly.
    // TODO: We shouldn't need this in the future.
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly as an unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

    bool NeedsQueuePtr = false;
    bool HasCall = false;
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
          AddAttribute("amdgpu-kernarg-segment-ptr");
          continue;
        }

        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      HasCall = true;
      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    HasCall |= AAEdges.hasUnknownCallee();
    if (!IsNonEntryFunc && HasCall)
      AddAttribute("amdgpu-calls");

    // Check the function body.
    auto CheckAlloca = [&](Instruction &I) {
      AddAttribute("amdgpu-stack-objects");
      // One alloca is enough; stop iterating.
      return false;
    };

    bool UsedAssumedInformation = false;
    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
                              UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs)
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;

    // Fall back to scanning every constant operand in the body.
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  DenseSet<StringRef> Attributes;
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }

  static char ID;
  TargetMachine *TM;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)