clang/lib/Sema/SemaCUDA.cpp

   1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 /// \file
   9 /// This file implements semantic analysis for CUDA constructs.
  10 ///
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "clang/Sema/SemaCUDA.h"
  14 #include "clang/AST/ASTContext.h"
  15 #include "clang/AST/Decl.h"
  16 #include "clang/AST/ExprCXX.h"
  17 #include "clang/Basic/Cuda.h"
  18 #include "clang/Basic/TargetInfo.h"
  19 #include "clang/Lex/Preprocessor.h"
  20 #include "clang/Sema/Lookup.h"
  21 #include "clang/Sema/ScopeInfo.h"
  22 #include "clang/Sema/Sema.h"
  23 #include "clang/Sema/Template.h"
  24 #include "llvm/ADT/STLForwardCompat.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include <optional>
  27 using namespace clang;
  28
  29 SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}
  30
  31 template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  32   if (!D)
  33     return false;
  34   if (auto *A = D->getAttr<AttrT>())
  35     return !A->isImplicit();
  36   return false;
  37 }
  38
  39 void SemaCUDA::PushForceHostDevice() {
  40   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  41   ForceHostDeviceDepth++;
  42 }
  43
  44 bool SemaCUDA::PopForceHostDevice() {
  45   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  46   if (ForceHostDeviceDepth == 0)
  47     return false;
  48   ForceHostDeviceDepth--;
  49   return true;
  50 }
  51
  52 ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
  53                                          MultiExprArg ExecConfig,
  54                                          SourceLocation GGGLoc) {
  55   FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  56   if (!ConfigDecl)
  57     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
  58                      << getConfigureFuncName());
  59   QualType ConfigQTy = ConfigDecl->getType();
  60
  61   DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
  62       getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  63   SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);
  64
  65   return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
  66                                /*IsExecConfig=*/true);
  67 }
  68
  69 CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  70   bool HasHostAttr = false;
  71   bool HasDeviceAttr = false;
  72   bool HasGlobalAttr = false;
  73   bool HasInvalidTargetAttr = false;
  74   for (const ParsedAttr &AL : Attrs) {
  75     switch (AL.getKind()) {
  76     case ParsedAttr::AT_CUDAGlobal:
  77       HasGlobalAttr = true;
  78       break;
  79     case ParsedAttr::AT_CUDAHost:
  80       HasHostAttr = true;
  81       break;
  82     case ParsedAttr::AT_CUDADevice:
  83       HasDeviceAttr = true;
  84       break;
  85     case ParsedAttr::AT_CUDAInvalidTarget:
  86       HasInvalidTargetAttr = true;
  87       break;
  88     default:
  89       break;
  90     }
  91   }
  92
  93   if (HasInvalidTargetAttr)
  94     return CUDAFunctionTarget::InvalidTarget;
  95
  96   if (HasGlobalAttr)
  97     return CUDAFunctionTarget::Global;
  98
  99   if (HasHostAttr && HasDeviceAttr)
 100     return CUDAFunctionTarget::HostDevice;
 101
 102   if (HasDeviceAttr)
 103     return CUDAFunctionTarget::Device;
 104
 105   return CUDAFunctionTarget::Host;
 106 }
 107
 108 template <typename A>
 109 static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
 110   return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
 111            return isa<A>(Attribute) &&
 112                   !(IgnoreImplicitAttr && Attribute->isImplicit());
 113          });
 114 }
 115
 116 SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
 117     SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
 118     : S(S_) {
 119   SavedCtx = S.CurCUDATargetCtx;
 120   assert(K == SemaCUDA::CTCK_InitGlobalVar);
 121   auto *VD = dyn_cast_or_null<VarDecl>(D);
 122   if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
 123     auto Target = CUDAFunctionTarget::Host;
 124     if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
 125          !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
 126         hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
 127         hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
 128       Target = CUDAFunctionTarget::Device;
 129     S.CurCUDATargetCtx = {Target, K, VD};
 130   }
 131 }
 132
 133 /// IdentifyTarget - Determine the CUDA compilation target for this function
 134 CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
 135                                             bool IgnoreImplicitHDAttr) {
 136   // Code that lives outside a function gets the target from CurCUDATargetCtx.
 137   if (D == nullptr)
 138     return CurCUDATargetCtx.Target;
 139
 140   if (D->hasAttr<CUDAInvalidTargetAttr>())
 141     return CUDAFunctionTarget::InvalidTarget;
 142
 143   if (D->hasAttr<CUDAGlobalAttr>())
 144     return CUDAFunctionTarget::Global;
 145
 146   if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
 147     if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
 148       return CUDAFunctionTarget::HostDevice;
 149     return CUDAFunctionTarget::Device;
 150   } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
 151     return CUDAFunctionTarget::Host;
 152   } else if ((D->isImplicit() || !D->isUserProvided()) &&
 153              !IgnoreImplicitHDAttr) {
 154     // Some implicit declarations (like intrinsic functions) are not marked.
 155     // Set the most lenient target on them for maximal flexibility.
 156     return CUDAFunctionTarget::HostDevice;
 157   }
 158
 159   return CUDAFunctionTarget::Host;
 160 }
 161
 162 /// IdentifyTarget - Determine the CUDA compilation target for this variable.
 163 SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
 164   if (Var->hasAttr<HIPManagedAttr>())
 165     return CVT_Unified;
 166   // Only constexpr and const variabless with implicit constant attribute
 167   // are emitted on both sides. Such variables are promoted to device side
 168   // only if they have static constant intializers on device side.
 169   if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
 170       Var->hasAttr<CUDAConstantAttr>() &&
 171       !hasExplicitAttr<CUDAConstantAttr>(Var))
 172     return CVT_Both;
 173   if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
 174       Var->hasAttr<CUDASharedAttr>() ||
 175       Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
 176       Var->getType()->isCUDADeviceBuiltinTextureType())
 177     return CVT_Device;
 178   // Function-scope static variable without explicit device or constant
 179   // attribute are emitted
 180   //  - on both sides in host device functions
 181   //  - on device side in device or global functions
 182   if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
 183     switch (IdentifyTarget(FD)) {
 184     case CUDAFunctionTarget::HostDevice:
 185       return CVT_Both;
 186     case CUDAFunctionTarget::Device:
 187     case CUDAFunctionTarget::Global:
 188       return CVT_Device;
 189     default:
 190       return CVT_Host;
 191     }
 192   }
 193   return CVT_Host;
 194 }
 195
 196 // * CUDA Call preference table
 197 //
 198 // F - from,
 199 // T - to
 200 // Ph - preference in host mode
 201 // Pd - preference in device mode
 202 // H  - handled in (x)
 203 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
 204 //
 205 // | F  | T  | Ph  | Pd  |  H  |
 206 // |----+----+-----+-----+-----+
 207 // | d  | d  | N   | N   | (c) |
 208 // | d  | g  | --  | --  | (a) |
 209 // | d  | h  | --  | --  | (e) |
 210 // | d  | hd | HD  | HD  | (b) |
 211 // | g  | d  | N   | N   | (c) |
 212 // | g  | g  | --  | --  | (a) |
 213 // | g  | h  | --  | --  | (e) |
 214 // | g  | hd | HD  | HD  | (b) |
 215 // | h  | d  | --  | --  | (e) |
 216 // | h  | g  | N   | N   | (c) |
 217 // | h  | h  | N   | N   | (c) |
 218 // | h  | hd | HD  | HD  | (b) |
 219 // | hd | d  | WS  | SS  | (d) |
 220 // | hd | g  | SS  | --  |(d/a)|
 221 // | hd | h  | SS  | WS  | (d) |
 222 // | hd | hd | HD  | HD  | (b) |
 223
 224 SemaCUDA::CUDAFunctionPreference
 225 SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
 226                              const FunctionDecl *Callee) {
 227   assert(Callee && "Callee must be valid.");
 228
 229   // Treat ctor/dtor as host device function in device var initializer to allow
 230   // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
 231   // will be diagnosed by checkAllowedInitializer.
 232   if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
 233       CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
 234       (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
 235     return CFP_HostDevice;
 236
 237   CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
 238   CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);
 239
 240   // If one of the targets is invalid, the check always fails, no matter what
 241   // the other target is.
 242   if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
 243       CalleeTarget == CUDAFunctionTarget::InvalidTarget)
 244     return CFP_Never;
 245
 246   // (a) Can't call global from some contexts until we support CUDA's
 247   // dynamic parallelism.
 248   if (CalleeTarget == CUDAFunctionTarget::Global &&
 249       (CallerTarget == CUDAFunctionTarget::Global ||
 250        CallerTarget == CUDAFunctionTarget::Device))
 251     return CFP_Never;
 252
 253   // (b) Calling HostDevice is OK for everyone.
 254   if (CalleeTarget == CUDAFunctionTarget::HostDevice)
 255     return CFP_HostDevice;
 256
 257   // (c) Best case scenarios
 258   if (CalleeTarget == CallerTarget ||
 259       (CallerTarget == CUDAFunctionTarget::Host &&
 260        CalleeTarget == CUDAFunctionTarget::Global) ||
 261       (CallerTarget == CUDAFunctionTarget::Global &&
 262        CalleeTarget == CUDAFunctionTarget::Device))
 263     return CFP_Native;
 264
 265   // HipStdPar mode is special, in that assessing whether a device side call to
 266   // a host target is deferred to a subsequent pass, and cannot unambiguously be
 267   // adjudicated in the AST, hence we optimistically allow them to pass here.
 268   if (getLangOpts().HIPStdPar &&
 269       (CallerTarget == CUDAFunctionTarget::Global ||
 270        CallerTarget == CUDAFunctionTarget::Device ||
 271        CallerTarget == CUDAFunctionTarget::HostDevice) &&
 272       CalleeTarget == CUDAFunctionTarget::Host)
 273     return CFP_HostDevice;
 274
 275   // (d) HostDevice behavior depends on compilation mode.
 276   if (CallerTarget == CUDAFunctionTarget::HostDevice) {
 277     // It's OK to call a compilation-mode matching function from an HD one.
 278     if ((getLangOpts().CUDAIsDevice &&
 279          CalleeTarget == CUDAFunctionTarget::Device) ||
 280         (!getLangOpts().CUDAIsDevice &&
 281          (CalleeTarget == CUDAFunctionTarget::Host ||
 282           CalleeTarget == CUDAFunctionTarget::Global)))
 283       return CFP_SameSide;
 284
 285     // Calls from HD to non-mode-matching functions (i.e., to host functions
 286     // when compiling in device mode or to device functions when compiling in
 287     // host mode) are allowed at the sema level, but eventually rejected if
 288     // they're ever codegened.  TODO: Reject said calls earlier.
 289     return CFP_WrongSide;
 290   }
 291
 292   // (e) Calling across device/host boundary is not something you should do.
 293   if ((CallerTarget == CUDAFunctionTarget::Host &&
 294        CalleeTarget == CUDAFunctionTarget::Device) ||
 295       (CallerTarget == CUDAFunctionTarget::Device &&
 296        CalleeTarget == CUDAFunctionTarget::Host) ||
 297       (CallerTarget == CUDAFunctionTarget::Global &&
 298        CalleeTarget == CUDAFunctionTarget::Host))
 299     return CFP_Never;
 300
 301   llvm_unreachable("All cases should've been handled by now.");
 302 }
 303
 304 template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
 305   if (!D)
 306     return false;
 307   if (auto *A = D->getAttr<AttrT>())
 308     return A->isImplicit();
 309   return D->isImplicit();
 310 }
 311
 312 bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
 313   bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
 314   bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
 315   return IsImplicitDevAttr && IsImplicitHostAttr;
 316 }
 317
 318 void SemaCUDA::EraseUnwantedMatches(
 319     const FunctionDecl *Caller,
 320     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
 321   if (Matches.size() <= 1)
 322     return;
 323
 324   using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
 325
 326   // Gets the CUDA function preference for a call from Caller to Match.
 327   auto GetCFP = [&](const Pair &Match) {
 328     return IdentifyPreference(Caller, Match.second);
 329   };
 330
 331   // Find the best call preference among the functions in Matches.
 332   CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
 333       Matches.begin(), Matches.end(),
 334       [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
 335
 336   // Erase all functions with lower priority.
 337   llvm::erase_if(Matches,
 338                  [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
 339 }
 340
 341 /// When an implicitly-declared special member has to invoke more than one
 342 /// base/field special member, conflicts may occur in the targets of these
 343 /// members. For example, if one base's member __host__ and another's is
 344 /// __device__, it's a conflict.
 345 /// This function figures out if the given targets \param Target1 and
 346 /// \param Target2 conflict, and if they do not it fills in
 347 /// \param ResolvedTarget with a target that resolves for both calls.
 348 /// \return true if there's a conflict, false otherwise.
 349 static bool
 350 resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
 351                                 CUDAFunctionTarget Target2,
 352                                 CUDAFunctionTarget *ResolvedTarget) {
 353   // Only free functions and static member functions may be global.
 354   assert(Target1 != CUDAFunctionTarget::Global);
 355   assert(Target2 != CUDAFunctionTarget::Global);
 356
 357   if (Target1 == CUDAFunctionTarget::HostDevice) {
 358     *ResolvedTarget = Target2;
 359   } else if (Target2 == CUDAFunctionTarget::HostDevice) {
 360     *ResolvedTarget = Target1;
 361   } else if (Target1 != Target2) {
 362     return true;
 363   } else {
 364     *ResolvedTarget = Target1;
 365   }
 366
 367   return false;
 368 }
 369
 370 bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
 371                                                    CXXSpecialMemberKind CSM,
 372                                                    CXXMethodDecl *MemberDecl,
 373                                                    bool ConstRHS,
 374                                                    bool Diagnose) {
 375   // If the defaulted special member is defined lexically outside of its
 376   // owning class, or the special member already has explicit device or host
 377   // attributes, do not infer.
 378   bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
 379   bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
 380   bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
 381   bool HasExplicitAttr =
 382       (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
 383       (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
 384   if (!InClass || HasExplicitAttr)
 385     return false;
 386
 387   std::optional<CUDAFunctionTarget> InferredTarget;
 388
 389   // We're going to invoke special member lookup; mark that these special
 390   // members are called from this one, and not from its caller.
 391   Sema::ContextRAII MethodContext(SemaRef, MemberDecl);
 392
 393   // Look for special members in base classes that should be invoked from here.
 394   // Infer the target of this member base on the ones it should call.
 395   // Skip direct and indirect virtual bases for abstract classes.
 396   llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
 397   for (const auto &B : ClassDecl->bases()) {
 398     if (!B.isVirtual()) {
 399       Bases.push_back(&B);
 400     }
 401   }
 402
 403   if (!ClassDecl->isAbstract()) {
 404     llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
 405   }
 406
 407   for (const auto *B : Bases) {
 408     const RecordType *BaseType = B->getType()->getAs<RecordType>();
 409     if (!BaseType) {
 410       continue;
 411     }
 412
 413     CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
 414     Sema::SpecialMemberOverloadResult SMOR =
 415         SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
 416                                     /* ConstArg */ ConstRHS,
 417                                     /* VolatileArg */ false,
 418                                     /* RValueThis */ false,
 419                                     /* ConstThis */ false,
 420                                     /* VolatileThis */ false);
 421
 422     if (!SMOR.getMethod())
 423       continue;
 424
 425     CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod());
 426     if (!InferredTarget) {
 427       InferredTarget = BaseMethodTarget;
 428     } else {
 429       bool ResolutionError = resolveCalleeCUDATargetConflict(
 430           *InferredTarget, BaseMethodTarget, &*InferredTarget);
 431       if (ResolutionError) {
 432         if (Diagnose) {
 433           Diag(ClassDecl->getLocation(),
 434                diag::note_implicit_member_target_infer_collision)
 435               << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
 436               << llvm::to_underlying(BaseMethodTarget);
 437         }
 438         MemberDecl->addAttr(
 439             CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
 440         return true;
 441       }
 442     }
 443   }
 444
 445   // Same as for bases, but now for special members of fields.
 446   for (const auto *F : ClassDecl->fields()) {
 447     if (F->isInvalidDecl()) {
 448       continue;
 449     }
 450
 451     const RecordType *FieldType =
 452         getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
 453     if (!FieldType) {
 454       continue;
 455     }
 456
 457     CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
 458     Sema::SpecialMemberOverloadResult SMOR =
 459         SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
 460                                     /* ConstArg */ ConstRHS && !F->isMutable(),
 461                                     /* VolatileArg */ false,
 462                                     /* RValueThis */ false,
 463                                     /* ConstThis */ false,
 464                                     /* VolatileThis */ false);
 465
 466     if (!SMOR.getMethod())
 467       continue;
 468
 469     CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod());
 470     if (!InferredTarget) {
 471       InferredTarget = FieldMethodTarget;
 472     } else {
 473       bool ResolutionError = resolveCalleeCUDATargetConflict(
 474           *InferredTarget, FieldMethodTarget, &*InferredTarget);
 475       if (ResolutionError) {
 476         if (Diagnose) {
 477           Diag(ClassDecl->getLocation(),
 478                diag::note_implicit_member_target_infer_collision)
 479               << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
 480               << llvm::to_underlying(FieldMethodTarget);
 481         }
 482         MemberDecl->addAttr(
 483             CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
 484         return true;
 485       }
 486     }
 487   }
 488
 489
 490   // If no target was inferred, mark this member as __host__ __device__;
 491   // it's the least restrictive option that can be invoked from any target.
 492   bool NeedsH = true, NeedsD = true;
 493   if (InferredTarget) {
 494     if (*InferredTarget == CUDAFunctionTarget::Device)
 495       NeedsH = false;
 496     else if (*InferredTarget == CUDAFunctionTarget::Host)
 497       NeedsD = false;
 498   }
 499
 500   // We either setting attributes first time, or the inferred ones must match
 501   // previously set ones.
 502   if (NeedsD && !HasD)
 503     MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
 504   if (NeedsH && !HasH)
 505     MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
 506
 507   return false;
 508 }
 509
 510 bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
 511   if (!CD->isDefined() && CD->isTemplateInstantiation())
 512     SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
 513
 514   // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
 515   // empty at a point in the translation unit, if it is either a
 516   // trivial constructor
 517   if (CD->isTrivial())
 518     return true;
 519
 520   // ... or it satisfies all of the following conditions:
 521   // The constructor function has been defined.
 522   // The constructor function has no parameters,
 523   // and the function body is an empty compound statement.
 524   if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
 525     return false;
 526
 527   // Its class has no virtual functions and no virtual base classes.
 528   if (CD->getParent()->isDynamicClass())
 529     return false;
 530
 531   // Union ctor does not call ctors of its data members.
 532   if (CD->getParent()->isUnion())
 533     return true;
 534
 535   // The only form of initializer allowed is an empty constructor.
 536   // This will recursively check all base classes and member initializers
 537   if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
 538         if (const CXXConstructExpr *CE =
 539                 dyn_cast<CXXConstructExpr>(CI->getInit()))
 540           return isEmptyConstructor(Loc, CE->getConstructor());
 541         return false;
 542       }))
 543     return false;
 544
 545   return true;
 546 }
 547
 548 bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
 549   // No destructor -> no problem.
 550   if (!DD)
 551     return true;
 552
 553   if (!DD->isDefined() && DD->isTemplateInstantiation())
 554     SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
 555
 556   // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
 557   // empty at a point in the translation unit, if it is either a
 558   // trivial constructor
 559   if (DD->isTrivial())
 560     return true;
 561
 562   // ... or it satisfies all of the following conditions:
 563   // The destructor function has been defined.
 564   // and the function body is an empty compound statement.
 565   if (!DD->hasTrivialBody())
 566     return false;
 567
 568   const CXXRecordDecl *ClassDecl = DD->getParent();
 569
 570   // Its class has no virtual functions and no virtual base classes.
 571   if (ClassDecl->isDynamicClass())
 572     return false;
 573
 574   // Union does not have base class and union dtor does not call dtors of its
 575   // data members.
 576   if (DD->getParent()->isUnion())
 577     return true;
 578
 579   // Only empty destructors are allowed. This will recursively check
 580   // destructors for all base classes...
 581   if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
 582         if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
 583           return isEmptyDestructor(Loc, RD->getDestructor());
 584         return true;
 585       }))
 586     return false;
 587
 588   // ... and member fields.
 589   if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
 590         if (CXXRecordDecl *RD = Field->getType()
 591                                     ->getBaseElementTypeUnsafe()
 592                                     ->getAsCXXRecordDecl())
 593           return isEmptyDestructor(Loc, RD->getDestructor());
 594         return true;
 595       }))
 596     return false;
 597
 598   return true;
 599 }
 600
 601 namespace {
 602 enum CUDAInitializerCheckKind {
 603   CICK_DeviceOrConstant, // Check initializer for device/constant variable
 604   CICK_Shared,           // Check initializer for shared variable
 605 };
 606
 607 bool IsDependentVar(VarDecl *VD) {
 608   if (VD->getType()->isDependentType())
 609     return true;
 610   if (const auto *Init = VD->getInit())
 611     return Init->isValueDependent();
 612   return false;
 613 }
 614
 615 // Check whether a variable has an allowed initializer for a CUDA device side
 616 // variable with global storage. \p VD may be a host variable to be checked for
 617 // potential promotion to device side variable.
 618 //
 619 // CUDA/HIP allows only empty constructors as initializers for global
 620 // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
 621 // __shared__ variables whether they are local or not (they all are implicitly
 622 // static in CUDA). One exception is that CUDA allows constant initializers
 623 // for __constant__ and __device__ variables.
 624 bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
 625                                            CUDAInitializerCheckKind CheckKind) {
 626   assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
 627   assert(!IsDependentVar(VD) && "do not check dependent var");
 628   const Expr *Init = VD->getInit();
 629   auto IsEmptyInit = [&](const Expr *Init) {
 630     if (!Init)
 631       return true;
 632     if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
 633       return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
 634     }
 635     return false;
 636   };
 637   auto IsConstantInit = [&](const Expr *Init) {
 638     assert(Init);
 639     ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
 640                                                     /*NoWronSidedVars=*/true);
 641     return Init->isConstantInitializer(S.getASTContext(),
 642                                        VD->getType()->isReferenceType());
 643   };
 644   auto HasEmptyDtor = [&](VarDecl *VD) {
 645     if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
 646       return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
 647     return true;
 648   };
 649   if (CheckKind == CICK_Shared)
 650     return IsEmptyInit(Init) && HasEmptyDtor(VD);
 651   return S.getLangOpts().GPUAllowDeviceInit ||
 652          ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
 653 }
 654 } // namespace
 655
 656 void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
 657   // Return early if VD is inside a non-instantiated template function since
 658   // the implicit constructor is not defined yet.
 659   if (const FunctionDecl *FD =
 660           dyn_cast_or_null<FunctionDecl>(VD->getDeclContext());
 661       FD && FD->isDependentContext())
 662     return;
 663
 664   // Do not check dependent variables since the ctor/dtor/initializer are not
 665   // determined. Do it after instantiation.
 666   if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
 667       IsDependentVar(VD))
 668     return;
 669   const Expr *Init = VD->getInit();
 670   bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
 671   bool IsDeviceOrConstantVar =
 672       !IsSharedVar &&
 673       (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
 674   if (IsDeviceOrConstantVar || IsSharedVar) {
 675     if (HasAllowedCUDADeviceStaticInitializer(
 676             *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
 677       return;
 678     Diag(VD->getLocation(),
 679          IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
 680         << Init->getSourceRange();
 681     VD->setInvalidDecl();
 682   } else {
 683     // This is a host-side global variable.  Check that the initializer is
 684     // callable from the host side.
 685     const FunctionDecl *InitFn = nullptr;
 686     if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
 687       InitFn = CE->getConstructor();
 688     } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
 689       InitFn = CE->getDirectCallee();
 690     }
 691     if (InitFn) {
 692       CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
 693       if (InitFnTarget != CUDAFunctionTarget::Host &&
 694           InitFnTarget != CUDAFunctionTarget::HostDevice) {
 695         Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
 696             << llvm::to_underlying(InitFnTarget) << InitFn;
 697         Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
 698         VD->setInvalidDecl();
 699       }
 700     }
 701   }
 702 }
 703
 704 void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
 705     const FunctionDecl *Callee) {
 706   FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
 707   if (!Caller)
 708     return;
 709
 710   if (!isImplicitHostDeviceFunction(Callee))
 711     return;
 712
 713   CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
 714
 715   // Record whether an implicit host device function is used on device side.
 716   if (CallerTarget != CUDAFunctionTarget::Device &&
 717       CallerTarget != CUDAFunctionTarget::Global &&
 718       (CallerTarget != CUDAFunctionTarget::HostDevice ||
 719        (isImplicitHostDeviceFunction(Caller) &&
 720         !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
 721     return;
 722
 723   getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
 724 }
 725
 726 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
 727 // treated as implicitly __host__ __device__, unless:
 728 //  * it is a variadic function (device-side variadic functions are not
 729 //    allowed), or
 730 //  * a __device__ function with this signature was already declared, in which
 731 //    case in which case we output an error, unless the __device__ decl is in a
 732 //    system header, in which case we leave the constexpr function unattributed.
 733 //
 734 // In addition, all function decls are treated as __host__ __device__ when
 735 // ForceHostDeviceDepth > 0 (corresponding to code within a
 736 //   #pragma clang force_cuda_host_device_begin/end
 737 // pair).
 738 void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
 739                                        const LookupResult &Previous) {
 740   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
 741
 742   if (ForceHostDeviceDepth > 0) {
 743     if (!NewD->hasAttr<CUDAHostAttr>())
 744       NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
 745     if (!NewD->hasAttr<CUDADeviceAttr>())
 746       NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
 747     return;
 748   }
 749
 750   // If a template function has no host/device/global attributes,
 751   // make it implicitly host device function.
 752   if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
 753       !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
 754       !NewD->hasAttr<CUDAGlobalAttr>() &&
 755       (NewD->getDescribedFunctionTemplate() ||
 756        NewD->isFunctionTemplateSpecialization())) {
 757     NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
 758     NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
 759     return;
 760   }
 761
 762   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
 763       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
 764       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
 765     return;
 766
 767   // Is D a __device__ function with the same signature as NewD, ignoring CUDA
 768   // attributes?
 769   auto IsMatchingDeviceFn = [&](NamedDecl *D) {
 770     if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
 771       D = Using->getTargetDecl();
 772     FunctionDecl *OldD = D->getAsFunction();
 773     return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
 774            !OldD->hasAttr<CUDAHostAttr>() &&
 775            !SemaRef.IsOverload(NewD, OldD,
 776                                /* UseMemberUsingDeclRules = */ false,
 777                                /* ConsiderCudaAttrs = */ false);
 778   };
 779   auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
 780   if (It != Previous.end()) {
 781     // We found a __device__ function with the same name and signature as NewD
 782     // (ignoring CUDA attrs).  This is an error unless that function is defined
 783     // in a system header, in which case we simply return without making NewD
 784     // host+device.
 785     NamedDecl *Match = *It;
 786     if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
 787       Diag(NewD->getLocation(),
 788            diag::err_cuda_unattributed_constexpr_cannot_overload_device)
 789           << NewD;
 790       Diag(Match->getLocation(),
 791            diag::note_cuda_conflicting_device_function_declared_here);
 792     }
 793     return;
 794   }
 795
 796   NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
 797   NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
 798 }
 799
 800 // TODO: `__constant__` memory may be a limited resource for certain targets.
 801 // A safeguard may be needed at the end of compilation pipeline if
 802 // `__constant__` memory usage goes beyond limit.
 803 void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
 804   // Do not promote dependent variables since the cotr/dtor/initializer are
 805   // not determined. Do it after instantiation.
 806   if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
 807       !VD->hasAttr<CUDASharedAttr>() &&
 808       (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
 809       !IsDependentVar(VD) &&
 810       ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
 811        HasAllowedCUDADeviceStaticInitializer(*this, VD,
 812                                              CICK_DeviceOrConstant))) {
 813     VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
 814   }
 815 }
 816
 817 SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
 818                                                            unsigned DiagID) {
 819   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
 820   FunctionDecl *CurFunContext =
 821       SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
 822   SemaDiagnosticBuilder::Kind DiagKind = [&] {
 823     if (!CurFunContext)
 824       return SemaDiagnosticBuilder::K_Nop;
 825     switch (CurrentTarget()) {
 826     case CUDAFunctionTarget::Global:
 827     case CUDAFunctionTarget::Device:
 828       return SemaDiagnosticBuilder::K_Immediate;
 829     case CUDAFunctionTarget::HostDevice:
 830       // An HD function counts as host code if we're compiling for host, and
 831       // device code if we're compiling for device.  Defer any errors in device
 832       // mode until the function is known-emitted.
 833       if (!getLangOpts().CUDAIsDevice)
 834         return SemaDiagnosticBuilder::K_Nop;
 835       if (SemaRef.IsLastErrorImmediate &&
 836           getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
 837         return SemaDiagnosticBuilder::K_Immediate;
 838       return (SemaRef.getEmissionStatus(CurFunContext) ==
 839               Sema::FunctionEmissionStatus::Emitted)
 840                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
 841                  : SemaDiagnosticBuilder::K_Deferred;
 842     default:
 843       return SemaDiagnosticBuilder::K_Nop;
 844     }
 845   }();
 846   return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
 847 }
 848
 849 Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
 850                                                      unsigned DiagID) {
 851   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
 852   FunctionDecl *CurFunContext =
 853       SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
 854   SemaDiagnosticBuilder::Kind DiagKind = [&] {
 855     if (!CurFunContext)
 856       return SemaDiagnosticBuilder::K_Nop;
 857     switch (CurrentTarget()) {
 858     case CUDAFunctionTarget::Host:
 859       return SemaDiagnosticBuilder::K_Immediate;
 860     case CUDAFunctionTarget::HostDevice:
 861       // An HD function counts as host code if we're compiling for host, and
 862       // device code if we're compiling for device.  Defer any errors in device
 863       // mode until the function is known-emitted.
 864       if (getLangOpts().CUDAIsDevice)
 865         return SemaDiagnosticBuilder::K_Nop;
 866       if (SemaRef.IsLastErrorImmediate &&
 867           getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
 868         return SemaDiagnosticBuilder::K_Immediate;
 869       return (SemaRef.getEmissionStatus(CurFunContext) ==
 870               Sema::FunctionEmissionStatus::Emitted)
 871                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
 872                  : SemaDiagnosticBuilder::K_Deferred;
 873     default:
 874       return SemaDiagnosticBuilder::K_Nop;
 875     }
 876   }();
 877   return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
 878 }
 879
 880 bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
 881   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
 882   assert(Callee && "Callee may not be null.");
 883
 884   const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
 885   if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
 886     return true;
 887
 888   // FIXME: Is bailing out early correct here?  Should we instead assume that
 889   // the caller is a global initializer?
 890   FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
 891   if (!Caller)
 892     return true;
 893
 894   // If the caller is known-emitted, mark the callee as known-emitted.
 895   // Otherwise, mark the call in our call graph so we can traverse it later.
 896   bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
 897                             Sema::FunctionEmissionStatus::Emitted;
 898   SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
 899                                           CallerKnownEmitted] {
 900     switch (IdentifyPreference(Caller, Callee)) {
 901     case CFP_Never:
 902     case CFP_WrongSide:
 903       assert(Caller && "Never/wrongSide calls require a non-null caller");
 904       // If we know the caller will be emitted, we know this wrong-side call
 905       // will be emitted, so it's an immediate error.  Otherwise, defer the
 906       // error until we know the caller is emitted.
 907       return CallerKnownEmitted
 908                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
 909                  : SemaDiagnosticBuilder::K_Deferred;
 910     default:
 911       return SemaDiagnosticBuilder::K_Nop;
 912     }
 913   }();
 914
 915   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
 916     // For -fgpu-rdc, keep track of external kernels used by host functions.
 917     if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
 918         Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
 919         (!Caller || (!Caller->getDescribedFunctionTemplate() &&
 920                      getASTContext().GetGVALinkageForFunction(Caller) ==
 921                          GVA_StrongExternal)))
 922       getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
 923     return true;
 924   }
 925
 926   // Avoid emitting this error twice for the same location.  Using a hashtable
 927   // like this is unfortunate, but because we must continue parsing as normal
 928   // after encountering a deferred error, it's otherwise very tricky for us to
 929   // ensure that we only emit this deferred error once.
 930   if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
 931     return true;
 932
 933   SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
 934                         SemaRef)
 935       << llvm::to_underlying(IdentifyTarget(Callee)) << /*function*/ 0 << Callee
 936       << llvm::to_underlying(IdentifyTarget(Caller));
 937   if (!Callee->getBuiltinID())
 938     SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
 939                           diag::note_previous_decl, Caller, SemaRef)
 940         << Callee;
 941   return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
 942          DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
 943 }
 944
 945 // Check the wrong-sided reference capture of lambda for CUDA/HIP.
 946 // A lambda function may capture a stack variable by reference when it is
 947 // defined and uses the capture by reference when the lambda is called. When
 948 // the capture and use happen on different sides, the capture is invalid and
 949 // should be diagnosed.
 950 void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
 951                                   const sema::Capture &Capture) {
 952   // In host compilation we only need to check lambda functions emitted on host
 953   // side. In such lambda functions, a reference capture is invalid only
 954   // if the lambda structure is populated by a device function or kernel then
 955   // is passed to and called by a host function. However that is impossible,
 956   // since a device function or kernel can only call a device function, also a
 957   // kernel cannot pass a lambda back to a host function since we cannot
 958   // define a kernel argument type which can hold the lambda before the lambda
 959   // itself is defined.
 960   if (!getLangOpts().CUDAIsDevice)
 961     return;
 962
 963   // File-scope lambda can only do init captures for global variables, which
 964   // results in passing by value for these global variables.
 965   FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
 966   if (!Caller)
 967     return;
 968
 969   // In device compilation, we only need to check lambda functions which are
 970   // emitted on device side. For such lambdas, a reference capture is invalid
 971   // only if the lambda structure is populated by a host function then passed
 972   // to and called in a device function or kernel.
 973   bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
 974   bool CallerIsHost =
 975       !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
 976   bool ShouldCheck = CalleeIsDevice && CallerIsHost;
 977   if (!ShouldCheck || !Capture.isReferenceCapture())
 978     return;
 979   auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
 980   if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
 981     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
 982                           diag::err_capture_bad_target, Callee, SemaRef)
 983         << Capture.getVariable();
 984   } else if (Capture.isThisCapture()) {
 985     // Capture of this pointer is allowed since this pointer may be pointing to
 986     // managed memory which is accessible on both device and host sides. It only
 987     // results in invalid memory access if this pointer points to memory not
 988     // accessible on device side.
 989     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
 990                           diag::warn_maybe_capture_bad_target_this_ptr, Callee,
 991                           SemaRef);
 992   }
 993 }
 994
 995 void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
 996   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
 997   if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
 998     return;
 999   Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
1000   Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
1001 }
1002
1003 void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
1004                                    const LookupResult &Previous) {
1005   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
1006   CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
1007   for (NamedDecl *OldND : Previous) {
1008     FunctionDecl *OldFD = OldND->getAsFunction();
1009     if (!OldFD)
1010       continue;
1011
1012     CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
1013     // Don't allow HD and global functions to overload other functions with the
1014     // same signature.  We allow overloading based on CUDA attributes so that
1015     // functions can have different implementations on the host and device, but
1016     // HD/global functions "exist" in some sense on both the host and device, so
1017     // should have the same implementation on both sides.
1018     if (NewTarget != OldTarget &&
1019         !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
1020                             /* ConsiderCudaAttrs = */ false)) {
1021       if ((NewTarget == CUDAFunctionTarget::HostDevice &&
1022            !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
1023              isImplicitHostDeviceFunction(NewFD) &&
1024              OldTarget == CUDAFunctionTarget::Device)) ||
1025           (OldTarget == CUDAFunctionTarget::HostDevice &&
1026            !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
1027              isImplicitHostDeviceFunction(OldFD) &&
1028              NewTarget == CUDAFunctionTarget::Device)) ||
1029           (NewTarget == CUDAFunctionTarget::Global) ||
1030           (OldTarget == CUDAFunctionTarget::Global)) {
1031         Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
1032             << llvm::to_underlying(NewTarget) << NewFD->getDeclName()
1033             << llvm::to_underlying(OldTarget) << OldFD;
1034         Diag(OldFD->getLocation(), diag::note_previous_declaration);
1035         NewFD->setInvalidDecl();
1036         break;
1037       }
1038       if ((NewTarget == CUDAFunctionTarget::Host &&
1039            OldTarget == CUDAFunctionTarget::Device) ||
1040           (NewTarget == CUDAFunctionTarget::Device &&
1041            OldTarget == CUDAFunctionTarget::Host)) {
1042         Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
1043             << llvm::to_underlying(NewTarget) << llvm::to_underlying(OldTarget);
1044         Diag(OldFD->getLocation(), diag::note_previous_declaration);
1045       }
1046     }
1047   }
1048 }
1049
1050 template <typename AttrTy>
1051 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
1052                               const FunctionDecl &TemplateFD) {
1053   if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
1054     AttrTy *Clone = Attribute->clone(S.Context);
1055     Clone->setInherited(true);
1056     FD->addAttr(Clone);
1057   }
1058 }
1059
1060 void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
1061                                   const FunctionTemplateDecl &TD) {
1062   const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
1063   copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
1064   copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
1065   copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
1066 }
1067
1068 std::string SemaCUDA::getConfigureFuncName() const {
1069   if (getLangOpts().OffloadViaLLVM)
1070     return "__llvmPushCallConfiguration";
1071
1072   if (getLangOpts().HIP)
1073     return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
1074                                             : "hipConfigureCall";
1075
1076   // New CUDA kernel launch sequence.
1077   if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
1078                          CudaFeature::CUDA_USES_NEW_LAUNCH))
1079     return "__cudaPushCallConfiguration";
1080
1081   // Legacy CUDA kernel configuration call
1082   return "cudaConfigureCall";
1083 }