clang/test/CodeGenCUDA/kernel-amdgcn.cu

   1 // RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
   2 #include "Inputs/cuda.h"
   3
   4 // CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv
   5 class A {  // Class whose only member is a static kernel.
   6 public:
       // Empty __global__ member function. The FileCheck directive just before
       // this class expects it to be defined as amdgpu_kernel @_ZN1A6kernelEv,
       // and main() passes its address to launch().
   7   static __global__ void kernel(){}
   8 };
   9
  10 // CHECK: define{{.*}} void @_Z10non_kernelv
  11 __device__ void non_kernel(){}  // Empty __device__ function; called from kernel(int) and matched by the directive above as @_Z10non_kernelv.
  12
  13 // CHECK: define{{.*}} amdgpu_kernel void @_Z6kerneli
  14 __global__ void kernel(int x) {  // Namespace-scope kernel; the directive above expects amdgpu_kernel @_Z6kerneli.
  15   non_kernel();  // Only statement: calls the __device__ function. The parameter x is never read.
  16 }
  17
  18 // CHECK: define{{.*}} amdgpu_kernel void @_Z11EmptyKernelIvEvv
  19 template <typename T>  // T appears in neither the parameter list nor the body.
  20 __global__ void EmptyKernel(void) {}  // Named in the visible source only as EmptyKernel<void> inside Dummy::Empty(); the directive above expects amdgpu_kernel @_Z11EmptyKernelIvEvv.
  21
  22 struct Dummy {  // Exposes EmptyKernel<void> through a function-pointer typedef.
  23   /// Type definition of the EmptyKernel kernel entry point
  24   typedef void (*EmptyKernelPtr)();
       /// Returns EmptyKernel<void> as an EmptyKernelPtr. This is the only
       /// place in the visible source that names that specialization; main()
       /// forwards the result to launch().
  25   EmptyKernelPtr Empty() { return EmptyKernel<void>; }
  26 };
  27
  28 // CHECK: define{{.*}} amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
  29 template<class T>  // main() names this with T = A; the directive above expects amdgpu_kernel @_Z15template_kernelI1AEvT_.
  30 __global__ void template_kernel(T x) {}  // Empty body; x is unused. The attribute-group number captured above as ATTR is checked by the file's last directive.
  31
  32 void launch(void *f);  // Declaration only (no definition in the visible source); main() passes each kernel address to it as void*.
  33
  34 int main() {  // Passes the address of each of the file's four kernels to launch(), in the order below.
  35   Dummy D;
  36   launch((void*)A::kernel);  // static member kernel of class A
  37   launch((void*)kernel);  // namespace-scope kernel(int)
  38   launch((void*)template_kernel<A>);  // the template_kernel specialization for T = A
  39   launch((void*)D.Empty());  // EmptyKernel<void>, obtained through Dummy::Empty()
  40   return 0;
  41 }
  42 // CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"