1 // REQUIRES: amdgpu-registered-target
2 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
3 // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
4 // RUN: -internal-isystem %S/Inputs/include \
5 // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
6 // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
7 // RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,NOMALLOC %s
8 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
9 // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
10 // RUN: -internal-isystem %S/Inputs/include \
11 // RUN: -include cmath \
12 // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
13 // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
14 // RUN: -D__HIPCC_RTC__ | FileCheck %s -check-prefixes=AMD_BOOL_RETURN
15 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
16 // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
17 // RUN: -internal-isystem %S/Inputs/include \
18 // RUN: -include cmath \
19 // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
20 // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
21 // RUN: -D__HIPCC_RTC__ -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s -check-prefixes=AMD_INT_RETURN
22 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
23 // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
24 // RUN: -internal-isystem %S/Inputs/include \
25 // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
26 // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
27 // RUN: -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s
28 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
29 // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
30 // RUN: -internal-isystem %S/Inputs/include \
31 // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
32 // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
33 // RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
34 // RUN: -D__HIPCC_RTC__ -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
35 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
36 // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
37 // RUN: -internal-isystem %S/Inputs/include \
38 // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
39 // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
40 // RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
41 // RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
42 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
43 // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
44 // RUN: -internal-isystem %S/Inputs/include \
45 // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
46 // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
47 // RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
48 // RUN: -fsanitize=address -disable-llvm-passes -D__HIPCC_RTC__ \
49 // RUN: | FileCheck -check-prefixes=MALLOC-ASAN %s
50 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
51 // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
52 // RUN: -internal-isystem %S/Inputs/include \
53 // RUN: -aux-triple amdgcn-amd-amdhsa -triple x86_64-unknown-unknown \
54 // RUN: -emit-llvm %s -o - \
55 // RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
56 // RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC-HOST %s
58 // expected-no-diagnostics
60 // Check handling of overriden, implicitly __host__ dtor (should emit as a
68 struct vderived : public vbase {
72 template struct vderived<void>;
74 // CHECK: @_ZTV8vderivedIvE = weak_odr unnamed_addr addrspace(1) constant { [4 x ptr addrspace(1)] } zeroinitializer, comdat, align 8
76 // Check support for pure and deleted virtual functions
80 virtual void pv() = 0;
83 virtual void dv() = delete;
88 virtual void pv() override {};
90 __device__ void test_vf() {
93 // CHECK: @_ZTV7derived = linkonce_odr unnamed_addr addrspace(1) constant { [4 x ptr addrspace(1)] } { [4 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr @_ZN7derived2pvEv to ptr addrspace(1)), ptr addrspace(1) addrspacecast (ptr @__cxa_deleted_virtual to ptr addrspace(1))] }, comdat, align 8
94 // CHECK: @_ZTV4base = linkonce_odr unnamed_addr addrspace(1) constant { [4 x ptr addrspace(1)] } { [4 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr @__cxa_pure_virtual to ptr addrspace(1)), ptr addrspace(1) addrspacecast (ptr @__cxa_deleted_virtual to ptr addrspace(1))] }, comdat, align 8
95 // CHECK: define{{.*}}void @__cxa_pure_virtual()
96 // CHECK: define{{.*}}void @__cxa_deleted_virtual()
99 __device__ Number(float _x) : x(_x) {}
103 #if __cplusplus >= 201103L
104 // Check __hip::__numeric_type can be used with a class without default ctor.
105 __device__ void test_numeric_type() {
106 int x = __hip::__numeric_type<Number>::value;
109 // ToDo: Fix __clang_hip_cmake.h to specialize __hip::is_arithmetic<_Float16>
110 // to resolve fma(_Float16, _Float16, int) to fma(double, double, double)
111 // instead of fma(_Float16, _Float16, _Float16).
113 // CXX14-LABEL: define{{.*}}@_Z8test_fma
114 // CXX14: call contract noundef half @llvm.fma.f16
115 __device__ double test_fma(_Float16 h, int i) {
121 // CHECK-LABEL: amdgpu_kernel void @_Z4kernPff
122 __global__ void kern(float *x, float y) {
126 // CHECK-LABEL: define{{.*}} i64 @_Z11test_size_tv
128 __device__ size_t test_size_t() {
129 return sizeof(size_t);
132 // Check there is no ambiguity when calling overloaded math functions.
134 // CHECK-LABEL: define{{.*}}@_Z10test_floorv
135 // CHECK: call {{.*}}double @llvm.floor.f64(double
136 __device__ float test_floor() {
140 // CHECK-LABEL: define{{.*}}@_Z8test_maxv
141 // CHECK: call {{.*}}double @llvm.maxnum.f64(double {{.*}}, double
142 __device__ float test_max() {
146 // CHECK-LABEL: define{{.*}}@_Z10test_isnanv
147 __device__ double test_isnan() {
152 // AMD_INT_RETURN: call noundef i1 @llvm.is.fpclass.f32(float {{.*}}, i32 3)
153 // AMD_BOOL_RETURN: call noundef i1 @llvm.is.fpclass.f32(float {{.*}}, i32 3)
156 // AMD_INT_RETURN: call noundef i1 @llvm.is.fpclass.f64(double {{.*}}, i32 3)
157 // AMD_BOOL_RETURN: call noundef i1 @llvm.is.fpclass.f64(double {{.*}}, i32 3)
163 // Check that device malloc and free do not conflict with std headers.
165 // MALLOC-LABEL: define{{.*}}@_Z11test_malloc
166 // MALLOC: call {{.*}}ptr @malloc(i64
167 // MALLOC: call {{.*}}ptr @malloc(i64
168 // MALLOC-LABEL: define weak {{.*}}ptr @malloc(i64
169 // MALLOC: call i64 @__ockl_dm_alloc
170 // NOMALLOC: call void @llvm.trap
171 // MALLOC-ASAN-LABEL: define weak {{.*}}ptr @malloc(i64
172 // MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
173 // MALLOC-ASAN: call i64 @__asan_malloc_impl(i64 {{.*}}, i64 {{.*}})
174 __device__ void test_malloc(void *a) {
179 // MALLOC-LABEL: define{{.*}}@_Z9test_free
180 // MALLOC: call {{.*}}void @free(ptr
181 // MALLOC: call {{.*}}void @free(ptr
182 // MALLOC-LABEL: define weak {{.*}}void @free(ptr
183 // MALLOC: call void @__ockl_dm_dealloc
184 // NOMALLOC: call void @llvm.trap
185 // MALLOC-ASAN-LABEL: define weak {{.*}}void @free(ptr
186 // MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
187 // MALLOC-ASAN: call void @__asan_free_impl(i64 {{.*}}, i64 {{.*}})
188 __device__ void test_free(void *a) {
193 // MALLOC-HOST-LABEL: define{{.*}}@_Z16test_malloc_host
194 // MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
195 // MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
196 // MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
197 // MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
198 void test_malloc_host(void *a) {