1 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
2 // RUN: -mcode-object-version=4 -DUSER -x hip -o %t_4.bc %s
4 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
5 // RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s
7 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
8 // RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s
10 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
11 // RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s
13 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
14 // RUN: %t_4.bc -mlink-builtin-bitcode %t_0.bc -o - |\
15 // RUN: FileCheck -check-prefix=LINKED4 %s
17 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
18 // RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\
19 // RUN: FileCheck -check-prefix=LINKED5 %s
21 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
22 // RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\
23 // RUN: FileCheck -check-prefix=LINKED6 %s
25 #include "Inputs/cuda.h"
27 // LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
29 // LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
30 // LINKED4-NOT: icmp sge i32 %{{.*}}, 500
31 // LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
32 // LINKED4: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
33 // LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
34 // LINKED4: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
35 // LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
36 // LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
38 // LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
39 // LINKED4-NOT: icmp sge i32 %{{.*}}, 500
40 // LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
41 // LINKED4: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
42 // LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
43 // LINKED4: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
44 // LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
45 // LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
47 // LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
48 // LINKED4-NOT: icmp sge i32 %{{.*}}, 500
49 // LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
50 // LINKED4: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
51 // LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
52 // LINKED4: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
53 // LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
54 // LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
55 // LINKED4: "amdhsa_code_object_version", i32 400
57 // LINKED5: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
59 // LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
60 // LINKED5-NOT: icmp sge i32 %{{.*}}, 500
61 // LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
62 // LINKED5: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
63 // LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
64 // LINKED5: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
65 // LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
66 // LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
68 // LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
69 // LINKED5-NOT: icmp sge i32 %{{.*}}, 500
70 // LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
71 // LINKED5: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
72 // LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
73 // LINKED5: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
74 // LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
75 // LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
77 // LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
78 // LINKED5-NOT: icmp sge i32 %{{.*}}, 500
79 // LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
80 // LINKED5: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
81 // LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
82 // LINKED5: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
83 // LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
84 // LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
85 // LINKED5: "amdhsa_code_object_version", i32 500
87 // LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
89 // LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
90 // LINKED6-NOT: icmp sge i32 %{{.*}}, 500
91 // LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
92 // LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
93 // LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
94 // LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
95 // LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
96 // LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
98 // LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
99 // LINKED6-NOT: icmp sge i32 %{{.*}}, 500
100 // LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
101 // LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
102 // LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
103 // LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
104 // LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
105 // LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
107 // LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
108 // LINKED6-NOT: icmp sge i32 %{{.*}}, 500
109 // LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
110 // LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
111 // LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
112 // LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
113 // LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
114 // LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
115 // LINKED6: "amdhsa_code_object_version", i32 600
118 __device__ void bar(int *x, int *y, int *z)
120 *x = __builtin_amdgcn_workgroup_size_x();
121 *y = __builtin_amdgcn_workgroup_size_y();
122 *z = __builtin_amdgcn_workgroup_size_z();
127 __device__ void bar(int *x, int *y, int *z);
128 __device__ void foo()