1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2 // REQUIRES: x86-registered-target
3 // REQUIRES: amdgpu-registered-target
5 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK %s
6 // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK-SPIRV %s
7 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT
8 // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT-SPIRV
9 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
11 #include "Inputs/cuda.h"
13 // Coerced struct from `struct S` without all generic pointers lowered into
16 // On the host-side compilation, generic pointer won't be coerced.
18 // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi(
19 // CHECK-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
20 // CHECK-NEXT: [[ENTRY:.*:]]
21 // CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
22 // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
23 // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
24 // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
25 // CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
26 // CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
27 // CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
28 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
29 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
30 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
31 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
32 // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
33 // CHECK-NEXT: ret void
35 // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
36 // CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
37 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
38 // CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
39 // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
40 // CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
41 // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
42 // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
43 // CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
44 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
45 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
46 // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
47 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
48 // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
49 // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
50 // CHECK-SPIRV-NEXT: ret void
52 // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi(
53 // OPT-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
54 // OPT-NEXT: [[ENTRY:.*:]]
55 // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
56 // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
57 // OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
60 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
61 // OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
62 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
63 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
64 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
65 // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
66 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
67 // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
68 // OPT-SPIRV-NEXT: ret void
70 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi(
71 // HOST-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
72 // HOST-NEXT: [[ENTRY:.*:]]
73 // HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
74 // HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
75 // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
76 // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
77 // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
78 // HOST: [[SETUP_NEXT]]:
79 // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel1Pi)
80 // HOST-NEXT: br label %[[SETUP_END]]
81 // HOST: [[SETUP_END]]:
82 // HOST-NEXT: ret void
84 __global__ void kernel1(int *x) {
88 // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri(
89 // CHECK-SAME: ptr addrspace(1) noundef nonnull align 4 dereferenceable(4) [[X_COERCE:%.*]]) #[[ATTR0]] {
90 // CHECK-NEXT: [[ENTRY:.*:]]
91 // CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
92 // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
93 // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
94 // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
95 // CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
96 // CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
97 // CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
98 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
99 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
100 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
101 // CHECK-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
102 // CHECK-NEXT: ret void
104 // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
105 // CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
106 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
107 // CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
108 // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
109 // CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
110 // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
111 // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
112 // CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
113 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
114 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
115 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
116 // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
117 // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
118 // CHECK-SPIRV-NEXT: ret void
120 // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri(
121 // OPT-SAME: ptr addrspace(1) nocapture noundef nonnull align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
122 // OPT-NEXT: [[ENTRY:.*:]]
123 // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
124 // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
125 // OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
126 // OPT-NEXT: ret void
128 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
129 // OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
130 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
131 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
132 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
133 // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
134 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
135 // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
136 // OPT-SPIRV-NEXT: ret void
138 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri(
139 // HOST-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
140 // HOST-NEXT: [[ENTRY:.*:]]
141 // HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
142 // HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
143 // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
144 // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
145 // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
146 // HOST: [[SETUP_NEXT]]:
147 // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel2Ri)
148 // HOST-NEXT: br label %[[SETUP_END]]
149 // HOST: [[SETUP_END]]:
150 // HOST-NEXT: ret void
152 __global__ void kernel2(int &x) {
156 // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
157 // CHECK-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] {
158 // CHECK-NEXT: [[ENTRY:.*:]]
159 // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8, addrspace(5)
160 // CHECK-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
161 // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
162 // CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
163 // CHECK-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR_ASCAST]], align 8
164 // CHECK-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR_ASCAST]], align 8
165 // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr [[X_ADDR_ASCAST]], align 8
166 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0
167 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4
168 // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[Y_ADDR_ASCAST]], align 8
169 // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0
170 // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4
171 // CHECK-NEXT: ret void
173 // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
174 // CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] {
175 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
176 // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
177 // CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
178 // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
179 // CHECK-SPIRV-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr [[Y_ADDR]] to ptr addrspace(4)
180 // CHECK-SPIRV-NEXT: store ptr addrspace(2) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
181 // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[Y]], ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8
182 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
183 // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0
184 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4
185 // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8
186 // CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0
187 // CHECK-SPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4
188 // CHECK-SPIRV-NEXT: ret void
190 // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
191 // OPT-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
192 // OPT-NEXT: [[ENTRY:.*:]]
193 // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
194 // OPT-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
195 // OPT-NEXT: ret void
197 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
198 // OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
199 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
200 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
201 // OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
202 // OPT-SPIRV-NEXT: ret void
204 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(
205 // HOST-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] {
206 // HOST-NEXT: [[ENTRY:.*:]]
207 // HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
208 // HOST-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
209 // HOST-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR]], align 8
210 // HOST-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR]], align 8
211 // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
212 // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
213 // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
214 // HOST: [[SETUP_NEXT]]:
215 // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[Y_ADDR]], i64 8, i64 8)
216 // HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
217 // HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT1:.*]], label %[[SETUP_END]]
218 // HOST: [[SETUP_NEXT1]]:
219 // HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel3PU3AS2iPU3AS1i)
220 // HOST-NEXT: br label %[[SETUP_END]]
221 // HOST: [[SETUP_END]]:
222 // HOST-NEXT: ret void
224 __global__ void kernel3(__attribute__((address_space(2))) int *x,
225 __attribute__((address_space(1))) int *y) {
229 // CHECK-LABEL: define dso_local void @_Z4funcPi(
230 // CHECK-SAME: ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] {
231 // CHECK-NEXT: [[ENTRY:.*:]]
232 // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
233 // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
234 // CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
235 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
236 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
237 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
238 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
239 // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
240 // CHECK-NEXT: ret void
242 // CHECK-SPIRV-LABEL: define spir_func void @_Z4funcPi(
243 // CHECK-SPIRV-SAME: ptr addrspace(4) noundef [[X:%.*]]) addrspace(4) #[[ATTR1:[0-9]+]] {
244 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
245 // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
246 // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
247 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
248 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
249 // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
250 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
251 // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
252 // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
253 // CHECK-SPIRV-NEXT: ret void
255 // OPT-LABEL: define dso_local void @_Z4funcPi(
256 // OPT-SAME: ptr nocapture noundef [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
257 // OPT-NEXT: [[ENTRY:.*:]]
258 // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4
259 // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
260 // OPT-NEXT: store i32 [[INC]], ptr [[X]], align 4
261 // OPT-NEXT: ret void
263 // OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi(
264 // OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
265 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
266 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4
267 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
268 // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[X]], align 4
269 // OPT-SPIRV-NEXT: ret void
271 __device__ void func(int *x) {
279 // `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
280 // by-val). However, the enhanced address inferring pass should be able to
281 // assume they are global pointers.
282 // For SPIR-V, since byref is not supported at the moment, we pass it as direct.
284 // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S(
285 // CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
286 // CHECK-NEXT: [[ENTRY:.*:]]
287 // CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_S]], align 8, addrspace(5)
288 // CHECK-NEXT: [[S:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
289 // CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[S]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
290 // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
291 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8
292 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
293 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
294 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
295 // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
296 // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
297 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[Y]], align 8
298 // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0
299 // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
300 // CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00
301 // CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4
302 // CHECK-NEXT: ret void
304 // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
305 // CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
306 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
307 // CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8
308 // CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
309 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0
310 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
311 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
312 // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1
313 // CHECK-SPIRV-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
314 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP2]], align 8
315 // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0
316 // CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
317 // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP4]], i64 0
318 // CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
319 // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
320 // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
321 // CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1
322 // CHECK-SPIRV-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
323 // CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP6]], i64 0
324 // CHECK-SPIRV-NEXT: [[TMP7:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
325 // CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP7]], 1.000000e+00
326 // CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
327 // CHECK-SPIRV-NEXT: ret void
329 // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S(
330 // OPT-SAME: ptr addrspace(4) nocapture noundef readonly byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
331 // OPT-NEXT: [[ENTRY:.*:]]
332 // OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4:![0-9]+]]
333 // OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1)
334 // OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 8
335 // OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]]
336 // OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1)
337 // OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]]
338 // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
339 // OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[TMP1]], align 4
340 // OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4
341 // OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00
342 // OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP2]], align 4
343 // OPT-NEXT: ret void
345 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
346 // OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
347 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
348 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
349 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
350 // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
351 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
352 // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
353 // OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4
354 // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
355 // OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4
356 // OPT-SPIRV-NEXT: ret void
358 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel41S(
359 // HOST-SAME: ptr [[S_COERCE0:%.*]], ptr [[S_COERCE1:%.*]]) #[[ATTR0]] {
360 // HOST-NEXT: [[ENTRY:.*:]]
361 // HOST-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 8
362 // HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 0
363 // HOST-NEXT: store ptr [[S_COERCE0]], ptr [[TMP0]], align 8
364 // HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 1
365 // HOST-NEXT: store ptr [[S_COERCE1]], ptr [[TMP1]], align 8
366 // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[S]], i64 16, i64 0)
367 // HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
368 // HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
369 // HOST: [[SETUP_NEXT]]:
370 // HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel41S)
371 // HOST-NEXT: br label %[[SETUP_END]]
372 // HOST: [[SETUP_END]]:
373 // HOST-NEXT: ret void
375 __global__ void kernel4(struct S s) {
380 // If a pointer to struct is passed, only the pointer itself is coerced into the global one.
382 // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
383 // CHECK-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) #[[ATTR0]] {
384 // CHECK-NEXT: [[ENTRY:.*:]]
385 // CHECK-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
386 // CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
387 // CHECK-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
388 // CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr
389 // CHECK-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr [[S_ASCAST]], align 8
390 // CHECK-NEXT: [[S1:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
391 // CHECK-NEXT: store ptr [[S1]], ptr [[S_ADDR_ASCAST]], align 8
392 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8
393 // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0
394 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8
395 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
396 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
397 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
398 // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
399 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8
400 // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP3]], i32 0, i32 1
401 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[Y]], align 8
402 // CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 0
403 // CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
404 // CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
405 // CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
406 // CHECK-NEXT: ret void
408 // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
409 // CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
410 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
411 // CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8
412 // CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8
413 // CHECK-SPIRV-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
414 // CHECK-SPIRV-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr [[S_ADDR]] to ptr addrspace(4)
415 // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr addrspace(4) [[S_ASCAST]], align 8
416 // CHECK-SPIRV-NEXT: [[S1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ASCAST]], align 8
417 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[S1]], ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
418 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
419 // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr addrspace(4) [[TMP0]], i32 0, i32 0
420 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
421 // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 0
422 // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
423 // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
424 // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
425 // CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
426 // CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[TMP3]], i32 0, i32 1
427 // CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
428 // CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0
429 // CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
430 // CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
431 // CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
432 // CHECK-SPIRV-NEXT: ret void
434 // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
435 // OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
436 // OPT-NEXT: [[ENTRY:.*:]]
437 // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8
438 // OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
439 // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
440 // OPT-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
441 // OPT-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8
442 // OPT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(1) [[Y]], align 8
443 // OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
444 // OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
445 // OPT-NEXT: store float [[ADD]], ptr [[TMP2]], align 4
446 // OPT-NEXT: ret void
448 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
449 // OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
450 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
451 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
452 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
453 // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8
454 // OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
455 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
456 // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4
457 // OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
458 // OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
459 // OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4
460 // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
461 // OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4
462 // OPT-SPIRV-NEXT: ret void
464 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S(
465 // HOST-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
466 // HOST-NEXT: [[ENTRY:.*:]]
467 // HOST-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8
468 // HOST-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8
469 // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[S_ADDR]], i64 8, i64 0)
470 // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
471 // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
472 // HOST: [[SETUP_NEXT]]:
473 // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel5P1S)
474 // HOST-NEXT: br label %[[SETUP_END]]
475 // HOST: [[SETUP_END]]:
476 // HOST-NEXT: ret void
478 __global__ void kernel5(struct S *s) {
486 // `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
487 // by-val). However, the enhanced address inferring pass should be able to
488 // assume they are global pointers.
489 // For SPIR-V, since byref is not supported at the moment, we pass it as direct.
491 // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T(
492 // CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
493 // CHECK-NEXT: [[ENTRY:.*:]]
494 // CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_T]], align 8, addrspace(5)
495 // CHECK-NEXT: [[T:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
496 // CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[T]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
497 // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0
498 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X]], i64 0, i64 0
499 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
500 // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 0
501 // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
502 // CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00
503 // CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4
504 // CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0
505 // CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X2]], i64 0, i64 1
506 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
507 // CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0
508 // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
509 // CHECK-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00
510 // CHECK-NEXT: store float [[ADD5]], ptr [[ARRAYIDX4]], align 4
511 // CHECK-NEXT: ret void
513 // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
514 // CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
515 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
516 // CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8
517 // CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
518 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
519 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
520 // CHECK-SPIRV-NEXT: store [2 x ptr addrspace(4)] [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
521 // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
522 // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X]], i64 0, i64 0
523 // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8
524 // CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 0
525 // CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
526 // CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
527 // CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
528 // CHECK-SPIRV-NEXT: [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
529 // CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X3]], i64 0, i64 1
530 // CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX4]], align 8
531 // CHECK-SPIRV-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0
532 // CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX5]], align 4
533 // CHECK-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP5]], 2.000000e+00
534 // CHECK-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[ARRAYIDX5]], align 4
535 // CHECK-SPIRV-NEXT: ret void
537 // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T(
538 // OPT-SAME: ptr addrspace(4) nocapture noundef readonly byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] {
539 // OPT-NEXT: [[ENTRY:.*:]]
540 // OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4]]
541 // OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1)
542 // OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 8
543 // OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]]
544 // OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1)
545 // OPT-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]]
546 // OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
547 // OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP1]], align 4
548 // OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4
549 // OPT-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00
550 // OPT-NEXT: store float [[ADD5]], ptr addrspace(1) [[TMP2]], align 4
551 // OPT-NEXT: ret void
553 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
554 // OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
555 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
556 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
557 // OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
558 // OPT-SPIRV-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 1
559 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4
560 // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 1.000000e+00
561 // OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4
562 // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4
563 // OPT-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP2]], 2.000000e+00
564 // OPT-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4
565 // OPT-SPIRV-NEXT: ret void
567 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel61T(
568 // HOST-SAME: ptr [[T_COERCE0:%.*]], ptr [[T_COERCE1:%.*]]) #[[ATTR0]] {
569 // HOST-NEXT: [[ENTRY:.*:]]
570 // HOST-NEXT: [[T:%.*]] = alloca [[STRUCT_T:%.*]], align 8
571 // HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 0
572 // HOST-NEXT: store ptr [[T_COERCE0]], ptr [[TMP0]], align 8
573 // HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 1
574 // HOST-NEXT: store ptr [[T_COERCE1]], ptr [[TMP1]], align 8
575 // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[T]], i64 16, i64 0)
576 // HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
577 // HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
578 // HOST: [[SETUP_NEXT]]:
579 // HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel61T)
580 // HOST-NEXT: br label %[[SETUP_END]]
581 // HOST: [[SETUP_END]]:
582 // HOST-NEXT: ret void
584 __global__ void kernel6(struct T t) {
589 // Check that coerced pointers retain the noalias attribute when qualified with __restrict.
591 // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi(
592 // CHECK-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) #[[ATTR0]] {
593 // CHECK-NEXT: [[ENTRY:.*:]]
594 // CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
595 // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
596 // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
597 // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
598 // CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
599 // CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
600 // CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
601 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
602 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
603 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
604 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
605 // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
606 // CHECK-NEXT: ret void
608 // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
609 // CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
610 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
611 // CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
612 // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
613 // CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
614 // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
615 // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
616 // CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
617 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
618 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
619 // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
620 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
621 // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
622 // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
623 // CHECK-SPIRV-NEXT: ret void
625 // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi(
626 // OPT-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
627 // OPT-NEXT: [[ENTRY:.*:]]
628 // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
629 // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
630 // OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
631 // OPT-NEXT: ret void
633 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
634 // OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
635 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
636 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
637 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
638 // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
639 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
640 // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
641 // OPT-SPIRV-NEXT: ret void
643 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi(
644 // HOST-SAME: ptr noalias noundef [[X:%.*]]) #[[ATTR0]] {
645 // HOST-NEXT: [[ENTRY:.*:]]
646 // HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
647 // HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
648 // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
649 // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
650 // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
651 // HOST: [[SETUP_NEXT]]:
652 // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel7Pi)
653 // HOST-NEXT: br label %[[SETUP_END]]
654 // HOST: [[SETUP_END]]:
655 // HOST-NEXT: ret void
657 __global__ void kernel7(int *__restrict x) {
661 // Single element struct.
665 // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS(
666 // CHECK-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) #[[ATTR0]] {
667 // CHECK-NEXT: [[ENTRY:.*:]]
668 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8, addrspace(5)
669 // CHECK-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
670 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0
671 // CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
672 // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0
673 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8
674 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4
675 // CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00
676 // CHECK-NEXT: store float [[ADD]], ptr [[TMP0]], align 4
677 // CHECK-NEXT: ret void
679 // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
680 // CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
681 // CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
682 // CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8
683 // CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
684 // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
685 // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
686 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
687 // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
688 // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
689 // CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4
690 // CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 3.000000e+00
691 // CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4
692 // CHECK-SPIRV-NEXT: ret void
694 // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS(
695 // OPT-SAME: ptr addrspace(1) nocapture [[A_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
696 // OPT-NEXT: [[ENTRY:.*:]]
697 // OPT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[A_COERCE]], align 4
698 // OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP0]], 3.000000e+00
699 // OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[A_COERCE]], align 4
700 // OPT-NEXT: ret void
702 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
703 // OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
704 // OPT-SPIRV-NEXT: [[ENTRY:.*:]]
705 // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
706 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
707 // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00
708 // OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4
709 // OPT-SPIRV-NEXT: ret void
711 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel82SS(
712 // HOST-SAME: ptr [[A_COERCE:%.*]]) #[[ATTR0]] {
713 // HOST-NEXT: [[ENTRY:.*:]]
714 // HOST-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
715 // HOST-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A]], i32 0, i32 0
716 // HOST-NEXT: store ptr [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
717 // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[A]], i64 8, i64 0)
718 // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
719 // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
720 // HOST: [[SETUP_NEXT]]:
721 // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel82SS)
722 // HOST-NEXT: br label %[[SETUP_END]]
723 // HOST: [[SETUP_END]]:
724 // HOST-NEXT: ret void
726 __global__ void kernel8(struct SS a) {
730 // OPT: [[META4]] = !{}