[clang] Implement lifetime analysis for lifetime_capture_by(X) (#115921)
[llvm-project.git] / clang / test / CodeGenOpenCL / amdgpu-enqueue-kernel.cl
blob62b5661da9dbd8c0643ef23dc6c5e93086cd3634
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR
2 // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefixes=CHECK,NOCPU
4 // // Check no-optnone and target-cpu behavior
5 // RUN: %clang_cc1 -cl-std=CL2.0 -O1 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa -target-cpu gfx900 -target-feature -sram-ecc -fdenormal-fp-math-f32=preserve-sign %s | FileCheck %s --check-prefixes=CHECK,GFX900
8 typedef struct {int a;} ndrange_t;
10 void callee(long id, global long *out) {
11 out[id] = id;
14 kernel void test(global char *a, char b, global long *c, long d) {
15 queue_t default_queue;
16 unsigned flags = 0;
17 ndrange_t ndrange;
19 enqueue_kernel(default_queue, flags, ndrange,
20 ^(void) {
21 a[0] = b;
22 });
24 enqueue_kernel(default_queue, flags, ndrange,
25 ^(void) {
26 a[0] = b;
27 c[0] = d;
28 });
29 enqueue_kernel(default_queue, flags, ndrange,
30 ^(local void *lp) {
31 a[0] = b;
32 c[0] = d;
33 ((local int*)lp)[0] = 1;
34 }, 100);
36 void (^block)(void) = ^{
37 callee(d, c);
40 enqueue_kernel(default_queue, flags, ndrange, block);
43 // The target attribute from the caller is not propagated to the block.
45 // FIXME: Something is broken and inconsistent. Either the builtin use in the
46 // block should be rejected, or the target attribute needs to apply to the block
47 // as well.
48 // https://github.com/llvm/llvm-project/issues/60005
49 __attribute__((target("s-memtime-inst")))
50 kernel void test_target_features_kernel(global int *i) {
51 queue_t default_queue;
52 unsigned flags = 0;
53 ndrange_t ndrange;
55 __builtin_amdgcn_s_memtime();
57 enqueue_kernel(default_queue, flags, ndrange,
58 ^(void) {
59 __builtin_amdgcn_s_memtime();
60 });
63 //.
64 // CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
65 // CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
66 //.
67 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
68 // NOCPU-LABEL: define {{[^@]+}}@callee
69 // NOCPU-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
70 // NOCPU-NEXT: entry:
71 // NOCPU-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
72 // NOCPU-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
73 // NOCPU-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
74 // NOCPU-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
75 // NOCPU-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8
76 // NOCPU-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
77 // NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8
78 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
79 // NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8
80 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
81 // NOCPU-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8
82 // NOCPU-NEXT: ret void
85 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
86 // NOCPU-LABEL: define {{[^@]+}}@test
87 // NOCPU-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
88 // NOCPU-NEXT: entry:
89 // NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
90 // NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
91 // NOCPU-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
92 // NOCPU-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
93 // NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
94 // NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
95 // NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
96 // NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
97 // NOCPU-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
98 // NOCPU-NEXT: [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
99 // NOCPU-NEXT: [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
100 // NOCPU-NEXT: [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
101 // NOCPU-NEXT: [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
102 // NOCPU-NEXT: [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5)
103 // NOCPU-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5)
104 // NOCPU-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
105 // NOCPU-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
106 // NOCPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
107 // NOCPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
108 // NOCPU-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
109 // NOCPU-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
110 // NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
111 // NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
112 // NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
113 // NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
114 // NOCPU-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
115 // NOCPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr
116 // NOCPU-NEXT: [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr
117 // NOCPU-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr
118 // NOCPU-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
119 // NOCPU-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
120 // NOCPU-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
121 // NOCPU-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
122 // NOCPU-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
123 // NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
124 // NOCPU-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1
125 // NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
126 // NOCPU-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8
127 // NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4
128 // NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
129 // NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
130 // NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
131 // NOCPU-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
132 // NOCPU-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8
133 // NOCPU-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
134 // NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN]], align 4
135 // NOCPU-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2
136 // NOCPU-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8
137 // NOCPU-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3
138 // NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
139 // NOCPU-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8
140 // NOCPU-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4
141 // NOCPU-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
142 // NOCPU-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8
143 // NOCPU-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]])
144 // NOCPU-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
145 // NOCPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
146 // NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
147 // NOCPU-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
148 // NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8
149 // NOCPU-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1
150 // NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN5]], align 4
151 // NOCPU-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2
152 // NOCPU-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8
153 // NOCPU-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3
154 // NOCPU-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
155 // NOCPU-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8
156 // NOCPU-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6
157 // NOCPU-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
158 // NOCPU-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8
159 // NOCPU-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4
160 // NOCPU-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
161 // NOCPU-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8
162 // NOCPU-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5
163 // NOCPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
164 // NOCPU-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8
165 // NOCPU-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]])
166 // NOCPU-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
167 // NOCPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
168 // NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
169 // NOCPU-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
170 // NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8
171 // NOCPU-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1
172 // NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN14]], align 4
173 // NOCPU-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2
174 // NOCPU-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8
175 // NOCPU-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3
176 // NOCPU-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
177 // NOCPU-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8
178 // NOCPU-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6
179 // NOCPU-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
180 // NOCPU-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8
181 // NOCPU-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4
182 // NOCPU-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
183 // NOCPU-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8
184 // NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
185 // NOCPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
186 // NOCPU-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8
187 // NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
188 // NOCPU-NEXT: store i64 100, ptr [[TMP18]], align 8
189 // NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
190 // NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
191 // NOCPU-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8
192 // NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
193 // NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN23]], align 4
194 // NOCPU-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2
195 // NOCPU-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8
196 // NOCPU-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3
197 // NOCPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
198 // NOCPU-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8
199 // NOCPU-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
200 // NOCPU-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
201 // NOCPU-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8
202 // NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8
203 // NOCPU-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
204 // NOCPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
205 // NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
206 // NOCPU-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8
207 // NOCPU-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]])
208 // NOCPU-NEXT: ret void
211 // NOCPU: Function Attrs: convergent noinline nounwind optnone
212 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke
213 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] {
214 // NOCPU-NEXT: entry:
215 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
216 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
217 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
218 // NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
219 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
220 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
221 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
222 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
223 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
224 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
225 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
226 // NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
227 // NOCPU-NEXT: ret void
230 // NOCPU: Function Attrs: convergent nounwind
231 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
232 // NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] {
233 // NOCPU-NEXT: entry:
234 // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
235 // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
236 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
237 // NOCPU-NEXT: call void @__test_block_invoke(ptr [[TMP2]])
238 // NOCPU-NEXT: ret void
241 // NOCPU: Function Attrs: convergent noinline nounwind optnone
242 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2
243 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
244 // NOCPU-NEXT: entry:
245 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
246 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
247 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
248 // NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
249 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
250 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
251 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
252 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
253 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
254 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
255 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
256 // NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
257 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
258 // NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8
259 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
260 // NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8
261 // NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
262 // NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8
263 // NOCPU-NEXT: ret void
266 // NOCPU: Function Attrs: convergent nounwind
267 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
268 // NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
269 // NOCPU-NEXT: entry:
270 // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
271 // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
272 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
273 // NOCPU-NEXT: call void @__test_block_invoke_2(ptr [[TMP2]])
274 // NOCPU-NEXT: ret void
277 // NOCPU: Function Attrs: convergent noinline nounwind optnone
278 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3
279 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] {
280 // NOCPU-NEXT: entry:
281 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
282 // NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
283 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
284 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
285 // NOCPU-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr
286 // NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
287 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
288 // NOCPU-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4
289 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
290 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
291 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
292 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
293 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
294 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
295 // NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
296 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
297 // NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8
298 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
299 // NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8
300 // NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
301 // NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8
302 // NOCPU-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4
303 // NOCPU-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
304 // NOCPU-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4
305 // NOCPU-NEXT: ret void
308 // NOCPU: Function Attrs: convergent nounwind
309 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
310 // NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META11:![0-9]+]] !kernel_arg_access_qual [[META12:![0-9]+]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META14:![0-9]+]] {
311 // NOCPU-NEXT: entry:
312 // NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
313 // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
314 // NOCPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
315 // NOCPU-NEXT: call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]])
316 // NOCPU-NEXT: ret void
319 // NOCPU: Function Attrs: convergent noinline nounwind optnone
320 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4
321 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
322 // NOCPU-NEXT: entry:
323 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
324 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
325 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
326 // NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
327 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
328 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
329 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
330 // NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8
331 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
332 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
333 // NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]]
334 // NOCPU-NEXT: ret void
337 // NOCPU: Function Attrs: convergent nounwind
338 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
339 // NOCPU-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
340 // NOCPU-NEXT: entry:
341 // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
342 // NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
343 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
344 // NOCPU-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]])
345 // NOCPU-NEXT: ret void
348 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
349 // NOCPU-LABEL: define {{[^@]+}}@test_target_features_kernel
350 // NOCPU-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META15:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META10]] {
351 // NOCPU-NEXT: entry:
352 // NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
353 // NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
354 // NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
355 // NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
356 // NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
357 // NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
358 // NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
359 // NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
360 // NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
361 // NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
362 // NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
363 // NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4
364 // NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
365 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
366 // NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
367 // NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
368 // NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
369 // NOCPU-NEXT: ret void
372 // NOCPU: Function Attrs: convergent noinline nounwind optnone
373 // NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
374 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
375 // NOCPU-NEXT: entry:
376 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
377 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
378 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
379 // NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
380 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
381 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
382 // NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
383 // NOCPU-NEXT: ret void
386 // NOCPU: Function Attrs: convergent nounwind
387 // NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
388 // NOCPU-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
389 // NOCPU-NEXT: entry:
390 // NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
391 // NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
392 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
393 // NOCPU-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
394 // NOCPU-NEXT: ret void
410 // GFX900: Function Attrs: convergent norecurse nounwind
411 // GFX900-LABEL: define {{[^@]+}}@callee
412 // GFX900-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
413 // GFX900-NEXT: entry:
414 // GFX900-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
415 // GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
416 // GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
417 // GFX900-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
418 // GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3:![0-9]+]]
419 // GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]]
420 // GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
421 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
422 // GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
423 // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
424 // GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
425 // GFX900-NEXT: ret void
428 // GFX900: Function Attrs: convergent norecurse nounwind
429 // GFX900-LABEL: define {{[^@]+}}@test
430 // GFX900-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] {
431 // GFX900-NEXT: entry:
432 // GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
433 // GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
434 // GFX900-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
435 // GFX900-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
436 // GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
437 // GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
438 // GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
439 // GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
440 // GFX900-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
441 // GFX900-NEXT: [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
442 // GFX900-NEXT: [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
443 // GFX900-NEXT: [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
444 // GFX900-NEXT: [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
445 // GFX900-NEXT: [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5)
446 // GFX900-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5)
447 // GFX900-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
448 // GFX900-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
449 // GFX900-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
450 // GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
451 // GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
452 // GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
453 // GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
454 // GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
455 // GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
456 // GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
457 // GFX900-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
458 // GFX900-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr
459 // GFX900-NEXT: [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr
460 // GFX900-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr
461 // GFX900-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
462 // GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
463 // GFX900-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
464 // GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
465 // GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
466 // GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
467 // GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA13:![0-9]+]]
468 // GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
469 // GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
470 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8:[0-9]+]]
471 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
472 // GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14:![0-9]+]]
473 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
474 // GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16:![0-9]+]]
475 // GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]]
476 // GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18:![0-9]+]]
477 // GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
478 // GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8
479 // GFX900-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
480 // GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN]], align 4
481 // GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2
482 // GFX900-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8
483 // GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3
484 // GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
485 // GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA7]]
486 // GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4
487 // GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA13]]
488 // GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA13]]
489 // GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]])
490 // GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16]]
491 // GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]]
492 // GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]]
493 // GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
494 // GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8
495 // GFX900-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1
496 // GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN5]], align 4
497 // GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2
498 // GFX900-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8
499 // GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3
500 // GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
501 // GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA7]]
502 // GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6
503 // GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA13]]
504 // GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA13]]
505 // GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4
506 // GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
507 // GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]]
508 // GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5
509 // GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
510 // GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]]
511 // GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]])
512 // GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16]]
513 // GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]]
514 // GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]]
515 // GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
516 // GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8
517 // GFX900-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1
518 // GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN14]], align 4
519 // GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2
520 // GFX900-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8
521 // GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3
522 // GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
523 // GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA7]]
524 // GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6
525 // GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA13]]
526 // GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA13]]
527 // GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4
528 // GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
529 // GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]]
530 // GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
531 // GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
532 // GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]]
533 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]]
534 // GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
535 // GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8
536 // GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
537 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]]
538 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]]
539 // GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
540 // GFX900-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8
541 // GFX900-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
542 // GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN23]], align 4
543 // GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2
544 // GFX900-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8
545 // GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3
546 // GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
547 // GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]]
548 // GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
549 // GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
550 // GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]]
551 // GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA13]]
552 // GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16]]
553 // GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]]
554 // GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]]
555 // GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA13]]
556 // GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]])
557 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]]
558 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
559 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
560 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
561 // GFX900-NEXT: ret void
564 // GFX900: Function Attrs: convergent nounwind
565 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke
566 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] {
567 // GFX900-NEXT: entry:
568 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
569 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
570 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
571 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
572 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]]
573 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
574 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
575 // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
576 // GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA13]]
577 // GFX900-NEXT: ret void
580 // GFX900: Function Attrs: convergent nounwind
581 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
582 // GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META19:![0-9]+]] !kernel_arg_access_qual [[META20:![0-9]+]] !kernel_arg_type [[META21:![0-9]+]] !kernel_arg_base_type [[META21]] !kernel_arg_type_qual [[META22:![0-9]+]] {
583 // GFX900-NEXT: entry:
584 // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
585 // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
586 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
587 // GFX900-NEXT: call void @__test_block_invoke(ptr [[TMP2]])
588 // GFX900-NEXT: ret void
591 // GFX900: Function Attrs: convergent nounwind
592 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2
593 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
594 // GFX900-NEXT: entry:
595 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
596 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
597 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
598 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
599 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]]
600 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
601 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
602 // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
603 // GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA13]]
604 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
605 // GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
606 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
607 // GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
608 // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
609 // GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
610 // GFX900-NEXT: ret void
613 // GFX900: Function Attrs: convergent nounwind
614 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
615 // GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META19]] !kernel_arg_access_qual [[META20]] !kernel_arg_type [[META21]] !kernel_arg_base_type [[META21]] !kernel_arg_type_qual [[META22]] {
616 // GFX900-NEXT: entry:
617 // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
618 // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
619 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
620 // GFX900-NEXT: call void @__test_block_invoke_2(ptr [[TMP2]])
621 // GFX900-NEXT: ret void
624 // GFX900: Function Attrs: convergent nounwind
625 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3
626 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] {
627 // GFX900-NEXT: entry:
628 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
629 // GFX900-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
630 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
631 // GFX900-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr
632 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
633 // GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA7]]
634 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
635 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]]
636 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
637 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
638 // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
639 // GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA13]]
640 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
641 // GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
642 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
643 // GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
644 // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
645 // GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
646 // GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA7]]
647 // GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
648 // GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA14]]
649 // GFX900-NEXT: ret void
652 // GFX900: Function Attrs: convergent nounwind
653 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
654 // GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META23:![0-9]+]] !kernel_arg_access_qual [[META24:![0-9]+]] !kernel_arg_type [[META25:![0-9]+]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26:![0-9]+]] {
655 // GFX900-NEXT: entry:
656 // GFX900-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
657 // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
658 // GFX900-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
659 // GFX900-NEXT: call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]])
660 // GFX900-NEXT: ret void
663 // GFX900: Function Attrs: convergent nounwind
664 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4
665 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
666 // GFX900-NEXT: entry:
667 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
668 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
669 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
670 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
671 // GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]]
672 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
673 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
674 // GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR9:[0-9]+]]
675 // GFX900-NEXT: ret void
678 // GFX900: Function Attrs: convergent nounwind
679 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
680 // GFX900-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META19]] !kernel_arg_access_qual [[META20]] !kernel_arg_type [[META21]] !kernel_arg_base_type [[META21]] !kernel_arg_type_qual [[META22]] {
681 // GFX900-NEXT: entry:
682 // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
683 // GFX900-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
684 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
685 // GFX900-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]])
686 // GFX900-NEXT: ret void
689 // GFX900: Function Attrs: convergent norecurse nounwind
690 // GFX900-LABEL: define {{[^@]+}}@test_target_features_kernel
691 // GFX900-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META27:![0-9]+]] !kernel_arg_access_qual [[META20]] !kernel_arg_type [[META28:![0-9]+]] !kernel_arg_base_type [[META28]] !kernel_arg_type_qual [[META22]] {
692 // GFX900-NEXT: entry:
693 // GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
694 // GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
695 // GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
696 // GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
697 // GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
698 // GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
699 // GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
700 // GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
701 // GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
702 // GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
703 // GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
704 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
705 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
706 // GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]]
707 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
708 // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
709 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16]]
710 // GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]]
711 // GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]]
712 // GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
713 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
714 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
715 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
716 // GFX900-NEXT: ret void
719 // GFX900: Function Attrs: convergent nounwind
720 // GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
721 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
722 // GFX900-NEXT: entry:
723 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
724 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
725 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
726 // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
727 // GFX900-NEXT: ret void
730 // GFX900: Function Attrs: convergent nounwind
731 // GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
732 // GFX900-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META19]] !kernel_arg_access_qual [[META20]] !kernel_arg_type [[META21]] !kernel_arg_base_type [[META21]] !kernel_arg_type_qual [[META22]] {
733 // GFX900-NEXT: entry:
734 // GFX900-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
735 // GFX900-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
736 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
737 // GFX900-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
738 // GFX900-NEXT: ret void
741 // NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
742 // NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
743 // NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
744 // NOCPU: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
745 // NOCPU: attributes #[[ATTR4]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
746 // NOCPU: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
747 // NOCPU: attributes #[[ATTR6]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
748 // NOCPU: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
749 // NOCPU: attributes #[[ATTR8]] = { convergent nounwind }
751 // GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
752 // GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
753 // GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
754 // GFX900: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
755 // GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
756 // GFX900: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
757 // GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
758 // GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
759 // GFX900: attributes #[[ATTR8]] = { nounwind }
760 // GFX900: attributes #[[ATTR9]] = { convergent nounwind }
762 // NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
763 // NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
764 // NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0}
765 // NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0}
766 // NOCPU: [[META4]] = !{!"none", !"none", !"none", !"none"}
767 // NOCPU: [[META5]] = !{!"char*", !"char", !"long*", !"long"}
768 // NOCPU: [[META6]] = !{!"", !"", !"", !""}
769 // NOCPU: [[META7]] = !{i32 0}
770 // NOCPU: [[META8]] = !{!"none"}
771 // NOCPU: [[META9]] = !{!"__block_literal"}
772 // NOCPU: [[META10]] = !{!""}
773 // NOCPU: [[META11]] = !{i32 0, i32 3}
774 // NOCPU: [[META12]] = !{!"none", !"none"}
775 // NOCPU: [[META13]] = !{!"__block_literal", !"void*"}
776 // NOCPU: [[META14]] = !{!"", !""}
777 // NOCPU: [[META15]] = !{i32 1}
778 // NOCPU: [[META16]] = !{!"int*"}
780 // GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
781 // GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
782 // GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0}
783 // GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
784 // GFX900: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0}
785 // GFX900: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
786 // GFX900: [[META6]] = !{!"Simple C/C++ TBAA"}
787 // GFX900: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
788 // GFX900: [[META8]] = !{!"any pointer", [[META5]], i64 0}
789 // GFX900: [[META9]] = !{i32 1, i32 0, i32 1, i32 0}
790 // GFX900: [[META10]] = !{!"none", !"none", !"none", !"none"}
791 // GFX900: [[META11]] = !{!"char*", !"char", !"long*", !"long"}
792 // GFX900: [[META12]] = !{!"", !"", !"", !""}
793 // GFX900: [[TBAA13]] = !{[[META5]], [[META5]], i64 0}
794 // GFX900: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
795 // GFX900: [[META15]] = !{!"int", [[META5]], i64 0}
796 // GFX900: [[TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0}
797 // GFX900: [[META17]] = !{!"queue_t", [[META5]], i64 0}
798 // GFX900: [[TBAA_STRUCT18]] = !{i64 0, i64 4, [[TBAA14]]}
799 // GFX900: [[META19]] = !{i32 0}
800 // GFX900: [[META20]] = !{!"none"}
801 // GFX900: [[META21]] = !{!"__block_literal"}
802 // GFX900: [[META22]] = !{!""}
803 // GFX900: [[META23]] = !{i32 0, i32 3}
804 // GFX900: [[META24]] = !{!"none", !"none"}
805 // GFX900: [[META25]] = !{!"__block_literal", !"void*"}
806 // GFX900: [[META26]] = !{!"", !""}
807 // GFX900: [[META27]] = !{i32 1}
808 // GFX900: [[META28]] = !{!"int*"}
810 //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
811 // CHECK: {{.*}}