1 // NOTE
: Assertions have been autogenerated by utils
/update_cc_test_checks.py UTC_ARGS
: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR
2 // RUN
: %clang_cc1 -cl-std
=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32
=preserve-sign | FileCheck %s --check-prefixes
=CHECK
,NOCPU
4 // // Check no-optnone and target-cpu behavior
5 // RUN
: %clang_cc1 -cl-std
=CL2.0 -O1 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa -target-cpu gfx900 -target-feature -sram-ecc -fdenormal-fp-math-f32
=preserve-sign %s | FileCheck %s --check-prefixes
=CHECK
,GFX900
8 typedef struct
{int a
;} ndrange_t;
10 void callee
(long id
, global long
*out
) {
14 kernel void test(global char *a, char b, global long *c, long d) {
15 queue_t default_queue;
19 enqueue_kernel(default_queue, flags, ndrange,
24 enqueue_kernel(default_queue, flags, ndrange,
29 enqueue_kernel(default_queue, flags, ndrange,
33 ((local int*)lp)[0] = 1;
36 void (^block)(void) = ^{
40 enqueue_kernel(default_queue, flags, ndrange, block);
43 // The target attribute from the caller is not propagated to the block.
45 // FIXME: Something is broken and inconsistent. Either the builtin use in the
46 // block should be rejected, or the target attribute needs to apply to the block
48 // https://github.com/llvm/llvm-project/issues/60005
49 __attribute__((target("s-memtime-inst")))
50 kernel void test_target_features_kernel(global int *i) {
51 queue_t default_queue;
55 __builtin_amdgcn_s_memtime();
57 enqueue_kernel(default_queue, flags, ndrange,
59 __builtin_amdgcn_s_memtime();
64 // CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
66 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
67 // NOCPU-LABEL: define {{[^@]+}}@callee
68 // NOCPU-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
70 // NOCPU-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
71 // NOCPU-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
72 // NOCPU-NEXT: store i64 [[ID]], ptr addrspace(5) [[ID_ADDR]], align 8
73 // NOCPU-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
74 // NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(5) [[ID_ADDR]], align 8
75 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
76 // NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(5) [[ID_ADDR]], align 8
77 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
78 // NOCPU-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8
79 // NOCPU-NEXT: ret void
82 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
83 // NOCPU-LABEL: define {{[^@]+}}@test
84 // NOCPU-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
86 // NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
87 // NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
88 // NOCPU-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
89 // NOCPU-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
90 // NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
91 // NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
92 // NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
93 // NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
94 // NOCPU-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
95 // NOCPU-NEXT: [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
96 // NOCPU-NEXT: [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
97 // NOCPU-NEXT: [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
98 // NOCPU-NEXT: [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
99 // NOCPU-NEXT: [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5)
100 // NOCPU-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5)
101 // NOCPU-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
102 // NOCPU-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
103 // NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8
104 // NOCPU-NEXT: store i8 [[B]], ptr addrspace(5) [[B_ADDR]], align 1
105 // NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr addrspace(5) [[C_ADDR]], align 8
106 // NOCPU-NEXT: store i64 [[D]], ptr addrspace(5) [[D_ADDR]], align 8
107 // NOCPU-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4
108 // NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
109 // NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
110 // NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
111 // NOCPU-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 0
112 // NOCPU-NEXT: store i32 25, ptr addrspace(5) [[BLOCK_SIZE]], align 8
113 // NOCPU-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 1
114 // NOCPU-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN]], align 4
115 // NOCPU-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 2
116 // NOCPU-NEXT: store ptr @__test_block_invoke, ptr addrspace(5) [[BLOCK_INVOKE]], align 8
117 // NOCPU-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 3
118 // NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8
119 // NOCPU-NEXT: store ptr addrspace(1) [[TMP2]], ptr addrspace(5) [[BLOCK_CAPTURED]], align 8
120 // NOCPU-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 4
121 // NOCPU-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1
122 // NOCPU-NEXT: store i8 [[TMP3]], ptr addrspace(5) [[BLOCK_CAPTURED1]], align 8
123 // NOCPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
124 // NOCPU-NEXT: [[TMP5:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_block_invoke_kernel, ptr [[TMP4]])
125 // NOCPU-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
126 // NOCPU-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
127 // NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP2]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
128 // NOCPU-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 0
129 // NOCPU-NEXT: store i32 41, ptr addrspace(5) [[BLOCK_SIZE4]], align 8
130 // NOCPU-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 1
131 // NOCPU-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN5]], align 4
132 // NOCPU-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 2
133 // NOCPU-NEXT: store ptr @__test_block_invoke_2, ptr addrspace(5) [[BLOCK_INVOKE6]], align 8
134 // NOCPU-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 3
135 // NOCPU-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8
136 // NOCPU-NEXT: store ptr addrspace(1) [[TMP8]], ptr addrspace(5) [[BLOCK_CAPTURED7]], align 8
137 // NOCPU-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 6
138 // NOCPU-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1
139 // NOCPU-NEXT: store i8 [[TMP9]], ptr addrspace(5) [[BLOCK_CAPTURED8]], align 8
140 // NOCPU-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 4
141 // NOCPU-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8
142 // NOCPU-NEXT: store ptr addrspace(1) [[TMP10]], ptr addrspace(5) [[BLOCK_CAPTURED9]], align 8
143 // NOCPU-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 5
144 // NOCPU-NEXT: [[TMP11:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8
145 // NOCPU-NEXT: store i64 [[TMP11]], ptr addrspace(5) [[BLOCK_CAPTURED10]], align 8
146 // NOCPU-NEXT: [[TMP12:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr
147 // NOCPU-NEXT: [[TMP13:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP6]], i32 [[TMP7]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[VARTMP2]], ptr @__test_block_invoke_2_kernel, ptr [[TMP12]])
148 // NOCPU-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
149 // NOCPU-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
150 // NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP11]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
151 // NOCPU-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 0
152 // NOCPU-NEXT: store i32 41, ptr addrspace(5) [[BLOCK_SIZE13]], align 8
153 // NOCPU-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 1
154 // NOCPU-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN14]], align 4
155 // NOCPU-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 2
156 // NOCPU-NEXT: store ptr @__test_block_invoke_3, ptr addrspace(5) [[BLOCK_INVOKE15]], align 8
157 // NOCPU-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 3
158 // NOCPU-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8
159 // NOCPU-NEXT: store ptr addrspace(1) [[TMP16]], ptr addrspace(5) [[BLOCK_CAPTURED16]], align 8
160 // NOCPU-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 6
161 // NOCPU-NEXT: [[TMP17:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1
162 // NOCPU-NEXT: store i8 [[TMP17]], ptr addrspace(5) [[BLOCK_CAPTURED17]], align 8
163 // NOCPU-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 4
164 // NOCPU-NEXT: [[TMP18:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8
165 // NOCPU-NEXT: store ptr addrspace(1) [[TMP18]], ptr addrspace(5) [[BLOCK_CAPTURED18]], align 8
166 // NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 5
167 // NOCPU-NEXT: [[TMP19:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8
168 // NOCPU-NEXT: store i64 [[TMP19]], ptr addrspace(5) [[BLOCK_CAPTURED19]], align 8
169 // NOCPU-NEXT: [[TMP20:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
170 // NOCPU-NEXT: [[TMP21:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0
171 // NOCPU-NEXT: store i64 100, ptr addrspace(5) [[TMP21]], align 8
172 // NOCPU-NEXT: [[TMP22:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP14]], i32 [[TMP15]], ptr addrspace(5) [[VARTMP11]], ptr @__test_block_invoke_3_kernel, ptr [[TMP20]], i32 1, ptr addrspace(5) [[TMP21]])
173 // NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 0
174 // NOCPU-NEXT: store i32 32, ptr addrspace(5) [[BLOCK_SIZE22]], align 8
175 // NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 1
176 // NOCPU-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN23]], align 4
177 // NOCPU-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 2
178 // NOCPU-NEXT: store ptr @__test_block_invoke_4, ptr addrspace(5) [[BLOCK_INVOKE24]], align 8
179 // NOCPU-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 3
180 // NOCPU-NEXT: [[TMP23:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8
181 // NOCPU-NEXT: store i64 [[TMP23]], ptr addrspace(5) [[BLOCK_CAPTURED25]], align 8
182 // NOCPU-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 4
183 // NOCPU-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8
184 // NOCPU-NEXT: store ptr addrspace(1) [[TMP24]], ptr addrspace(5) [[BLOCK_CAPTURED26]], align 8
185 // NOCPU-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
186 // NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8
187 // NOCPU-NEXT: [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
188 // NOCPU-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
189 // NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP27]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
190 // NOCPU-NEXT: [[TMP27:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8
191 // NOCPU-NEXT: [[TMP28:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
192 // NOCPU-NEXT: [[TMP29:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP25]], i32 [[TMP26]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[VARTMP27]], ptr @__test_block_invoke_4_kernel, ptr [[TMP28]])
193 // NOCPU-NEXT: ret void
196 // NOCPU: Function Attrs: convergent noinline nounwind optnone
197 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke
198 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] {
199 // NOCPU-NEXT: entry:
200 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
201 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
202 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
203 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8
204 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
205 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
206 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
207 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
208 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
209 // NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
210 // NOCPU-NEXT: ret void
213 // NOCPU: Function Attrs: convergent nounwind
214 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
215 // NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
216 // NOCPU-NEXT: entry:
217 // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
218 // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
219 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
220 // NOCPU-NEXT: call void @__test_block_invoke(ptr [[TMP2]])
221 // NOCPU-NEXT: ret void
224 // NOCPU: Function Attrs: convergent noinline nounwind optnone
225 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2
226 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
227 // NOCPU-NEXT: entry:
228 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
229 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
230 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
231 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8
232 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
233 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
234 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
235 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
236 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
237 // NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
238 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
239 // NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8
240 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
241 // NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8
242 // NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
243 // NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8
244 // NOCPU-NEXT: ret void
247 // NOCPU: Function Attrs: convergent nounwind
248 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
249 // NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
250 // NOCPU-NEXT: entry:
251 // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
252 // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
253 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
254 // NOCPU-NEXT: call void @__test_block_invoke_2(ptr [[TMP2]])
255 // NOCPU-NEXT: ret void
258 // NOCPU: Function Attrs: convergent noinline nounwind optnone
259 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3
260 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] {
261 // NOCPU-NEXT: entry:
262 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
263 // NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
264 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
265 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
266 // NOCPU-NEXT: store ptr addrspace(3) [[LP]], ptr addrspace(5) [[LP_ADDR]], align 4
267 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8
268 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
269 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
270 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
271 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
272 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
273 // NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
274 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
275 // NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8
276 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
277 // NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8
278 // NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
279 // NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8
280 // NOCPU-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[LP_ADDR]], align 4
281 // NOCPU-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
282 // NOCPU-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4
283 // NOCPU-NEXT: ret void
286 // NOCPU: Function Attrs: convergent nounwind
287 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
288 // NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 {
289 // NOCPU-NEXT: entry:
290 // NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
291 // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
292 // NOCPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
293 // NOCPU-NEXT: call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]])
294 // NOCPU-NEXT: ret void
297 // NOCPU: Function Attrs: convergent noinline nounwind optnone
298 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4
299 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
300 // NOCPU-NEXT: entry:
301 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
302 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
303 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
304 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8
305 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
306 // NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8
307 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
308 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
309 // NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]]
310 // NOCPU-NEXT: ret void
313 // NOCPU: Function Attrs: convergent nounwind
314 // NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
315 // NOCPU-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
316 // NOCPU-NEXT: entry:
317 // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
318 // NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
319 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
320 // NOCPU-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]])
321 // NOCPU-NEXT: ret void
324 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
325 // NOCPU-LABEL: define {{[^@]+}}@test_target_features_kernel
326 // NOCPU-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space !15 !kernel_arg_access_qual !8 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !10 {
327 // NOCPU-NEXT: entry:
328 // NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
329 // NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
330 // NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
331 // NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
332 // NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
333 // NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8
334 // NOCPU-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4
335 // NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
336 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
337 // NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
338 // NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
339 // NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
340 // NOCPU-NEXT: ret void
343 // NOCPU: Function Attrs: convergent noinline nounwind optnone
344 // NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
345 // NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
346 // NOCPU-NEXT: entry:
347 // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
348 // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
349 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
350 // NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8
351 // NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
352 // NOCPU-NEXT: ret void
355 // NOCPU: Function Attrs: convergent nounwind
356 // NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
357 // NOCPU-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
358 // NOCPU-NEXT: entry:
359 // NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
360 // NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
361 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
362 // NOCPU-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
363 // NOCPU-NEXT: ret void
379 // GFX900: Function Attrs: convergent norecurse nounwind
380 // GFX900-LABEL: define {{[^@]+}}@callee
381 // GFX900-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
382 // GFX900-NEXT: entry:
383 // GFX900-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
384 // GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
385 // GFX900-NEXT: store i64 [[ID]], ptr addrspace(5) [[ID_ADDR]], align 8, !tbaa [[TBAA3:![0-9]+]]
386 // GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]]
387 // GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(5) [[ID_ADDR]], align 8, !tbaa [[TBAA3]]
388 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8, !tbaa [[TBAA7]]
389 // GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(5) [[ID_ADDR]], align 8, !tbaa [[TBAA3]]
390 // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
391 // GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
392 // GFX900-NEXT: ret void
395 // GFX900: Function Attrs: convergent norecurse nounwind
396 // GFX900-LABEL: define {{[^@]+}}@test
397 // GFX900-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !9 !kernel_arg_access_qual !10 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !12 {
398 // GFX900-NEXT: entry:
399 // GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
400 // GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
401 // GFX900-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
402 // GFX900-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
403 // GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
404 // GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
405 // GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
406 // GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
407 // GFX900-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
408 // GFX900-NEXT: [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
409 // GFX900-NEXT: [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
410 // GFX900-NEXT: [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
411 // GFX900-NEXT: [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
412 // GFX900-NEXT: [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5)
413 // GFX900-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5)
414 // GFX900-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
415 // GFX900-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
416 // GFX900-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8, !tbaa [[TBAA7]]
417 // GFX900-NEXT: store i8 [[B]], ptr addrspace(5) [[B_ADDR]], align 1, !tbaa [[TBAA13:![0-9]+]]
418 // GFX900-NEXT: store ptr addrspace(1) [[C]], ptr addrspace(5) [[C_ADDR]], align 8, !tbaa [[TBAA7]]
419 // GFX900-NEXT: store i64 [[D]], ptr addrspace(5) [[D_ADDR]], align 8, !tbaa [[TBAA3]]
420 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8:[0-9]+]]
421 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
422 // GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14:![0-9]+]]
423 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
424 // GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16:![0-9]+]]
425 // GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]]
426 // GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18:![0-9]+]]
427 // GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 0
428 // GFX900-NEXT: store i32 25, ptr addrspace(5) [[BLOCK_SIZE]], align 8
429 // GFX900-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 1
430 // GFX900-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN]], align 4
431 // GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 2
432 // GFX900-NEXT: store ptr @__test_block_invoke, ptr addrspace(5) [[BLOCK_INVOKE]], align 8
433 // GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 3
434 // GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8, !tbaa [[TBAA7]]
435 // GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr addrspace(5) [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA7]]
436 // GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 4
437 // GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1, !tbaa [[TBAA13]]
438 // GFX900-NEXT: store i8 [[TMP3]], ptr addrspace(5) [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA13]]
439 // GFX900-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
440 // GFX900-NEXT: [[TMP5:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_block_invoke_kernel, ptr [[TMP4]])
441 // GFX900-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16]]
442 // GFX900-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]]
443 // GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP2]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]]
444 // GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 0
445 // GFX900-NEXT: store i32 41, ptr addrspace(5) [[BLOCK_SIZE4]], align 8
446 // GFX900-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 1
447 // GFX900-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN5]], align 4
448 // GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 2
449 // GFX900-NEXT: store ptr @__test_block_invoke_2, ptr addrspace(5) [[BLOCK_INVOKE6]], align 8
450 // GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 3
451 // GFX900-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8, !tbaa [[TBAA7]]
452 // GFX900-NEXT: store ptr addrspace(1) [[TMP8]], ptr addrspace(5) [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA7]]
453 // GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 6
454 // GFX900-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1, !tbaa [[TBAA13]]
455 // GFX900-NEXT: store i8 [[TMP9]], ptr addrspace(5) [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA13]]
456 // GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 4
457 // GFX900-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8, !tbaa [[TBAA7]]
458 // GFX900-NEXT: store ptr addrspace(1) [[TMP10]], ptr addrspace(5) [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]]
459 // GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 5
460 // GFX900-NEXT: [[TMP11:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8, !tbaa [[TBAA3]]
461 // GFX900-NEXT: store i64 [[TMP11]], ptr addrspace(5) [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]]
462 // GFX900-NEXT: [[TMP12:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr
463 // GFX900-NEXT: [[TMP13:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP6]], i32 [[TMP7]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[VARTMP2]], ptr @__test_block_invoke_2_kernel, ptr [[TMP12]])
464 // GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16]]
465 // GFX900-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]]
466 // GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP11]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]]
467 // GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 0
468 // GFX900-NEXT: store i32 41, ptr addrspace(5) [[BLOCK_SIZE13]], align 8
469 // GFX900-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 1
470 // GFX900-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN14]], align 4
471 // GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 2
472 // GFX900-NEXT: store ptr @__test_block_invoke_3, ptr addrspace(5) [[BLOCK_INVOKE15]], align 8
473 // GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 3
474 // GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8, !tbaa [[TBAA7]]
475 // GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr addrspace(5) [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA7]]
476 // GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 6
477 // GFX900-NEXT: [[TMP17:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1, !tbaa [[TBAA13]]
478 // GFX900-NEXT: store i8 [[TMP17]], ptr addrspace(5) [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA13]]
479 // GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 4
480 // GFX900-NEXT: [[TMP18:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8, !tbaa [[TBAA7]]
481 // GFX900-NEXT: store ptr addrspace(1) [[TMP18]], ptr addrspace(5) [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]]
482 // GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 5
483 // GFX900-NEXT: [[TMP19:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8, !tbaa [[TBAA3]]
484 // GFX900-NEXT: store i64 [[TMP19]], ptr addrspace(5) [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]]
485 // GFX900-NEXT: [[TMP20:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
486 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]]
487 // GFX900-NEXT: [[TMP21:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0
488 // GFX900-NEXT: store i64 100, ptr addrspace(5) [[TMP21]], align 8
489 // GFX900-NEXT: [[TMP22:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP14]], i32 [[TMP15]], ptr addrspace(5) [[VARTMP11]], ptr @__test_block_invoke_3_kernel, ptr [[TMP20]], i32 1, ptr addrspace(5) [[TMP21]])
490 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]]
491 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]]
492 // GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 0
493 // GFX900-NEXT: store i32 32, ptr addrspace(5) [[BLOCK_SIZE22]], align 8
494 // GFX900-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 1
495 // GFX900-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN23]], align 4
496 // GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 2
497 // GFX900-NEXT: store ptr @__test_block_invoke_4, ptr addrspace(5) [[BLOCK_INVOKE24]], align 8
498 // GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 3
499 // GFX900-NEXT: [[TMP23:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8, !tbaa [[TBAA3]]
500 // GFX900-NEXT: store i64 [[TMP23]], ptr addrspace(5) [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]]
501 // GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 4
502 // GFX900-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8, !tbaa [[TBAA7]]
503 // GFX900-NEXT: store ptr addrspace(1) [[TMP24]], ptr addrspace(5) [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]]
504 // GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
505 // GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA13]]
506 // GFX900-NEXT: [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16]]
507 // GFX900-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]]
508 // GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP27]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]]
509 // GFX900-NEXT: [[TMP27:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA13]]
510 // GFX900-NEXT: [[TMP28:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
511 // GFX900-NEXT: [[TMP29:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP25]], i32 [[TMP26]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[VARTMP27]], ptr @__test_block_invoke_4_kernel, ptr [[TMP28]])
512 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]]
513 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
514 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
515 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
516 // GFX900-NEXT: ret void
519 // GFX900: Function Attrs: convergent nounwind
520 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke
521 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] {
522 // GFX900-NEXT: entry:
523 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
524 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
525 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
526 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]]
527 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
528 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
529 // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
530 // GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA13]]
531 // GFX900-NEXT: ret void
534 // GFX900: Function Attrs: convergent nounwind
535 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
536 // GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space !19 !kernel_arg_access_qual !20 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !22 {
537 // GFX900-NEXT: entry:
538 // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
539 // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
540 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
541 // GFX900-NEXT: call void @__test_block_invoke(ptr [[TMP2]])
542 // GFX900-NEXT: ret void
545 // GFX900: Function Attrs: convergent nounwind
546 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2
547 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
548 // GFX900-NEXT: entry:
549 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
550 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
551 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
552 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]]
553 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
554 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
555 // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
556 // GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA13]]
557 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
558 // GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
559 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
560 // GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
561 // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
562 // GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
563 // GFX900-NEXT: ret void
566 // GFX900: Function Attrs: convergent nounwind
567 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
568 // GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space !19 !kernel_arg_access_qual !20 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !22 {
569 // GFX900-NEXT: entry:
570 // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
571 // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
572 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
573 // GFX900-NEXT: call void @__test_block_invoke_2(ptr [[TMP2]])
574 // GFX900-NEXT: ret void
577 // GFX900: Function Attrs: convergent nounwind
578 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3
579 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] {
580 // GFX900-NEXT: entry:
581 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
582 // GFX900-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
583 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
584 // GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr addrspace(5) [[LP_ADDR]], align 4, !tbaa [[TBAA7]]
585 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
586 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]]
587 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
588 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
589 // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
590 // GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA13]]
591 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
592 // GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
593 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
594 // GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
595 // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
596 // GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
597 // GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[LP_ADDR]], align 4, !tbaa [[TBAA7]]
598 // GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
599 // GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA14]]
600 // GFX900-NEXT: ret void
603 // GFX900: Function Attrs: convergent nounwind
604 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
605 // GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !kernel_arg_addr_space !23 !kernel_arg_access_qual !24 !kernel_arg_type !25 !kernel_arg_base_type !25 !kernel_arg_type_qual !26 {
606 // GFX900-NEXT: entry:
607 // GFX900-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
608 // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
609 // GFX900-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
610 // GFX900-NEXT: call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]])
611 // GFX900-NEXT: ret void
614 // GFX900: Function Attrs: convergent nounwind
615 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4
616 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
617 // GFX900-NEXT: entry:
618 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
619 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
620 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
621 // GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]]
622 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
623 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
624 // GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR9:[0-9]+]]
625 // GFX900-NEXT: ret void
628 // GFX900: Function Attrs: convergent nounwind
629 // GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
630 // GFX900-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space !19 !kernel_arg_access_qual !20 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !22 {
631 // GFX900-NEXT: entry:
632 // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
633 // GFX900-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
634 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
635 // GFX900-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]])
636 // GFX900-NEXT: ret void
639 // GFX900: Function Attrs: convergent norecurse nounwind
640 // GFX900-LABEL: define {{[^@]+}}@test_target_features_kernel
641 // GFX900-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space !27 !kernel_arg_access_qual !20 !kernel_arg_type !28 !kernel_arg_base_type !28 !kernel_arg_type_qual !22 {
642 // GFX900-NEXT: entry:
643 // GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
644 // GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
645 // GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
646 // GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
647 // GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
648 // GFX900-NEXT: store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8, !tbaa [[TBAA7]]
649 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
650 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
651 // GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]]
652 // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
653 // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
654 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16]]
655 // GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]]
656 // GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]]
657 // GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
658 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
659 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
660 // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
661 // GFX900-NEXT: ret void
664 // GFX900: Function Attrs: convergent nounwind
665 // GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
666 // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
667 // GFX900-NEXT: entry:
668 // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
669 // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
670 // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
671 // GFX900-NEXT: ret void
674 // GFX900: Function Attrs: convergent nounwind
675 // GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
676 // GFX900-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space !19 !kernel_arg_access_qual !20 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !22 {
677 // GFX900-NEXT: entry:
678 // GFX900-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
679 // GFX900-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
680 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
681 // GFX900-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
682 // GFX900-NEXT: ret void
685 // NOCPU: attributes #0 = { "objc_arc_inert" }
686 // NOCPU: attributes #1 = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
687 // NOCPU: attributes #2 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
688 // NOCPU: attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
689 // NOCPU: attributes #4 = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
690 // NOCPU: attributes #5 = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
691 // NOCPU: attributes #6 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
692 // NOCPU: attributes #7 = { nocallback nofree nosync nounwind willreturn }
693 // NOCPU: attributes #8 = { convergent nounwind }
695 // GFX900: attributes #0 = { "objc_arc_inert" }
696 // GFX900: attributes #1 = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
697 // GFX900: attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
698 // GFX900: attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
699 // GFX900: attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
700 // GFX900: attributes #5 = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
701 // GFX900: attributes #6 = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
702 // GFX900: attributes #7 = { nocallback nofree nosync nounwind willreturn }
703 // GFX900: attributes #8 = { nounwind }
704 // GFX900: attributes #9 = { convergent nounwind }
706 // NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
707 // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
708 // NOCPU: !2 = !{i32 2, i32 0}
709 // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
710 // NOCPU: !4 = !{!"none", !"none", !"none", !"none"}
711 // NOCPU: !5 = !{!"char*", !"char", !"long*", !"long"}
712 // NOCPU: !6 = !{!"", !"", !"", !""}
713 // NOCPU: !7 = !{i32 0}
714 // NOCPU: !8 = !{!"none"}
715 // NOCPU: !9 = !{!"__block_literal"}
716 // NOCPU: !10 = !{!""}
717 // NOCPU: !11 = !{i32 0, i32 3}
718 // NOCPU: !12 = !{!"none", !"none"}
719 // NOCPU: !13 = !{!"__block_literal", !"void*"}
720 // NOCPU: !14 = !{!"", !""}
721 // NOCPU: !15 = !{i32 1}
722 // NOCPU: !16 = !{!"int*"}
724 // GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
725 // GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
726 // GFX900: !2 = !{i32 2, i32 0}
727 // GFX900: !3 = !{!4, !4, i64 0}
728 // GFX900: !4 = !{!"long", !5, i64 0}
729 // GFX900: !5 = !{!"omnipotent char", !6, i64 0}
730 // GFX900: !6 = !{!"Simple C/C++ TBAA"}
731 // GFX900: !7 = !{!8, !8, i64 0}
732 // GFX900: !8 = !{!"any pointer", !5, i64 0}
733 // GFX900: !9 = !{i32 1, i32 0, i32 1, i32 0}
734 // GFX900: !10 = !{!"none", !"none", !"none", !"none"}
735 // GFX900: !11 = !{!"char*", !"char", !"long*", !"long"}
736 // GFX900: !12 = !{!"", !"", !"", !""}
737 // GFX900: !13 = !{!5, !5, i64 0}
738 // GFX900: !14 = !{!15, !15, i64 0}
739 // GFX900: !15 = !{!"int", !5, i64 0}
740 // GFX900: !16 = !{!17, !17, i64 0}
741 // GFX900: !17 = !{!"queue_t", !5, i64 0}
742 // GFX900: !18 = !{i64 0, i64 4, !14}
743 // GFX900: !19 = !{i32 0}
744 // GFX900: !20 = !{!"none"}
745 // GFX900: !21 = !{!"__block_literal"}
746 // GFX900: !22 = !{!""}
747 // GFX900: !23 = !{i32 0, i32 3}
748 // GFX900: !24 = !{!"none", !"none"}
749 // GFX900: !25 = !{!"__block_literal", !"void*"}
750 // GFX900: !26 = !{!"", !""}
751 // GFX900: !27 = !{i32 1}
752 // GFX900: !28 = !{!"int*"}
754 //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: