[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / propagate-waves-per-eu.ll
blob3a6421131fcae1da46a936cec64c9cc93e458aa1
1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s
4 ; Check propagation of amdgpu-waves-per-eu attribute.
6 ; Called from a single kernel with 1,8
7 define internal void @default_to_1_8_a() {
8 ; CHECK-LABEL: define internal void @default_to_1_8_a
9 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
10 ; CHECK-NEXT:    ret void
12   ret void
15 define amdgpu_kernel void @kernel_1_8() #0 {
16 ; CHECK-LABEL: define amdgpu_kernel void @kernel_1_8
17 ; CHECK-SAME: () #[[ATTR0]] {
18 ; CHECK-NEXT:    call void @default_to_1_8_a()
19 ; CHECK-NEXT:    ret void
21   call void @default_to_1_8_a()
22   ret void
25 ; Called from a single kernel with 1,2
26 define internal void @default_to_1_2() {
27 ; CHECK-LABEL: define internal void @default_to_1_2
28 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
29 ; CHECK-NEXT:    ret void
31   ret void
34 define amdgpu_kernel void @kernel_1_2() #1 {
35 ; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2
36 ; CHECK-SAME: () #[[ATTR1]] {
37 ; CHECK-NEXT:    call void @default_to_1_2()
38 ; CHECK-NEXT:    call void @flat_group_1_1()
39 ; CHECK-NEXT:    call void @default_to_1_8_b()
40 ; CHECK-NEXT:    call void @flat_group_2_8()
41 ; CHECK-NEXT:    ret void
43   call void @default_to_1_2()
44   call void @flat_group_1_1()
45   call void @default_to_1_8_b()
46   call void @flat_group_2_8()
47   ret void
50 ; Called from a single kernel with 1,4
51 define internal void @default_to_1_4() {
52 ; CHECK-LABEL: define internal void @default_to_1_4
53 ; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
54 ; CHECK-NEXT:    ret void
56   ret void
59 define amdgpu_kernel void @kernel_1_4() #2 {
60 ; CHECK-LABEL: define amdgpu_kernel void @kernel_1_4
61 ; CHECK-SAME: () #[[ATTR2]] {
62 ; CHECK-NEXT:    call void @default_to_1_4()
63 ; CHECK-NEXT:    ret void
65   call void @default_to_1_4()
66   ret void
69 ; Called from kernels with 2,9 and 9,9
70 define internal void @default_to_2_9() {
71 ; CHECK-LABEL: define internal void @default_to_2_9
72 ; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
73 ; CHECK-NEXT:    ret void
75   ret void
78 ; This already has strict bounds but is called from kernels with wider
79 ; bounds, so it should not be changed.
80 define internal void @flat_group_1_1() #3 {
81 ; CHECK-LABEL: define internal void @flat_group_1_1
82 ; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
83 ; CHECK-NEXT:    ret void
85   ret void
88 ; 2,8 -> 2,2
89 define internal void @flat_group_2_8() #4 {
90 ; CHECK-LABEL: define internal void @flat_group_2_8
91 ; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
92 ; CHECK-NEXT:    ret void
94   ret void
97 ; 9,10 -> 9,9
98 define internal void @flat_group_9_10() #5 {
99 ; CHECK-LABEL: define internal void @flat_group_9_10
100 ; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
101 ; CHECK-NEXT:    ret void
103   ret void
106 define amdgpu_kernel void @kernel_2_9() #6 {
107 ; CHECK-LABEL: define amdgpu_kernel void @kernel_2_9
108 ; CHECK-SAME: () #[[ATTR3]] {
109 ; CHECK-NEXT:    call void @default_to_2_9()
110 ; CHECK-NEXT:    call void @flat_group_1_1()
111 ; CHECK-NEXT:    ret void
113   call void @default_to_2_9()
114   call void @flat_group_1_1()
115   ret void
118 define amdgpu_kernel void @kernel_9_9() #7 {
119 ; CHECK-LABEL: define amdgpu_kernel void @kernel_9_9
120 ; CHECK-SAME: () #[[ATTR6]] {
121 ; CHECK-NEXT:    call void @default_to_2_9()
122 ; CHECK-NEXT:    call void @flat_group_9_10()
123 ; CHECK-NEXT:    ret void
125   call void @default_to_2_9()
126   call void @flat_group_9_10()
127   ret void
130 ; Called from kernels with 2,8 and 1,2 => 1,8
131 define internal void @default_to_1_8_b() {
132 ; CHECK-LABEL: define internal void @default_to_1_8_b
133 ; CHECK-SAME: () #[[ATTR0]] {
134 ; CHECK-NEXT:    ret void
136   ret void
139 ; The kernel's lower bound is higher than the callee's lower bound, so
140 ; this should probably be illegal.
141 define amdgpu_kernel void @kernel_2_8() #4 {
142 ; CHECK-LABEL: define amdgpu_kernel void @kernel_2_8
143 ; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
144 ; CHECK-NEXT:    call void @default_to_1_8_a()
145 ; CHECK-NEXT:    call void @default_to_1_8_b()
146 ; CHECK-NEXT:    ret void
148   call void @default_to_1_8_a()
149   call void @default_to_1_8_b()
150   ret void
153 ; 1,2 -> 2,2
154 define internal void @merge_cycle_0() #1 {
155 ; CHECK-LABEL: define internal void @merge_cycle_0
156 ; CHECK-SAME: () #[[ATTR5]] {
157 ; CHECK-NEXT:    call void @merge_cycle_1()
158 ; CHECK-NEXT:    ret void
160   call void @merge_cycle_1()
161   ret void
164 ; Called from 1,2 + 3,8
165 ; 2,8 -> 2,8
166 define internal void @merge_cycle_1() #4 {
167 ; CHECK-LABEL: define internal void @merge_cycle_1
168 ; CHECK-SAME: () #[[ATTR7]] {
169 ; CHECK-NEXT:    call void @merge_cycle_0()
170 ; CHECK-NEXT:    ret void
172   call void @merge_cycle_0()
173   ret void
176 define amdgpu_kernel void @kernel_3_8() #8 {
177 ; CHECK-LABEL: define amdgpu_kernel void @kernel_3_8
178 ; CHECK-SAME: () #[[ATTR8:[0-9]+]] {
179 ; CHECK-NEXT:    call void @merge_cycle_0()
180 ; CHECK-NEXT:    call void @default_captured_address()
181 ; CHECK-NEXT:    call void @externally_visible_default()
182 ; CHECK-NEXT:    [[F32:%.*]] = call float @bitcasted_function()
183 ; CHECK-NEXT:    ret void
185   call void @merge_cycle_0()
186   call void @default_captured_address()
187   call void @externally_visible_default()
188   %f32 = call float @bitcasted_function()
189   ret void
192 define internal void @default_captured_address() {
193 ; CHECK-LABEL: define internal void @default_captured_address
194 ; CHECK-SAME: () #[[ATTR9:[0-9]+]] {
195 ; CHECK-NEXT:    store volatile ptr @default_captured_address, ptr undef, align 8
196 ; CHECK-NEXT:    ret void
198   store volatile ptr @default_captured_address, ptr undef, align 8
199   ret void
202 define void @externally_visible_default() {
203 ; CHECK-LABEL: define void @externally_visible_default
204 ; CHECK-SAME: () #[[ATTR9]] {
205 ; CHECK-NEXT:    ret void
207   ret void
210 ; 1,10 -> 3,8
211 define internal i32 @bitcasted_function() {
212 ; CHECK-LABEL: define internal i32 @bitcasted_function
213 ; CHECK-SAME: () #[[ATTR8]] {
214 ; CHECK-NEXT:    ret i32 0
216   ret i32 0
219 define internal void @called_from_invalid_bounds_0() {
220 ; CHECK-LABEL: define internal void @called_from_invalid_bounds_0
221 ; CHECK-SAME: () #[[ATTR10:[0-9]+]] {
222 ; CHECK-NEXT:    ret void
224   ret void
227 define internal void @called_from_invalid_bounds_1() {
228 ; CHECK-LABEL: define internal void @called_from_invalid_bounds_1
229 ; CHECK-SAME: () #[[ATTR10]] {
230 ; CHECK-NEXT:    ret void
232   ret void
235 ; Invalid range for amdgpu-waves-per-eu
236 define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 {
237 ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8
238 ; CHECK-SAME: () #[[ATTR11:[0-9]+]] {
239 ; CHECK-NEXT:    call void @called_from_invalid_bounds_0()
240 ; CHECK-NEXT:    ret void
242   call void @called_from_invalid_bounds_0()
243   ret void
246 ; Invalid range for amdgpu-waves-per-eu
247 define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 {
248 ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123
249 ; CHECK-SAME: () #[[ATTR12:[0-9]+]] {
250 ; CHECK-NEXT:    call void @called_from_invalid_bounds_1()
251 ; CHECK-NEXT:    ret void
253   call void @called_from_invalid_bounds_1()
254   ret void
257 ; XXX - Why is the maximum not 6?
258 ; The 512 maximum workgroup size implies a minimum occupancy of 2. The
259 ; implied minimum waves-per-eu should not be 3.
260 ; -> 2,10
261 define void @larger_group_size_implies_lower_minimum() #11 {
262 ; CHECK-LABEL: define void @larger_group_size_implies_lower_minimum
263 ; CHECK-SAME: () #[[ATTR13:[0-9]+]] {
264 ; CHECK-NEXT:    ret void
266   ret void
269 define amdgpu_kernel void @kernel_3_6() #12 {
270 ; CHECK-LABEL: define amdgpu_kernel void @kernel_3_6
271 ; CHECK-SAME: () #[[ATTR14:[0-9]+]] {
272 ; CHECK-NEXT:    call void @larger_group_size_implies_lower_minimum()
273 ; CHECK-NEXT:    ret void
275   call void @larger_group_size_implies_lower_minimum()
276   ret void
279 ; 3,6 -> 6,9
280 define internal void @refine_upper_func_3_6() #13 {
281 ; CHECK-LABEL: define internal void @refine_upper_func_3_6
282 ; CHECK-SAME: () #[[ATTR15:[0-9]+]] {
283 ; CHECK-NEXT:    ret void
285   ret void
288 ; 4,8 -> 6,8
289 define internal void @refine_lower_func_4_8() #14 {
290 ; CHECK-LABEL: define internal void @refine_lower_func_4_8
291 ; CHECK-SAME: () #[[ATTR16:[0-9]+]] {
292 ; CHECK-NEXT:    call void @refine_upper_func_3_6()
293 ; CHECK-NEXT:    ret void
295   call void @refine_upper_func_3_6()
296   ret void
299 define amdgpu_kernel void @kernel_foo_6_8() #15 {
300 ; CHECK-LABEL: define amdgpu_kernel void @kernel_foo_6_8
301 ; CHECK-SAME: () #[[ATTR16]] {
302 ; CHECK-NEXT:    call void @refine_upper_func_3_6()
303 ; CHECK-NEXT:    call void @refine_lower_func_4_8()
304 ; CHECK-NEXT:    call void @func_9_10_a()
305 ; CHECK-NEXT:    ret void
307   call void @refine_upper_func_3_6()
308   call void @refine_lower_func_4_8()
309   call void @func_9_10_a()
310   ret void
313 ; 5,5 -> 5,5
314 define internal void @func_5_5() #16 {
315 ; CHECK-LABEL: define internal void @func_5_5
316 ; CHECK-SAME: () #[[ATTR17:[0-9]+]] {
317 ; CHECK-NEXT:    ret void
319   ret void
322 ; 5,8 -> 8,8
323 define internal void @func_5_8() #17 {
324 ; CHECK-LABEL: define internal void @func_5_8
325 ; CHECK-SAME: () #[[ATTR18:[0-9]+]] {
326 ; CHECK-NEXT:    ret void
328   ret void
331 ; 9,10 -> 9,10
332 define internal void @func_9_10_a() #18 {
333 ; CHECK-LABEL: define internal void @func_9_10_a
334 ; CHECK-SAME: () #[[ATTR19:[0-9]+]] {
335 ; CHECK-NEXT:    ret void
337   ret void
340 ; 9,10 -> 9,9
341 define internal void @func_9_10_b() #18 {
342 ; CHECK-LABEL: define internal void @func_9_10_b
343 ; CHECK-SAME: () #[[ATTR20:[0-9]+]] {
344 ; CHECK-NEXT:    ret void
346   ret void
349 define amdgpu_kernel void @kernel_bar_8_9() #19 {
350 ; CHECK-LABEL: define amdgpu_kernel void @kernel_bar_8_9
351 ; CHECK-SAME: () #[[ATTR21:[0-9]+]] {
352 ; CHECK-NEXT:    call void @refine_upper_func_3_6()
353 ; CHECK-NEXT:    call void @func_5_5()
354 ; CHECK-NEXT:    call void @func_9_10_b()
355 ; CHECK-NEXT:    call void @func_5_8()
356 ; CHECK-NEXT:    call void @externally_visible()
357 ; CHECK-NEXT:    ret void
359   call void @refine_upper_func_3_6()
360   call void @func_5_5()
361   call void @func_9_10_b()
362   call void @func_5_8()
363   call void @externally_visible()
364   ret void
367 ; This is an optimization hint based on users, so it's not strictly
368 ; required that all callers be visible.
369 define void @externally_visible() {
370 ; CHECK-LABEL: define void @externally_visible
371 ; CHECK-SAME: () #[[ATTR9]] {
372 ; CHECK-NEXT:    ret void
374   ret void
378 ; Use a 1-wave workgroup so that the workgroup size does not interact
379 ; with the implied waves per EU.
381 attributes #0 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,8" }
382 attributes #1 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" }
383 attributes #2 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,4" }
384 attributes #3 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,1" }
385 attributes #4 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" }
386 attributes #5 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,10" }
387 attributes #6 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,9" }
388 attributes #7 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,9" }
389 attributes #8 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="3,8" }
390 attributes #9 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="0,8" }
391 attributes #10 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,123" }
392 attributes #11 = { "amdgpu-flat-work-group-size"="1,512" }
393 attributes #12 = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-waves-per-eu"="3,6" }
394 attributes #13 = { "amdgpu-waves-per-eu"="3,6" }
395 attributes #14 = { "amdgpu-waves-per-eu"="4,8" }
396 attributes #15 = { "amdgpu-waves-per-eu"="6,8" }
397 attributes #16 = { "amdgpu-waves-per-eu"="5,5" }
398 attributes #17 = { "amdgpu-waves-per-eu"="5,8" }
399 attributes #18 = { "amdgpu-waves-per-eu"="9,10" }
400 attributes #19 = { "amdgpu-waves-per-eu"="8,9" }
402 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
403 ; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
404 ; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" }
405 ; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" }
406 ; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" }
407 ; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,2" "uniform-work-group-size"="false" }
408 ; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
409 ; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" }
410 ; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" }
411 ; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
412 ; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
413 ; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="0,8" "uniform-work-group-size"="false" }
414 ; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" }
415 ; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" }
416 ; CHECK: attributes #[[ATTR14]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
417 ; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,9" "uniform-work-group-size"="false" }
418 ; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" }
419 ; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" }
420 ; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,8" "uniform-work-group-size"="false" }
421 ; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
422 ; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
423 ; CHECK: attributes #[[ATTR21]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" }