1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s
3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
5 ; Check propagation of amdgpu-flat-work-group-size attribute.
7 ; Called from a single kernel (@kernel_1_256) with 1,256.
; Has no explicit flat work group size on input; the expected output places it
; in the same attribute group as that kernel ("1,256").
8 define internal void @default_to_1_256() {
9 ; CHECK-LABEL: define {{[^@]+}}@default_to_1_256
10 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
11 ; CHECK-NEXT: ret void
; Kernel with an explicit flat work group size of 1,256 (#0). It calls
; @default_to_1_256, which has no explicit size of its own.
16 define amdgpu_kernel void @kernel_1_256() #0 {
17 ; CHECK-LABEL: define {{[^@]+}}@kernel_1_256
18 ; CHECK-SAME: () #[[ATTR0]] {
19 ; CHECK-NEXT: call void @default_to_1_256()
20 ; CHECK-NEXT: ret void
22 call void @default_to_1_256()
26 ; Called from a single kernel (@kernel_64_128) with 64,128.
; Has no explicit flat work group size on input; the expected output places it
; in the same attribute group as that kernel ("64,128").
27 define internal void @default_to_64_128() {
28 ; CHECK-LABEL: define {{[^@]+}}@default_to_64_128
29 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
30 ; CHECK-NEXT: ret void
; Kernel with an explicit flat work group size of 64,128 (#1). It calls two
; functions without an explicit size (@default_to_64_128, @default_to_64_256)
; and two functions that already carry one (@flat_group_64_64 with 64,64 and
; @flat_group_128_256 with 128,256).
35 define amdgpu_kernel void @kernel_64_128() #1 {
36 ; CHECK-LABEL: define {{[^@]+}}@kernel_64_128
37 ; CHECK-SAME: () #[[ATTR1]] {
38 ; CHECK-NEXT: call void @default_to_64_128()
39 ; CHECK-NEXT: call void @flat_group_64_64()
40 ; CHECK-NEXT: call void @default_to_64_256()
41 ; CHECK-NEXT: call void @flat_group_128_256()
42 ; CHECK-NEXT: ret void
44 call void @default_to_64_128()
45 call void @flat_group_64_64()
46 call void @default_to_64_256()
47 call void @flat_group_128_256()
51 ; Called from kernels with 128,512 (@kernel_128_512) and 512,512
; (@kernel_512_512). Has no explicit flat work group size on input; the
; expected output is the "128,512" attribute group, shared with @kernel_128_512.
52 define internal void @default_to_128_512() {
53 ; CHECK-LABEL: define {{[^@]+}}@default_to_128_512
54 ; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
55 ; CHECK-NEXT: ret void
60 ; This already has strict bounds (64,64) but is called from kernels with
61 ; wider bounds (@kernel_64_128 and @kernel_128_512), and should not be changed.
62 define internal void @flat_group_64_64() #2 {
63 ; CHECK-LABEL: define {{[^@]+}}@flat_group_64_64
64 ; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
65 ; CHECK-NEXT: ret void
; Carries an explicit flat work group size of 128,256 (#3) and is called from
; @kernel_64_128 (64,128). The expected output keeps it at "128,256".
71 define internal void @flat_group_128_256() #3 {
72 ; CHECK-LABEL: define {{[^@]+}}@flat_group_128_256
73 ; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
74 ; CHECK-NEXT: ret void
; Carries an explicit flat work group size of 512,1024 (#4) and is called from
; @kernel_512_512 (512,512). The expected output keeps it at "512,1024", in an
; attribute group that also carries "amdgpu-waves-per-eu"="4,10".
79 define internal void @flat_group_512_1024() #4 {
80 ; CHECK-LABEL: define {{[^@]+}}@flat_group_512_1024
81 ; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
82 ; CHECK-NEXT: ret void
; Kernel with an explicit flat work group size of 128,512 (#5). It calls
; @default_to_128_512 (no explicit size) and @flat_group_64_64 (64,64).
87 define amdgpu_kernel void @kernel_128_512() #5 {
88 ; CHECK-LABEL: define {{[^@]+}}@kernel_128_512
89 ; CHECK-SAME: () #[[ATTR2]] {
90 ; CHECK-NEXT: call void @default_to_128_512()
91 ; CHECK-NEXT: call void @flat_group_64_64()
92 ; CHECK-NEXT: ret void
94 call void @default_to_128_512()
95 call void @flat_group_64_64()
; Kernel with an explicit flat work group size of 512,512 (#6). It calls
; @default_to_128_512 (no explicit size) and @flat_group_512_1024 (512,1024).
99 define amdgpu_kernel void @kernel_512_512() #6 {
100 ; CHECK-LABEL: define {{[^@]+}}@kernel_512_512
101 ; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
102 ; CHECK-NEXT: call void @default_to_128_512()
103 ; CHECK-NEXT: call void @flat_group_512_1024()
104 ; CHECK-NEXT: ret void
106 call void @default_to_128_512()
107 call void @flat_group_512_1024()
111 ; Called from kernels with 128,256 (@kernel_128_256) and 64,128
; (@kernel_64_128) => 64,256, i.e. the expected range spans both callers.
112 define internal void @default_to_64_256() {
113 ; CHECK-LABEL: define {{[^@]+}}@default_to_64_256
114 ; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
115 ; CHECK-NEXT: ret void
120 ; The kernel's lower bound (128) is higher than the lower bound of its callee
121 ; @default_to_64_256 (64), so this should probably be illegal.
122 define amdgpu_kernel void @kernel_128_256() #3 {
123 ; CHECK-LABEL: define {{[^@]+}}@kernel_128_256
124 ; CHECK-SAME: () #[[ATTR4]] {
125 ; CHECK-NEXT: call void @default_to_64_256()
126 ; CHECK-NEXT: ret void
128 call void @default_to_64_256()
; First half of a call cycle: calls @merge_cycle_1, which calls back into this
; function. It is also called from @kernel_64_256. Carries an explicit 64,128
; (#1) and the expected output keeps it in the "64,128" attribute group.
133 define internal void @merge_cycle_0() #1 {
134 ; CHECK-LABEL: define {{[^@]+}}@merge_cycle_0
135 ; CHECK-SAME: () #[[ATTR1]] {
136 ; CHECK-NEXT: call void @merge_cycle_1()
137 ; CHECK-NEXT: ret void
139 call void @merge_cycle_1()
; Second half of the call cycle: calls @merge_cycle_0, which calls back into
; this function. Carries an explicit 128,256 (#3) and the expected output keeps
; it in the "128,256" attribute group.
144 define internal void @merge_cycle_1() #3 {
145 ; CHECK-LABEL: define {{[^@]+}}@merge_cycle_1
146 ; CHECK-SAME: () #[[ATTR4]] {
147 ; CHECK-NEXT: call void @merge_cycle_0()
148 ; CHECK-NEXT: ret void
150 call void @merge_cycle_0()
; Kernel with an explicit flat work group size of 64,256 (#7). It enters the
; merge_cycle_0/merge_cycle_1 call cycle and calls three functions that have
; no explicit size: @default_captured_address, @externally_visible_default and
; @bitcasted_function. The last call uses a float return type although
; @bitcasted_function is defined as returning i32.
154 define amdgpu_kernel void @kernel_64_256() #7 {
155 ; CHECK-LABEL: define {{[^@]+}}@kernel_64_256
156 ; CHECK-SAME: () #[[ATTR7]] {
157 ; CHECK-NEXT: call void @merge_cycle_0()
158 ; CHECK-NEXT: call void @default_captured_address()
159 ; CHECK-NEXT: call void @externally_visible_default()
160 ; CHECK-NEXT: [[F32:%.*]] = call float @bitcasted_function()
161 ; CHECK-NEXT: ret void
163 call void @merge_cycle_0()
164 call void @default_captured_address()
165 call void @externally_visible_default()
166 %f32 = call float @bitcasted_function()
; Internal function whose own address is stored to memory (volatile store to an
; undef pointer). It is called from @kernel_64_256 (64,256), but the expected
; attribute group carries no "amdgpu-flat-work-group-size" at all.
170 define internal void @default_captured_address() {
171 ; CHECK-LABEL: define {{[^@]+}}@default_captured_address
172 ; CHECK-SAME: () #[[ATTR8:[0-9]+]] {
173 ; CHECK-NEXT: store volatile ptr @default_captured_address, ptr undef, align 8
174 ; CHECK-NEXT: ret void
176 store volatile ptr @default_captured_address, ptr undef, align 8
; Not marked internal, unlike the other non-kernel functions in this file. It
; is called from @kernel_64_256 (64,256), but the expected attribute group
; carries no "amdgpu-flat-work-group-size".
180 define void @externally_visible_default() {
181 ; CHECK-LABEL: define {{[^@]+}}@externally_visible_default
182 ; CHECK-SAME: () #[[ATTR8]] {
183 ; CHECK-NEXT: ret void
; Defined as returning i32, but the only call in this file (in @kernel_64_256)
; uses a float return type. The expected output still places it in the
; "64,256" attribute group of that kernel.
189 define internal i32 @bitcasted_function() {
190 ; CHECK-LABEL: define {{[^@]+}}@bitcasted_function
191 ; CHECK-SAME: () #[[ATTR7]] {
192 ; CHECK-NEXT: ret i32 0
; Input attribute groups referenced by the functions above. Each sets only
; "amdgpu-flat-work-group-size"; the two comma-separated numbers appear to be
; the lower and upper bound, going by the comments on the functions above.
197 attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
198 attributes #1 = { "amdgpu-flat-work-group-size"="64,128" }
199 attributes #2 = { "amdgpu-flat-work-group-size"="64,64" }
200 attributes #3 = { "amdgpu-flat-work-group-size"="128,256" }
201 attributes #4 = { "amdgpu-flat-work-group-size"="512,1024" }
202 attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
203 attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
204 attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
206 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
207 ; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
208 ; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" }
209 ; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
210 ; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
211 ; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
212 ; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" }
213 ; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
214 ; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }