; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
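; Check that the annotate-kernel-features pass adds the expected "amdgpu-*"
; attributes to each kernel based on the intrinsics and addrspacecasts it uses.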
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}
; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}
; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}
; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}
; No-op addrspacecast should not use queue ptr
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}
; HSA: define amdgpu_kernel void @use_is_shared(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; HSA: define amdgpu_kernel void @use_is_private(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone speculatable }
; HSA: attributes #0 = { nounwind readnone speculatable }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }