1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None -o - %s | FileCheck %s
4 %S = type <{ float, double }>
8 ; The results of these atomic ops should not be used as uniform values.
8 define protected amdgpu_kernel void @add(ptr addrspace(1) %p, ptr addrspace(1) %q) {
11 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
12 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
13 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
14 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
15 ; CHECK-NEXT: global_atomic_add v2, v0, v1, s[0:1] glc
16 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
17 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
18 ; CHECK-NEXT: s_waitcnt vmcnt(0)
19 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
20 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
21 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
22 ; CHECK-NEXT: s_endpgm
23 %n32 = atomicrmw add ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
24 %n64 = zext i32 %n32 to i64
25 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
26 store float 1.0, ptr addrspace(1) %p1
30 define protected amdgpu_kernel void @sub(ptr addrspace(1) %p, ptr addrspace(1) %q) {
33 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
34 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
35 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
36 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
37 ; CHECK-NEXT: global_atomic_sub v2, v0, v1, s[0:1] glc
38 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
39 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
40 ; CHECK-NEXT: s_waitcnt vmcnt(0)
41 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
42 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
43 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
44 ; CHECK-NEXT: s_endpgm
45 %n32 = atomicrmw sub ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
46 %n64 = zext i32 %n32 to i64
47 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
48 store float 1.0, ptr addrspace(1) %p1
52 define protected amdgpu_kernel void @and(ptr addrspace(1) %p, ptr addrspace(1) %q) {
55 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
56 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
57 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
58 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
59 ; CHECK-NEXT: global_atomic_and v2, v0, v1, s[0:1] glc
60 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
61 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
62 ; CHECK-NEXT: s_waitcnt vmcnt(0)
63 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
64 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
65 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
66 ; CHECK-NEXT: s_endpgm
67 %n32 = atomicrmw and ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
68 %n64 = zext i32 %n32 to i64
69 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
70 store float 1.0, ptr addrspace(1) %p1
74 define protected amdgpu_kernel void @or(ptr addrspace(1) %p, ptr addrspace(1) %q) {
77 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
78 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
79 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
80 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
81 ; CHECK-NEXT: global_atomic_or v2, v0, v1, s[0:1] glc
82 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
83 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
84 ; CHECK-NEXT: s_waitcnt vmcnt(0)
85 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
86 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
87 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
88 ; CHECK-NEXT: s_endpgm
89 %n32 = atomicrmw or ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
90 %n64 = zext i32 %n32 to i64
91 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
92 store float 1.0, ptr addrspace(1) %p1
96 define protected amdgpu_kernel void @xor(ptr addrspace(1) %p, ptr addrspace(1) %q) {
99 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
100 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
101 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
102 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
103 ; CHECK-NEXT: global_atomic_xor v2, v0, v1, s[0:1] glc
104 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
105 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
106 ; CHECK-NEXT: s_waitcnt vmcnt(0)
107 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
108 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
109 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
110 ; CHECK-NEXT: s_endpgm
111 %n32 = atomicrmw xor ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
112 %n64 = zext i32 %n32 to i64
113 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
114 store float 1.0, ptr addrspace(1) %p1
118 define protected amdgpu_kernel void @nand(ptr addrspace(1) %p, ptr addrspace(1) %q) {
121 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
122 ; CHECK-NEXT: s_mov_b64 s[4:5], 0
123 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
124 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
125 ; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
126 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
127 ; CHECK-NEXT: v_mov_b32_e32 v0, s6
128 ; CHECK-NEXT: .LBB5_1: ; %atomicrmw.start
129 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
130 ; CHECK-NEXT: v_mov_b32_e32 v3, v0
131 ; CHECK-NEXT: v_not_b32_e32 v0, v3
132 ; CHECK-NEXT: v_or_b32_e32 v2, -2, v0
133 ; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc
134 ; CHECK-NEXT: s_waitcnt vmcnt(0)
135 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
136 ; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
137 ; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
138 ; CHECK-NEXT: s_cbranch_execnz .LBB5_1
139 ; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
140 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
141 ; CHECK-NEXT: v_mov_b32_e32 v2, s2
142 ; CHECK-NEXT: v_mov_b32_e32 v3, s3
143 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3]
144 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
145 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
146 ; CHECK-NEXT: s_endpgm
147 %n32 = atomicrmw nand ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
148 %n64 = zext i32 %n32 to i64
149 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
150 store float 1.0, ptr addrspace(1) %p1
154 define protected amdgpu_kernel void @max_workgroup(ptr addrspace(1) %p, ptr addrspace(1) %q) {
155 ; CHECK-LABEL: max_workgroup:
157 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
158 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
159 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
160 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
161 ; CHECK-NEXT: global_atomic_smax v2, v0, v1, s[0:1] glc
162 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
163 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
164 ; CHECK-NEXT: s_waitcnt vmcnt(0)
165 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
166 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
167 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
168 ; CHECK-NEXT: s_endpgm
169 %n32 = atomicrmw max ptr addrspace(1) %p, i32 1 syncscope("workgroup") monotonic
170 %n64 = zext i32 %n32 to i64
171 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
172 store float 1.0, ptr addrspace(1) %p1
176 define protected amdgpu_kernel void @max(ptr addrspace(1) %p, ptr addrspace(1) %q) {
179 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
180 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
181 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
182 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
183 ; CHECK-NEXT: global_atomic_smax v2, v0, v1, s[0:1] glc
184 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
185 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
186 ; CHECK-NEXT: s_waitcnt vmcnt(0)
187 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
188 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
189 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
190 ; CHECK-NEXT: s_endpgm
191 %n32 = atomicrmw max ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
192 %n64 = zext i32 %n32 to i64
193 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
194 store float 1.0, ptr addrspace(1) %p1
198 define protected amdgpu_kernel void @min_workgroup(ptr addrspace(1) %p, ptr addrspace(1) %q) {
199 ; CHECK-LABEL: min_workgroup:
201 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
202 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
203 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
204 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
205 ; CHECK-NEXT: global_atomic_smin v2, v0, v1, s[0:1] glc
206 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
207 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
208 ; CHECK-NEXT: s_waitcnt vmcnt(0)
209 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
210 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
211 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
212 ; CHECK-NEXT: s_endpgm
213 %n32 = atomicrmw min ptr addrspace(1) %p, i32 1 syncscope("workgroup") monotonic
214 %n64 = zext i32 %n32 to i64
215 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
216 store float 1.0, ptr addrspace(1) %p1
220 define protected amdgpu_kernel void @min(ptr addrspace(1) %p, ptr addrspace(1) %q) {
223 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
224 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
225 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
226 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
227 ; CHECK-NEXT: global_atomic_smin v2, v0, v1, s[0:1] glc
228 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
229 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
230 ; CHECK-NEXT: s_waitcnt vmcnt(0)
231 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
232 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
233 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
234 ; CHECK-NEXT: s_endpgm
235 %n32 = atomicrmw min ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
236 %n64 = zext i32 %n32 to i64
237 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
238 store float 1.0, ptr addrspace(1) %p1
242 define protected amdgpu_kernel void @umax_workgroup(ptr addrspace(1) %p, ptr addrspace(1) %q) {
243 ; CHECK-LABEL: umax_workgroup:
245 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
246 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
247 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
248 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
249 ; CHECK-NEXT: global_atomic_umax v2, v0, v1, s[0:1] glc
250 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
251 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
252 ; CHECK-NEXT: s_waitcnt vmcnt(0)
253 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
254 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
255 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
256 ; CHECK-NEXT: s_endpgm
257 %n32 = atomicrmw umax ptr addrspace(1) %p, i32 1 syncscope("workgroup") monotonic
258 %n64 = zext i32 %n32 to i64
259 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
260 store float 1.0, ptr addrspace(1) %p1
264 define protected amdgpu_kernel void @umax(ptr addrspace(1) %p, ptr addrspace(1) %q) {
267 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
268 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
269 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
270 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
271 ; CHECK-NEXT: global_atomic_umax v2, v0, v1, s[0:1] glc
272 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
273 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
274 ; CHECK-NEXT: s_waitcnt vmcnt(0)
275 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
276 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
277 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
278 ; CHECK-NEXT: s_endpgm
279 %n32 = atomicrmw umax ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
280 %n64 = zext i32 %n32 to i64
281 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
282 store float 1.0, ptr addrspace(1) %p1
286 define protected amdgpu_kernel void @umin_workgroup(ptr addrspace(1) %p, ptr addrspace(1) %q) {
287 ; CHECK-LABEL: umin_workgroup:
289 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
290 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
291 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
292 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
293 ; CHECK-NEXT: global_atomic_umin v2, v0, v1, s[0:1] glc
294 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
295 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
296 ; CHECK-NEXT: s_waitcnt vmcnt(0)
297 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
298 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
299 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
300 ; CHECK-NEXT: s_endpgm
301 %n32 = atomicrmw umin ptr addrspace(1) %p, i32 1 syncscope("workgroup") monotonic
302 %n64 = zext i32 %n32 to i64
303 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
304 store float 1.0, ptr addrspace(1) %p1
308 define protected amdgpu_kernel void @umin(ptr addrspace(1) %p, ptr addrspace(1) %q) {
311 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
312 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
313 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
314 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
315 ; CHECK-NEXT: global_atomic_umin v2, v0, v1, s[0:1] glc
316 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
317 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
318 ; CHECK-NEXT: s_waitcnt vmcnt(0)
319 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
320 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
321 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
322 ; CHECK-NEXT: s_endpgm
323 %n32 = atomicrmw umin ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
324 %n64 = zext i32 %n32 to i64
325 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
326 store float 1.0, ptr addrspace(1) %p1
330 define protected amdgpu_kernel void @cmpxchg(ptr addrspace(1) %p, ptr addrspace(1) %q) {
331 ; CHECK-LABEL: cmpxchg:
333 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
334 ; CHECK-NEXT: v_mov_b32_e32 v2, 0
335 ; CHECK-NEXT: v_mov_b32_e32 v0, 2
336 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
337 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
338 ; CHECK-NEXT: global_atomic_cmpswap v2, v2, v[0:1], s[0:1] glc
339 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
340 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
341 ; CHECK-NEXT: s_waitcnt vmcnt(0)
342 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
343 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
344 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
345 ; CHECK-NEXT: s_endpgm
346 %agg = cmpxchg ptr addrspace(1) %p, i32 1, i32 2 monotonic monotonic
347 %n32 = extractvalue {i32, i1} %agg, 0
348 %n64 = zext i32 %n32 to i64
349 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
350 store float 1.0, ptr addrspace(1) %p1
354 define protected amdgpu_kernel void @xchg(ptr addrspace(1) %p, ptr addrspace(1) %q) {
357 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
358 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
359 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
360 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
361 ; CHECK-NEXT: global_atomic_swap v2, v0, v1, s[0:1] glc
362 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
363 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
364 ; CHECK-NEXT: s_waitcnt vmcnt(0)
365 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
366 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
367 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
368 ; CHECK-NEXT: s_endpgm
369 %n32 = atomicrmw xchg ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
370 %n64 = zext i32 %n32 to i64
371 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
372 store float 1.0, ptr addrspace(1) %p1
376 define protected amdgpu_kernel void @inc(ptr addrspace(1) %p, ptr addrspace(1) %q) {
379 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
380 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
381 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
382 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
383 ; CHECK-NEXT: global_atomic_inc v2, v0, v1, s[0:1] glc
384 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
385 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
386 ; CHECK-NEXT: s_waitcnt vmcnt(0)
387 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
388 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
389 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
390 ; CHECK-NEXT: s_endpgm
391 %n32 = atomicrmw uinc_wrap ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
392 %n64 = zext i32 %n32 to i64
393 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
394 store float 1.0, ptr addrspace(1) %p1
398 define protected amdgpu_kernel void @dec(ptr addrspace(1) %p, ptr addrspace(1) %q) {
401 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
402 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
403 ; CHECK-NEXT: v_mov_b32_e32 v1, 1
404 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
405 ; CHECK-NEXT: global_atomic_dec v2, v0, v1, s[0:1] glc
406 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
407 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
408 ; CHECK-NEXT: s_waitcnt vmcnt(0)
409 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
410 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
411 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
412 ; CHECK-NEXT: s_endpgm
413 %n32 = atomicrmw udec_wrap ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
414 %n64 = zext i32 %n32 to i64
415 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
416 store float 1.0, ptr addrspace(1) %p1
420 define protected amdgpu_kernel void @fadd(ptr addrspace(1) %p, ptr addrspace(1) %q) {
423 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
424 ; CHECK-NEXT: s_mov_b64 s[4:5], 0
425 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
426 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
427 ; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
428 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
429 ; CHECK-NEXT: v_mov_b32_e32 v0, s6
430 ; CHECK-NEXT: .LBB18_1: ; %atomicrmw.start
431 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
432 ; CHECK-NEXT: v_mov_b32_e32 v3, v0
433 ; CHECK-NEXT: v_add_f32_e32 v2, 1.0, v3
434 ; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc
435 ; CHECK-NEXT: s_waitcnt vmcnt(0)
436 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
437 ; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
438 ; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
439 ; CHECK-NEXT: s_cbranch_execnz .LBB18_1
440 ; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
441 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
442 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v0
443 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
444 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
445 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
446 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
447 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
448 ; CHECK-NEXT: s_endpgm
449 %f32 = atomicrmw fadd ptr addrspace(1) %p, float 1.0 syncscope("agent") monotonic
450 %n32 = fptoui float %f32 to i32
451 %n64 = zext i32 %n32 to i64
452 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
453 store float 1.0, ptr addrspace(1) %p1
457 define protected amdgpu_kernel void @fsub(ptr addrspace(1) %p, ptr addrspace(1) %q) {
460 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
461 ; CHECK-NEXT: s_mov_b64 s[4:5], 0
462 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
463 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
464 ; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
465 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
466 ; CHECK-NEXT: v_mov_b32_e32 v0, s6
467 ; CHECK-NEXT: .LBB19_1: ; %atomicrmw.start
468 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
469 ; CHECK-NEXT: v_mov_b32_e32 v3, v0
470 ; CHECK-NEXT: v_add_f32_e32 v2, -1.0, v3
471 ; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc
472 ; CHECK-NEXT: s_waitcnt vmcnt(0)
473 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
474 ; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
475 ; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
476 ; CHECK-NEXT: s_cbranch_execnz .LBB19_1
477 ; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
478 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
479 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v0
480 ; CHECK-NEXT: v_mov_b32_e32 v0, s2
481 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
482 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
483 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
484 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
485 ; CHECK-NEXT: s_endpgm
486 %f32 = atomicrmw fsub ptr addrspace(1) %p, float 1.0 syncscope("agent") monotonic
487 %n32 = fptoui float %f32 to i32
488 %n64 = zext i32 %n32 to i64
489 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
490 store float 1.0, ptr addrspace(1) %p1
494 define protected amdgpu_kernel void @fmin(ptr addrspace(1) %p, ptr addrspace(1) %q) {
497 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
498 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
499 ; CHECK-NEXT: v_mov_b32_e32 v2, 0
500 ; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000
501 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
502 ; CHECK-NEXT: global_atomic_min_f64 v[0:1], v2, v[0:1], s[0:1] glc
503 ; CHECK-NEXT: v_mov_b32_e32 v2, s2
504 ; CHECK-NEXT: v_mov_b32_e32 v3, s3
505 ; CHECK-NEXT: s_waitcnt vmcnt(0)
506 ; CHECK-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
507 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3]
508 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
509 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
510 ; CHECK-NEXT: s_endpgm
512 %f64 = atomicrmw fmin ptr addrspace(1) %p, double 1.0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.memory !0
513 %n32 = fptoui double %f64 to i32
514 %n64 = zext i32 %n32 to i64
515 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
516 store float 1.0, ptr addrspace(1) %p1
520 define protected amdgpu_kernel void @fmax(ptr addrspace(1) %p, ptr addrspace(1) %q) {
523 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
524 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
525 ; CHECK-NEXT: v_mov_b32_e32 v2, 0
526 ; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000
527 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
528 ; CHECK-NEXT: global_atomic_max_f64 v[0:1], v2, v[0:1], s[0:1] glc
529 ; CHECK-NEXT: v_mov_b32_e32 v2, s2
530 ; CHECK-NEXT: v_mov_b32_e32 v3, s3
531 ; CHECK-NEXT: s_waitcnt vmcnt(0)
532 ; CHECK-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
533 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3]
534 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
535 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
536 ; CHECK-NEXT: s_endpgm
537 %f64 = atomicrmw fmax ptr addrspace(1) %p, double 1.0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.memory !0
538 %n32 = fptoui double %f64 to i32
539 %n64 = zext i32 %n32 to i64
540 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
541 store float 1.0, ptr addrspace(1) %p1
545 define protected amdgpu_kernel void @buffer.ptr.atomic.swap(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
546 ; CHECK-LABEL: buffer.ptr.atomic.swap:
548 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
549 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
550 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
551 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
552 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
553 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
554 ; CHECK-NEXT: buffer_atomic_swap v0, v1, s[0:3], 0 offen glc
555 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
556 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
557 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
558 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
559 ; CHECK-NEXT: s_endpgm
560 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
561 %n64 = zext i32 %n32 to i64
562 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
563 store float 1.0, ptr addrspace(1) %p1
567 define protected amdgpu_kernel void @buffer.ptr.atomic.add(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
568 ; CHECK-LABEL: buffer.ptr.atomic.add:
570 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
571 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
572 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
573 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
574 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
575 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
576 ; CHECK-NEXT: buffer_atomic_add v0, v1, s[0:3], 0 offen glc
577 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
578 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
579 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
580 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
581 ; CHECK-NEXT: s_endpgm
582 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
583 %n64 = zext i32 %n32 to i64
584 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
585 store float 1.0, ptr addrspace(1) %p1
589 define protected amdgpu_kernel void @buffer.ptr.atomic.sub(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
590 ; CHECK-LABEL: buffer.ptr.atomic.sub:
592 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
593 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
594 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
595 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
596 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
597 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
598 ; CHECK-NEXT: buffer_atomic_sub v0, v1, s[0:3], 0 offen glc
599 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
600 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
601 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
602 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
603 ; CHECK-NEXT: s_endpgm
604 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
605 %n64 = zext i32 %n32 to i64
606 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
607 store float 1.0, ptr addrspace(1) %p1
611 define protected amdgpu_kernel void @buffer.ptr.atomic.smin(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
612 ; CHECK-LABEL: buffer.ptr.atomic.smin:
614 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
615 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
616 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
617 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
618 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
619 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
620 ; CHECK-NEXT: buffer_atomic_smin v0, v1, s[0:3], 0 offen glc
621 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
622 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
623 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
624 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
625 ; CHECK-NEXT: s_endpgm
626 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
627 %n64 = zext i32 %n32 to i64
628 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
629 store float 1.0, ptr addrspace(1) %p1
633 define protected amdgpu_kernel void @buffer.ptr.atomic.smax(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
634 ; CHECK-LABEL: buffer.ptr.atomic.smax:
636 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
637 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
638 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
639 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
640 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
641 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
642 ; CHECK-NEXT: buffer_atomic_smax v0, v1, s[0:3], 0 offen glc
643 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
644 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
645 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
646 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
647 ; CHECK-NEXT: s_endpgm
648 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
649 %n64 = zext i32 %n32 to i64
650 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
651 store float 1.0, ptr addrspace(1) %p1
655 define protected amdgpu_kernel void @buffer.ptr.atomic.umin(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
656 ; CHECK-LABEL: buffer.ptr.atomic.umin:
658 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
659 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
660 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
661 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
662 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
663 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
664 ; CHECK-NEXT: buffer_atomic_umin v0, v1, s[0:3], 0 offen glc
665 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
666 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
667 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
668 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
669 ; CHECK-NEXT: s_endpgm
670 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
671 %n64 = zext i32 %n32 to i64
672 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
673 store float 1.0, ptr addrspace(1) %p1
677 define protected amdgpu_kernel void @buffer.ptr.atomic.umax(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
678 ; CHECK-LABEL: buffer.ptr.atomic.umax:
680 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
681 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
682 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
683 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
684 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
685 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
686 ; CHECK-NEXT: buffer_atomic_umax v0, v1, s[0:3], 0 offen glc
687 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
688 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
689 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
690 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
691 ; CHECK-NEXT: s_endpgm
692 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
693 %n64 = zext i32 %n32 to i64
694 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
695 store float 1.0, ptr addrspace(1) %p1
699 define protected amdgpu_kernel void @buffer.ptr.atomic.and(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
700 ; CHECK-LABEL: buffer.ptr.atomic.and:
702 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
703 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
704 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
705 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
706 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
707 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
708 ; CHECK-NEXT: buffer_atomic_and v0, v1, s[0:3], 0 offen glc
709 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
710 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
711 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
712 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
713 ; CHECK-NEXT: s_endpgm
714 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
715 %n64 = zext i32 %n32 to i64
716 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
717 store float 1.0, ptr addrspace(1) %p1
721 define protected amdgpu_kernel void @buffer.ptr.atomic.or(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
722 ; CHECK-LABEL: buffer.ptr.atomic.or:
724 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
725 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
726 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
727 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
728 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
729 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
730 ; CHECK-NEXT: buffer_atomic_or v0, v1, s[0:3], 0 offen glc
731 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
732 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
733 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
734 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
735 ; CHECK-NEXT: s_endpgm
736 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
737 %n64 = zext i32 %n32 to i64
738 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
739 store float 1.0, ptr addrspace(1) %p1
743 define protected amdgpu_kernel void @buffer.ptr.atomic.xor(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
744 ; CHECK-LABEL: buffer.ptr.atomic.xor:
746 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
747 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
748 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
749 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
750 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
751 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
752 ; CHECK-NEXT: buffer_atomic_xor v0, v1, s[0:3], 0 offen glc
753 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
754 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
755 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
756 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
757 ; CHECK-NEXT: s_endpgm
758 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
759 %n64 = zext i32 %n32 to i64
760 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
761 store float 1.0, ptr addrspace(1) %p1
; Same pattern for the inc atomic: its result must stay in a VGPR; the CHECK
; lines expect v_mad_u64_u32 on v0 for the store address (divergent index).
765 define protected amdgpu_kernel void @buffer.ptr.atomic.inc(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
766 ; CHECK-LABEL: buffer.ptr.atomic.inc:
768 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
769 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
770 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
771 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
772 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
773 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
774 ; CHECK-NEXT: buffer_atomic_inc v0, v1, s[0:3], 0 offen glc
775 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
776 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
777 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
778 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
779 ; CHECK-NEXT: s_endpgm
; Returned old value -> zext -> index into %S, then store.
780 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
781 %n64 = zext i32 %n32 to i64
782 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
783 store float 1.0, ptr addrspace(1) %p1
; Same pattern for the dec atomic: result consumed as a divergent GEP index;
; CHECK expects the VGPR-based v_mad_u64_u32 address computation.
787 define protected amdgpu_kernel void @buffer.ptr.atomic.dec(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
788 ; CHECK-LABEL: buffer.ptr.atomic.dec:
790 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
791 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
792 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
793 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
794 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
795 ; CHECK-NEXT: v_mov_b32_e32 v1, s6
796 ; CHECK-NEXT: buffer_atomic_dec v0, v1, s[0:3], 0 offen glc
797 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
798 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
799 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
800 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
801 ; CHECK-NEXT: s_endpgm
; Returned old value -> zext -> index into %S, then store.
802 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
803 %n64 = zext i32 %n32 to i64
804 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
805 store float 1.0, ptr addrspace(1) %p1
; cmpswap variant: src/cmp pair lives in v[0:1]; the returned old value (v0)
; must be treated as divergent, so CHECK expects v_mad_u64_u32 on v0 for the
; store address rather than scalar math.
809 define protected amdgpu_kernel void @buffer.ptr.atomic.cmpswap(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
810 ; CHECK-LABEL: buffer.ptr.atomic.cmpswap:
812 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
813 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
814 ; CHECK-NEXT: v_mov_b32_e32 v1, 2
815 ; CHECK-NEXT: v_mov_b32_e32 v0, 1
816 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
817 ; CHECK-NEXT: v_mov_b32_e32 v2, s6
818 ; CHECK-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen glc
819 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
820 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
821 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
822 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
823 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
824 ; CHECK-NEXT: s_endpgm
; Swaps in 1 when the value equals 2; old value -> zext -> index into %S.
825 %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 1, i32 2, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
826 %n64 = zext i32 %n32 to i64
827 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
828 store float 1.0, ptr addrspace(1) %p1
; f32 fadd variant: the float result is converted to an index via fptoui, so
; CHECK expects a v_cvt_u32_f32 on the VGPR result before the divergent
; v_mad_u64_u32 address computation.
832 define protected amdgpu_kernel void @buffer.ptr.atomic.fadd(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
833 ; CHECK-LABEL: buffer.ptr.atomic.fadd:
835 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
836 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
837 ; CHECK-NEXT: v_mov_b32_e32 v1, 1.0
838 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
839 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
840 ; CHECK-NEXT: v_mov_b32_e32 v0, s6
841 ; CHECK-NEXT: buffer_atomic_add_f32 v1, v0, s[0:3], 0 offen glc
842 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
843 ; CHECK-NEXT: s_waitcnt vmcnt(0)
844 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v1
845 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
846 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
847 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
848 ; CHECK-NEXT: s_endpgm
; Returned old float -> fptoui -> zext -> index into %S, then store.
849 %f32 = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float 1.0, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
850 %n32 = fptoui float %f32 to i32
851 %n64 = zext i32 %n32 to i64
852 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
853 store float 1.0, ptr addrspace(1) %p1
; f64 fmin variant: the double result (v[0:1]) is narrowed with
; v_cvt_u32_f64 and must remain divergent for the v_mad_u64_u32 addressing.
857 define protected amdgpu_kernel void @buffer.ptr.atomic.fmin(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
858 ; CHECK-LABEL: buffer.ptr.atomic.fmin:
860 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
861 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
862 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
863 ; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000
864 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
865 ; CHECK-NEXT: v_mov_b32_e32 v2, s6
866 ; CHECK-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc
867 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
868 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
869 ; CHECK-NEXT: s_waitcnt vmcnt(0)
870 ; CHECK-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
871 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
872 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
873 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
874 ; CHECK-NEXT: s_endpgm
; Returned old double -> fptoui -> zext -> index into %S, then store.
875 %f64 = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double 1.0, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
876 %n32 = fptoui double %f64 to i32
877 %n64 = zext i32 %n32 to i64
878 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
879 store float 1.0, ptr addrspace(1) %p1
; f64 fmax variant: mirrors fmin above; the atomic's double result is kept
; in VGPRs through v_cvt_u32_f64 and the v_mad_u64_u32 address computation.
883 define protected amdgpu_kernel void @buffer.ptr.atomic.fmax(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) {
884 ; CHECK-LABEL: buffer.ptr.atomic.fmax:
886 ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34
887 ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
888 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
889 ; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000
890 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
891 ; CHECK-NEXT: v_mov_b32_e32 v2, s6
892 ; CHECK-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc
893 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
894 ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
895 ; CHECK-NEXT: s_waitcnt vmcnt(0)
896 ; CHECK-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
897 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
898 ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
899 ; CHECK-NEXT: global_store_dword v[0:1], v2, off
900 ; CHECK-NEXT: s_endpgm
; Returned old double -> fptoui -> zext -> index into %S, then store.
901 %f64 = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double 1.0, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
902 %n32 = fptoui double %f64 to i32
903 %n64 = zext i32 %n32 to i64
904 %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
905 store float 1.0, ptr addrspace(1) %p1
909 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32)
910 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32)
911 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32)
912 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32)
913 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32)
914 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32)
915 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32)
916 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32)
917 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32)
918 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32)
919 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32)
920 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32)
921 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32)
922 declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32)
923 declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32)
924 declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32)