1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11 %s
6 ; Test using saddr addressing mode of global_* flat atomic instructions.
; i32 xchg whose result is unused. The address is the inreg base pointer
; %sbase plus the zero-extended 32-bit %voffset, with no constant offset.
; The checks on gfx9, gfx10 and gfx11 all expect the saddr form of the swap
; (VGPR offset + s[2:3] base) without a destination register or glc.
8 define amdgpu_ps void @global_xchg_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
9 ; GFX9-LABEL: global_xchg_saddr_i32_nortn:
11 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
12 ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3]
13 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14 ; GFX9-NEXT: buffer_wbinvl1
17 ; GFX10-LABEL: global_xchg_saddr_i32_nortn:
19 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
20 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
21 ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3]
22 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
23 ; GFX10-NEXT: buffer_gl0_inv
24 ; GFX10-NEXT: buffer_gl1_inv
25 ; GFX10-NEXT: s_endpgm
27 ; GFX11-LABEL: global_xchg_saddr_i32_nortn:
29 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
30 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
31 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3]
32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
33 ; GFX11-NEXT: buffer_gl0_inv
34 ; GFX11-NEXT: buffer_gl1_inv
35 ; GFX11-NEXT: s_endpgm
36 %zext.offset = zext i32 %voffset to i64
37 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
38 %unused = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
42 ; Maximum positive offset on gfx10
; No-return i32 xchg with an additional constant byte offset of +2047 applied
; on top of %sbase + zext(%voffset). The checks for all three targets expect
; the constant to be folded into the instruction as offset:2047.
43 define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_2047(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
44 ; GFX9-LABEL: global_xchg_saddr_i32_nortn_offset_2047:
46 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
47 ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] offset:2047
48 ; GFX9-NEXT: s_waitcnt vmcnt(0)
49 ; GFX9-NEXT: buffer_wbinvl1
52 ; GFX10-LABEL: global_xchg_saddr_i32_nortn_offset_2047:
54 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
55 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
56 ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] offset:2047
57 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
58 ; GFX10-NEXT: buffer_gl0_inv
59 ; GFX10-NEXT: buffer_gl1_inv
60 ; GFX10-NEXT: s_endpgm
62 ; GFX11-LABEL: global_xchg_saddr_i32_nortn_offset_2047:
64 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
65 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
66 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:2047
67 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
68 ; GFX11-NEXT: buffer_gl0_inv
69 ; GFX11-NEXT: buffer_gl1_inv
70 ; GFX11-NEXT: s_endpgm
71 %zext.offset = zext i32 %voffset to i64
72 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
73 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
74 %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
78 ; Maximum negative offset on gfx10
; No-return i32 xchg with an additional constant byte offset of -2048 applied
; on top of %sbase + zext(%voffset). The checks for all three targets expect
; the constant to be folded into the instruction as offset:-2048.
79 define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
80 ; GFX9-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048:
82 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
83 ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] offset:-2048
84 ; GFX9-NEXT: s_waitcnt vmcnt(0)
85 ; GFX9-NEXT: buffer_wbinvl1
88 ; GFX10-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048:
90 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
91 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
92 ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] offset:-2048
93 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
94 ; GFX10-NEXT: buffer_gl0_inv
95 ; GFX10-NEXT: buffer_gl1_inv
96 ; GFX10-NEXT: s_endpgm
98 ; GFX11-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048:
100 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
101 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
102 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:-2048
103 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
104 ; GFX11-NEXT: buffer_gl0_inv
105 ; GFX11-NEXT: buffer_gl1_inv
106 ; GFX11-NEXT: s_endpgm
107 %zext.offset = zext i32 %voffset to i64
108 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
109 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
110 %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
; Returning i32 xchg: the old memory value is bitcast to float, the function's
; return type. The address is %sbase + zext(%voffset) with no constant offset.
; The checks expect the saddr form with a destination VGPR (v0) and glc set.
114 define amdgpu_ps float @global_xchg_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
115 ; GFX9-LABEL: global_xchg_saddr_i32_rtn:
117 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
118 ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] glc
119 ; GFX9-NEXT: s_waitcnt vmcnt(0)
120 ; GFX9-NEXT: buffer_wbinvl1
121 ; GFX9-NEXT: ; return to shader part epilog
123 ; GFX10-LABEL: global_xchg_saddr_i32_rtn:
125 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
126 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
127 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[2:3] glc
128 ; GFX10-NEXT: s_waitcnt vmcnt(0)
129 ; GFX10-NEXT: buffer_gl0_inv
130 ; GFX10-NEXT: buffer_gl1_inv
131 ; GFX10-NEXT: ; return to shader part epilog
133 ; GFX11-LABEL: global_xchg_saddr_i32_rtn:
135 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
136 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
137 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] glc
138 ; GFX11-NEXT: s_waitcnt vmcnt(0)
139 ; GFX11-NEXT: buffer_gl0_inv
140 ; GFX11-NEXT: buffer_gl1_inv
141 ; GFX11-NEXT: ; return to shader part epilog
142 %zext.offset = zext i32 %voffset to i64
143 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
144 %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
145 %cast.rtn = bitcast i32 %rtn to float
; Returning i32 xchg with a constant byte offset of +2048, one past the 2047
; used by the "maximum positive offset on gfx10" test in this file.
; The targets are expected to differ here:
;   - gfx9 and gfx11 checks still fold the constant as offset:2048 on the
;     saddr form.
;   - gfx10 checks expect the full 64-bit address to be built in VGPRs with
;     v_add_co_u32 / v_add_co_ci_u32 (base + voffset, then + 0x800) and the
;     swap to be issued through v[2:3] with "off" instead of an SGPR base.
149 define amdgpu_ps float @global_xchg_saddr_i32_rtn_2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
150 ; GFX9-LABEL: global_xchg_saddr_i32_rtn_2048:
152 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
153 ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:2048 glc
154 ; GFX9-NEXT: s_waitcnt vmcnt(0)
155 ; GFX9-NEXT: buffer_wbinvl1
156 ; GFX9-NEXT: ; return to shader part epilog
158 ; GFX10-LABEL: global_xchg_saddr_i32_rtn_2048:
160 ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0
161 ; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s[0:1], s3, 0, s[0:1]
162 ; GFX10-NEXT: v_add_co_u32 v2, vcc, 0x800, v0
163 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc, 0, v3, vcc
164 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
165 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
166 ; GFX10-NEXT: global_atomic_swap v0, v[2:3], v1, off glc
167 ; GFX10-NEXT: s_waitcnt vmcnt(0)
168 ; GFX10-NEXT: buffer_gl0_inv
169 ; GFX10-NEXT: buffer_gl1_inv
170 ; GFX10-NEXT: ; return to shader part epilog
172 ; GFX11-LABEL: global_xchg_saddr_i32_rtn_2048:
174 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
175 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
176 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:2048 glc
177 ; GFX11-NEXT: s_waitcnt vmcnt(0)
178 ; GFX11-NEXT: buffer_gl0_inv
179 ; GFX11-NEXT: buffer_gl1_inv
180 ; GFX11-NEXT: ; return to shader part epilog
181 %zext.offset = zext i32 %voffset to i64
182 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
183 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048
184 %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
185 %cast.rtn = bitcast i32 %rtn to float
; Returning i32 xchg with a constant byte offset of -2048 on top of
; %sbase + zext(%voffset). The checks for all three targets expect the saddr
; form with offset:-2048 and glc; the old value is bitcast to float.
189 define amdgpu_ps float @global_xchg_saddr_i32_rtn_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
190 ; GFX9-LABEL: global_xchg_saddr_i32_rtn_neg2048:
192 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
193 ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:-2048 glc
194 ; GFX9-NEXT: s_waitcnt vmcnt(0)
195 ; GFX9-NEXT: buffer_wbinvl1
196 ; GFX9-NEXT: ; return to shader part epilog
198 ; GFX10-LABEL: global_xchg_saddr_i32_rtn_neg2048:
200 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
201 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
202 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:-2048 glc
203 ; GFX10-NEXT: s_waitcnt vmcnt(0)
204 ; GFX10-NEXT: buffer_gl0_inv
205 ; GFX10-NEXT: buffer_gl1_inv
206 ; GFX10-NEXT: ; return to shader part epilog
208 ; GFX11-LABEL: global_xchg_saddr_i32_rtn_neg2048:
210 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
211 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
212 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:-2048 glc
213 ; GFX11-NEXT: s_waitcnt vmcnt(0)
214 ; GFX11-NEXT: buffer_gl0_inv
215 ; GFX11-NEXT: buffer_gl1_inv
216 ; GFX11-NEXT: ; return to shader part epilog
217 %zext.offset = zext i32 %voffset to i64
218 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
219 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
220 %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
221 %cast.rtn = bitcast i32 %rtn to float
225 ; --------------------------------------------------------------------------------
226 ; Uniformity edge cases
227 ; --------------------------------------------------------------------------------
; An addrspace(3) variable holding an addrspace(1) pointer. The
; "uniform_ptr_in_vgprs" tests load their base pointer from here instead of
; receiving it as an inreg argument.
229 @ptr.in.lds = internal addrspace(3) global ptr addrspace(1) undef
231 ; Base pointer is uniform, but also in VGPRs
; Returning i32 xchg where the base pointer is loaded from @ptr.in.lds rather
; than passed inreg. The checks expect the 64-bit pointer to be read with
; ds_read_b64 (ds_load_b64 on gfx11) into v[2:3], copied to s[0:1] with two
; v_readfirstlane_b32, and then used as the SGPR base of the saddr swap.
232 define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i32 %data) {
233 ; GFX9-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn:
235 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
236 ; GFX9-NEXT: ds_read_b64 v[2:3], v2
237 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
238 ; GFX9-NEXT: v_readfirstlane_b32 s0, v2
239 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3
240 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
242 ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc
243 ; GFX9-NEXT: s_waitcnt vmcnt(0)
244 ; GFX9-NEXT: buffer_wbinvl1
245 ; GFX9-NEXT: ; return to shader part epilog
247 ; GFX10-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn:
249 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
250 ; GFX10-NEXT: ds_read_b64 v[2:3], v2
251 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
252 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
253 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
254 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
255 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
256 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc
257 ; GFX10-NEXT: s_waitcnt vmcnt(0)
258 ; GFX10-NEXT: buffer_gl0_inv
259 ; GFX10-NEXT: buffer_gl1_inv
260 ; GFX10-NEXT: ; return to shader part epilog
262 ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn:
264 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
265 ; GFX11-NEXT: ds_load_b64 v[2:3], v2
266 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
267 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
268 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
269 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
270 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
271 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] glc
272 ; GFX11-NEXT: s_waitcnt vmcnt(0)
273 ; GFX11-NEXT: buffer_gl0_inv
274 ; GFX11-NEXT: buffer_gl1_inv
275 ; GFX11-NEXT: ; return to shader part epilog
276 %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
277 %zext.offset = zext i32 %voffset to i64
278 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
279 %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
280 %cast.rtn = bitcast i32 %rtn to float
284 ; Base pointer is uniform, but also in VGPRs, with imm offset
; Returning i32 xchg with the base pointer loaded from @ptr.in.lds and an
; extra constant byte offset of 42. The checks expect the same
; ds read + v_readfirstlane_b32 sequence into s[0:1] as the variant without
; an immediate, with the constant folded as offset:42 on the saddr swap.
285 define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 %voffset, i32 %data) {
286 ; GFX9-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset:
288 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
289 ; GFX9-NEXT: ds_read_b64 v[2:3], v2
290 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
291 ; GFX9-NEXT: v_readfirstlane_b32 s0, v2
292 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3
293 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
295 ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc
296 ; GFX9-NEXT: s_waitcnt vmcnt(0)
297 ; GFX9-NEXT: buffer_wbinvl1
298 ; GFX9-NEXT: ; return to shader part epilog
300 ; GFX10-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset:
302 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
303 ; GFX10-NEXT: ds_read_b64 v[2:3], v2
304 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
305 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
306 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
307 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
308 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
309 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc
310 ; GFX10-NEXT: s_waitcnt vmcnt(0)
311 ; GFX10-NEXT: buffer_gl0_inv
312 ; GFX10-NEXT: buffer_gl1_inv
313 ; GFX10-NEXT: ; return to shader part epilog
315 ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset:
317 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
318 ; GFX11-NEXT: ds_load_b64 v[2:3], v2
319 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
320 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
321 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
322 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
323 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
324 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] offset:42 glc
325 ; GFX11-NEXT: s_waitcnt vmcnt(0)
326 ; GFX11-NEXT: buffer_gl0_inv
327 ; GFX11-NEXT: buffer_gl1_inv
328 ; GFX11-NEXT: ; return to shader part epilog
329 %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
330 %zext.offset = zext i32 %voffset to i64
331 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
332 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 42
333 %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
334 %cast.rtn = bitcast i32 %rtn to float
338 ; Base pointer is uniform, but also in VGPRs
; No-return i32 xchg with the base pointer loaded from @ptr.in.lds. The
; checks expect the pointer to be read into v[2:3], moved to s[0:1] with
; v_readfirstlane_b32, and the swap emitted in saddr form without a
; destination register or glc.
339 define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset, i32 %data) {
340 ; GFX9-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn:
342 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
343 ; GFX9-NEXT: ds_read_b64 v[2:3], v2
344 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
345 ; GFX9-NEXT: v_readfirstlane_b32 s0, v2
346 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3
347 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
349 ; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1]
350 ; GFX9-NEXT: s_waitcnt vmcnt(0)
351 ; GFX9-NEXT: buffer_wbinvl1
352 ; GFX9-NEXT: s_endpgm
354 ; GFX10-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn:
356 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
357 ; GFX10-NEXT: ds_read_b64 v[2:3], v2
358 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
359 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
360 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
361 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
362 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
363 ; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1]
364 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
365 ; GFX10-NEXT: buffer_gl0_inv
366 ; GFX10-NEXT: buffer_gl1_inv
367 ; GFX10-NEXT: s_endpgm
369 ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn:
371 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
372 ; GFX11-NEXT: ds_load_b64 v[2:3], v2
373 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
374 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
375 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
376 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
377 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
378 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
379 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
380 ; GFX11-NEXT: buffer_gl0_inv
381 ; GFX11-NEXT: buffer_gl1_inv
382 ; GFX11-NEXT: s_endpgm
383 %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
384 %zext.offset = zext i32 %voffset to i64
385 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
386 %unused = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
390 ; Base pointer is uniform, but also in VGPRs, with imm offset
; No-return i32 xchg with the base pointer loaded from @ptr.in.lds and an
; extra constant byte offset of 42. The checks expect the pointer to be moved
; to s[0:1] via v_readfirstlane_b32 and the constant folded as offset:42 on
; the saddr swap.
391 define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i32 %voffset, i32 %data) {
392 ; GFX9-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset:
394 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
395 ; GFX9-NEXT: ds_read_b64 v[2:3], v2
396 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
397 ; GFX9-NEXT: v_readfirstlane_b32 s0, v2
398 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3
399 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
401 ; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42
402 ; GFX9-NEXT: s_waitcnt vmcnt(0)
403 ; GFX9-NEXT: buffer_wbinvl1
404 ; GFX9-NEXT: s_endpgm
406 ; GFX10-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset:
408 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
409 ; GFX10-NEXT: ds_read_b64 v[2:3], v2
410 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
411 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
412 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
413 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
414 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
415 ; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42
416 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
417 ; GFX10-NEXT: buffer_gl0_inv
418 ; GFX10-NEXT: buffer_gl1_inv
419 ; GFX10-NEXT: s_endpgm
421 ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset:
423 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
424 ; GFX11-NEXT: ds_load_b64 v[2:3], v2
425 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
426 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
427 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
428 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
429 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
430 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] offset:42
431 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
432 ; GFX11-NEXT: buffer_gl0_inv
433 ; GFX11-NEXT: buffer_gl1_inv
434 ; GFX11-NEXT: s_endpgm
435 %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
436 %zext.offset = zext i32 %voffset to i64
437 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
438 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 42
439 %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
443 ; --------------------------------------------------------------------------------
445 ; --------------------------------------------------------------------------------
447 ; --------------------------------------------------------------------------------
449 ; --------------------------------------------------------------------------------
; Returning i64 xchg at %sbase + zext(%voffset); the old value is bitcast to
; <2 x float> and returned. The checks expect the 64-bit swap
; (global_atomic_swap_x2 on gfx9/gfx10, global_atomic_swap_b64 on gfx11) in
; saddr form with data in v[1:2], result in v[0:1], and glc set.
451 define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
452 ; GFX9-LABEL: global_xchg_saddr_i64_rtn:
454 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
455 ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] glc
456 ; GFX9-NEXT: s_waitcnt vmcnt(0)
457 ; GFX9-NEXT: buffer_wbinvl1
458 ; GFX9-NEXT: ; return to shader part epilog
460 ; GFX10-LABEL: global_xchg_saddr_i64_rtn:
462 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
463 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
464 ; GFX10-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] glc
465 ; GFX10-NEXT: s_waitcnt vmcnt(0)
466 ; GFX10-NEXT: buffer_gl0_inv
467 ; GFX10-NEXT: buffer_gl1_inv
468 ; GFX10-NEXT: ; return to shader part epilog
470 ; GFX11-LABEL: global_xchg_saddr_i64_rtn:
472 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
473 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
474 ; GFX11-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] glc
475 ; GFX11-NEXT: s_waitcnt vmcnt(0)
476 ; GFX11-NEXT: buffer_gl0_inv
477 ; GFX11-NEXT: buffer_gl1_inv
478 ; GFX11-NEXT: ; return to shader part epilog
479 %zext.offset = zext i32 %voffset to i64
480 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
481 %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
482 %cast.rtn = bitcast i64 %rtn to <2 x float>
483 ret <2 x float> %cast.rtn
; Returning i64 xchg with a constant byte offset of -128 on top of
; %sbase + zext(%voffset). The checks for all three targets expect the saddr
; form of the 64-bit swap with offset:-128 and glc.
486 define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
487 ; GFX9-LABEL: global_xchg_saddr_i64_rtn_neg128:
489 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
490 ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
491 ; GFX9-NEXT: s_waitcnt vmcnt(0)
492 ; GFX9-NEXT: buffer_wbinvl1
493 ; GFX9-NEXT: ; return to shader part epilog
495 ; GFX10-LABEL: global_xchg_saddr_i64_rtn_neg128:
497 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
498 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
499 ; GFX10-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
500 ; GFX10-NEXT: s_waitcnt vmcnt(0)
501 ; GFX10-NEXT: buffer_gl0_inv
502 ; GFX10-NEXT: buffer_gl1_inv
503 ; GFX10-NEXT: ; return to shader part epilog
505 ; GFX11-LABEL: global_xchg_saddr_i64_rtn_neg128:
507 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
508 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
509 ; GFX11-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
510 ; GFX11-NEXT: s_waitcnt vmcnt(0)
511 ; GFX11-NEXT: buffer_gl0_inv
512 ; GFX11-NEXT: buffer_gl1_inv
513 ; GFX11-NEXT: ; return to shader part epilog
514 %zext.offset = zext i32 %voffset to i64
515 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
516 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
517 %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
518 %cast.rtn = bitcast i64 %rtn to <2 x float>
519 ret <2 x float> %cast.rtn
; No-return i64 xchg at %sbase + zext(%voffset). The checks expect the 64-bit
; swap in saddr form with data in v[1:2] and neither a destination register
; nor glc.
522 define amdgpu_ps void @global_xchg_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
523 ; GFX9-LABEL: global_xchg_saddr_i64_nortn:
525 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
526 ; GFX9-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3]
527 ; GFX9-NEXT: s_waitcnt vmcnt(0)
528 ; GFX9-NEXT: buffer_wbinvl1
529 ; GFX9-NEXT: s_endpgm
531 ; GFX10-LABEL: global_xchg_saddr_i64_nortn:
533 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
534 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
535 ; GFX10-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3]
536 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
537 ; GFX10-NEXT: buffer_gl0_inv
538 ; GFX10-NEXT: buffer_gl1_inv
539 ; GFX10-NEXT: s_endpgm
541 ; GFX11-LABEL: global_xchg_saddr_i64_nortn:
543 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
544 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
545 ; GFX11-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3]
546 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
547 ; GFX11-NEXT: buffer_gl0_inv
548 ; GFX11-NEXT: buffer_gl1_inv
549 ; GFX11-NEXT: s_endpgm
550 %zext.offset = zext i32 %voffset to i64
551 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
552 %unused = atomicrmw xchg ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
; No-return i64 xchg with a constant byte offset of -128 on top of
; %sbase + zext(%voffset). The checks for all three targets expect the saddr
; form of the 64-bit swap with offset:-128 and no glc.
556 define amdgpu_ps void @global_xchg_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
557 ; GFX9-LABEL: global_xchg_saddr_i64_nortn_neg128:
559 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
560 ; GFX9-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] offset:-128
561 ; GFX9-NEXT: s_waitcnt vmcnt(0)
562 ; GFX9-NEXT: buffer_wbinvl1
563 ; GFX9-NEXT: s_endpgm
565 ; GFX10-LABEL: global_xchg_saddr_i64_nortn_neg128:
567 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
568 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
569 ; GFX10-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] offset:-128
570 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
571 ; GFX10-NEXT: buffer_gl0_inv
572 ; GFX10-NEXT: buffer_gl1_inv
573 ; GFX10-NEXT: s_endpgm
575 ; GFX11-LABEL: global_xchg_saddr_i64_nortn_neg128:
577 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
578 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
579 ; GFX11-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] offset:-128
580 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
581 ; GFX11-NEXT: buffer_gl0_inv
582 ; GFX11-NEXT: buffer_gl1_inv
583 ; GFX11-NEXT: s_endpgm
584 %zext.offset = zext i32 %voffset to i64
585 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
586 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
587 %unused = atomicrmw xchg ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
591 ; --------------------------------------------------------------------------------
593 ; --------------------------------------------------------------------------------
; Returning i32 atomic add at %sbase + zext(%voffset); the old value is
; bitcast to float, the function's return type. The checks expect
; global_atomic_add (global_atomic_add_u32 on gfx11) in saddr form with a
; destination VGPR and glc.
595 define amdgpu_ps float @global_add_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
596 ; GFX9-LABEL: global_add_saddr_i32_rtn:
598 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
599 ; GFX9-NEXT: global_atomic_add v0, v0, v1, s[2:3] glc
600 ; GFX9-NEXT: s_waitcnt vmcnt(0)
601 ; GFX9-NEXT: buffer_wbinvl1
602 ; GFX9-NEXT: ; return to shader part epilog
604 ; GFX10-LABEL: global_add_saddr_i32_rtn:
606 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
607 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
608 ; GFX10-NEXT: global_atomic_add v0, v0, v1, s[2:3] glc
609 ; GFX10-NEXT: s_waitcnt vmcnt(0)
610 ; GFX10-NEXT: buffer_gl0_inv
611 ; GFX10-NEXT: buffer_gl1_inv
612 ; GFX10-NEXT: ; return to shader part epilog
614 ; GFX11-LABEL: global_add_saddr_i32_rtn:
616 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
617 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
618 ; GFX11-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] glc
619 ; GFX11-NEXT: s_waitcnt vmcnt(0)
620 ; GFX11-NEXT: buffer_gl0_inv
621 ; GFX11-NEXT: buffer_gl1_inv
622 ; GFX11-NEXT: ; return to shader part epilog
623 %zext.offset = zext i32 %voffset to i64
624 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
625 %rtn = atomicrmw add ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
626 %cast.rtn = bitcast i32 %rtn to float
; Returning i32 atomic add with a constant byte offset of -128 on top of
; %sbase + zext(%voffset). The checks for all three targets expect the saddr
; form with offset:-128 and glc.
630 define amdgpu_ps float @global_add_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
631 ; GFX9-LABEL: global_add_saddr_i32_rtn_neg128:
633 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
634 ; GFX9-NEXT: global_atomic_add v0, v0, v1, s[2:3] offset:-128 glc
635 ; GFX9-NEXT: s_waitcnt vmcnt(0)
636 ; GFX9-NEXT: buffer_wbinvl1
637 ; GFX9-NEXT: ; return to shader part epilog
639 ; GFX10-LABEL: global_add_saddr_i32_rtn_neg128:
641 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
642 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
643 ; GFX10-NEXT: global_atomic_add v0, v0, v1, s[2:3] offset:-128 glc
644 ; GFX10-NEXT: s_waitcnt vmcnt(0)
645 ; GFX10-NEXT: buffer_gl0_inv
646 ; GFX10-NEXT: buffer_gl1_inv
647 ; GFX10-NEXT: ; return to shader part epilog
649 ; GFX11-LABEL: global_add_saddr_i32_rtn_neg128:
651 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
652 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
653 ; GFX11-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] offset:-128 glc
654 ; GFX11-NEXT: s_waitcnt vmcnt(0)
655 ; GFX11-NEXT: buffer_gl0_inv
656 ; GFX11-NEXT: buffer_gl1_inv
657 ; GFX11-NEXT: ; return to shader part epilog
658 %zext.offset = zext i32 %voffset to i64
659 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
660 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
661 %rtn = atomicrmw add ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
662 %cast.rtn = bitcast i32 %rtn to float
; No-return i32 atomic add at %sbase + zext(%voffset). The checks expect the
; saddr form of the add without a destination register or glc.
666 define amdgpu_ps void @global_add_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
667 ; GFX9-LABEL: global_add_saddr_i32_nortn:
669 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
670 ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3]
671 ; GFX9-NEXT: s_waitcnt vmcnt(0)
672 ; GFX9-NEXT: buffer_wbinvl1
673 ; GFX9-NEXT: s_endpgm
675 ; GFX10-LABEL: global_add_saddr_i32_nortn:
677 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
678 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
679 ; GFX10-NEXT: global_atomic_add v0, v1, s[2:3]
680 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
681 ; GFX10-NEXT: buffer_gl0_inv
682 ; GFX10-NEXT: buffer_gl1_inv
683 ; GFX10-NEXT: s_endpgm
685 ; GFX11-LABEL: global_add_saddr_i32_nortn:
687 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
688 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
689 ; GFX11-NEXT: global_atomic_add_u32 v0, v1, s[2:3]
690 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
691 ; GFX11-NEXT: buffer_gl0_inv
692 ; GFX11-NEXT: buffer_gl1_inv
693 ; GFX11-NEXT: s_endpgm
694 %zext.offset = zext i32 %voffset to i64
695 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
696 %unused = atomicrmw add ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
; No-return i32 atomic add with a constant byte offset of -128 on top of
; %sbase + zext(%voffset). The checks for all three targets expect the saddr
; form with offset:-128 and no glc.
700 define amdgpu_ps void @global_add_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
701 ; GFX9-LABEL: global_add_saddr_i32_nortn_neg128:
703 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
704 ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] offset:-128
705 ; GFX9-NEXT: s_waitcnt vmcnt(0)
706 ; GFX9-NEXT: buffer_wbinvl1
707 ; GFX9-NEXT: s_endpgm
709 ; GFX10-LABEL: global_add_saddr_i32_nortn_neg128:
711 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
712 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
713 ; GFX10-NEXT: global_atomic_add v0, v1, s[2:3] offset:-128
714 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
715 ; GFX10-NEXT: buffer_gl0_inv
716 ; GFX10-NEXT: buffer_gl1_inv
717 ; GFX10-NEXT: s_endpgm
719 ; GFX11-LABEL: global_add_saddr_i32_nortn_neg128:
721 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
722 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
723 ; GFX11-NEXT: global_atomic_add_u32 v0, v1, s[2:3] offset:-128
724 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
725 ; GFX11-NEXT: buffer_gl0_inv
726 ; GFX11-NEXT: buffer_gl1_inv
727 ; GFX11-NEXT: s_endpgm
728 %zext.offset = zext i32 %voffset to i64
729 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
730 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
731 %unused = atomicrmw add ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
; Returning i64 atomic add at %sbase + zext(%voffset); the old value is
; bitcast to <2 x float> and returned. The checks expect the 64-bit add
; (global_atomic_add_x2 on gfx9/gfx10, global_atomic_add_u64 on gfx11) in
; saddr form with data in v[1:2], result in v[0:1], and glc set.
735 define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
736 ; GFX9-LABEL: global_add_saddr_i64_rtn:
738 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
739 ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] glc
740 ; GFX9-NEXT: s_waitcnt vmcnt(0)
741 ; GFX9-NEXT: buffer_wbinvl1
742 ; GFX9-NEXT: ; return to shader part epilog
744 ; GFX10-LABEL: global_add_saddr_i64_rtn:
746 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
747 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
748 ; GFX10-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] glc
749 ; GFX10-NEXT: s_waitcnt vmcnt(0)
750 ; GFX10-NEXT: buffer_gl0_inv
751 ; GFX10-NEXT: buffer_gl1_inv
752 ; GFX10-NEXT: ; return to shader part epilog
754 ; GFX11-LABEL: global_add_saddr_i64_rtn:
756 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
757 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
758 ; GFX11-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] glc
759 ; GFX11-NEXT: s_waitcnt vmcnt(0)
760 ; GFX11-NEXT: buffer_gl0_inv
761 ; GFX11-NEXT: buffer_gl1_inv
762 ; GFX11-NEXT: ; return to shader part epilog
763 %zext.offset = zext i32 %voffset to i64
764 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
765 %rtn = atomicrmw add ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
766 %cast.rtn = bitcast i64 %rtn to <2 x float>
767 ret <2 x float> %cast.rtn
; Returning i64 atomic add with a constant byte offset of -128 on top of
; %sbase + zext(%voffset). The checks for all three targets expect the saddr
; form of the 64-bit add with offset:-128 and glc.
770 define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
771 ; GFX9-LABEL: global_add_saddr_i64_rtn_neg128:
773 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
774 ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
775 ; GFX9-NEXT: s_waitcnt vmcnt(0)
776 ; GFX9-NEXT: buffer_wbinvl1
777 ; GFX9-NEXT: ; return to shader part epilog
779 ; GFX10-LABEL: global_add_saddr_i64_rtn_neg128:
781 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
782 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
783 ; GFX10-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
784 ; GFX10-NEXT: s_waitcnt vmcnt(0)
785 ; GFX10-NEXT: buffer_gl0_inv
786 ; GFX10-NEXT: buffer_gl1_inv
787 ; GFX10-NEXT: ; return to shader part epilog
789 ; GFX11-LABEL: global_add_saddr_i64_rtn_neg128:
791 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
792 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
793 ; GFX11-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
794 ; GFX11-NEXT: s_waitcnt vmcnt(0)
795 ; GFX11-NEXT: buffer_gl0_inv
796 ; GFX11-NEXT: buffer_gl1_inv
797 ; GFX11-NEXT: ; return to shader part epilog
798 %zext.offset = zext i32 %voffset to i64
799 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
800 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
801 %rtn = atomicrmw add ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
802 %cast.rtn = bitcast i64 %rtn to <2 x float>
803 ret <2 x float> %cast.rtn
; Non-returning 64-bit atomicrmw add (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset). The result (%unused) is never read, and the checks
; expect the destination-less saddr form without glc, followed by s_endpgm
; (global_atomic_add_x2 on gfx900/gfx1010, global_atomic_add_u64 on gfx1100).
806 define amdgpu_ps void @global_add_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
807 ; GFX9-LABEL: global_add_saddr_i64_nortn:
809 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
810 ; GFX9-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3]
811 ; GFX9-NEXT: s_waitcnt vmcnt(0)
812 ; GFX9-NEXT: buffer_wbinvl1
813 ; GFX9-NEXT: s_endpgm
815 ; GFX10-LABEL: global_add_saddr_i64_nortn:
817 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
818 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
819 ; GFX10-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3]
820 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
821 ; GFX10-NEXT: buffer_gl0_inv
822 ; GFX10-NEXT: buffer_gl1_inv
823 ; GFX10-NEXT: s_endpgm
825 ; GFX11-LABEL: global_add_saddr_i64_nortn:
827 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
828 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
829 ; GFX11-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3]
830 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
831 ; GFX11-NEXT: buffer_gl0_inv
832 ; GFX11-NEXT: buffer_gl1_inv
833 ; GFX11-NEXT: s_endpgm
834 %zext.offset = zext i32 %voffset to i64
835 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
836 %unused = atomicrmw add ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
; Non-returning 64-bit atomicrmw add (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128. The result (%unused) is never read, and the
; checks expect the destination-less saddr form with "offset:-128" and no glc
; (global_atomic_add_x2 on gfx900/gfx1010, global_atomic_add_u64 on gfx1100).
840 define amdgpu_ps void @global_add_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
841 ; GFX9-LABEL: global_add_saddr_i64_nortn_neg128:
843 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
844 ; GFX9-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] offset:-128
845 ; GFX9-NEXT: s_waitcnt vmcnt(0)
846 ; GFX9-NEXT: buffer_wbinvl1
847 ; GFX9-NEXT: s_endpgm
849 ; GFX10-LABEL: global_add_saddr_i64_nortn_neg128:
851 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
852 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
853 ; GFX10-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] offset:-128
854 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
855 ; GFX10-NEXT: buffer_gl0_inv
856 ; GFX10-NEXT: buffer_gl1_inv
857 ; GFX10-NEXT: s_endpgm
859 ; GFX11-LABEL: global_add_saddr_i64_nortn_neg128:
861 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
862 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
863 ; GFX11-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] offset:-128
864 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
865 ; GFX11-NEXT: buffer_gl0_inv
866 ; GFX11-NEXT: buffer_gl1_inv
867 ; GFX11-NEXT: s_endpgm
868 %zext.offset = zext i32 %voffset to i64
869 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
870 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
871 %unused = atomicrmw add ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
875 ; --------------------------------------------------------------------------------
877 ; --------------------------------------------------------------------------------
; Returning 32-bit atomicrmw sub (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset); the result is bitcast to float, the function's
; return type. The checks expect the saddr form with the glc modifier
; (global_atomic_sub on gfx900/gfx1010, global_atomic_sub_u32 on gfx1100).
879 define amdgpu_ps float @global_sub_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
880 ; GFX9-LABEL: global_sub_saddr_i32_rtn:
882 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
883 ; GFX9-NEXT: global_atomic_sub v0, v0, v1, s[2:3] glc
884 ; GFX9-NEXT: s_waitcnt vmcnt(0)
885 ; GFX9-NEXT: buffer_wbinvl1
886 ; GFX9-NEXT: ; return to shader part epilog
888 ; GFX10-LABEL: global_sub_saddr_i32_rtn:
890 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
891 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
892 ; GFX10-NEXT: global_atomic_sub v0, v0, v1, s[2:3] glc
893 ; GFX10-NEXT: s_waitcnt vmcnt(0)
894 ; GFX10-NEXT: buffer_gl0_inv
895 ; GFX10-NEXT: buffer_gl1_inv
896 ; GFX10-NEXT: ; return to shader part epilog
898 ; GFX11-LABEL: global_sub_saddr_i32_rtn:
900 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
901 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
902 ; GFX11-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] glc
903 ; GFX11-NEXT: s_waitcnt vmcnt(0)
904 ; GFX11-NEXT: buffer_gl0_inv
905 ; GFX11-NEXT: buffer_gl1_inv
906 ; GFX11-NEXT: ; return to shader part epilog
907 %zext.offset = zext i32 %voffset to i64
908 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
909 %rtn = atomicrmw sub ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
910 %cast.rtn = bitcast i32 %rtn to float
; Returning 32-bit atomicrmw sub (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128; the result is bitcast to float, the function's
; return type. The checks expect the saddr form with "offset:-128" and glc
; (global_atomic_sub on gfx900/gfx1010, global_atomic_sub_u32 on gfx1100).
914 define amdgpu_ps float @global_sub_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
915 ; GFX9-LABEL: global_sub_saddr_i32_rtn_neg128:
917 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
918 ; GFX9-NEXT: global_atomic_sub v0, v0, v1, s[2:3] offset:-128 glc
919 ; GFX9-NEXT: s_waitcnt vmcnt(0)
920 ; GFX9-NEXT: buffer_wbinvl1
921 ; GFX9-NEXT: ; return to shader part epilog
923 ; GFX10-LABEL: global_sub_saddr_i32_rtn_neg128:
925 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
926 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
927 ; GFX10-NEXT: global_atomic_sub v0, v0, v1, s[2:3] offset:-128 glc
928 ; GFX10-NEXT: s_waitcnt vmcnt(0)
929 ; GFX10-NEXT: buffer_gl0_inv
930 ; GFX10-NEXT: buffer_gl1_inv
931 ; GFX10-NEXT: ; return to shader part epilog
933 ; GFX11-LABEL: global_sub_saddr_i32_rtn_neg128:
935 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
936 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
937 ; GFX11-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] offset:-128 glc
938 ; GFX11-NEXT: s_waitcnt vmcnt(0)
939 ; GFX11-NEXT: buffer_gl0_inv
940 ; GFX11-NEXT: buffer_gl1_inv
941 ; GFX11-NEXT: ; return to shader part epilog
942 %zext.offset = zext i32 %voffset to i64
943 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
944 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
945 %rtn = atomicrmw sub ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
946 %cast.rtn = bitcast i32 %rtn to float
; Non-returning 32-bit atomicrmw sub (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset). The result (%unused) is never read, and the checks
; expect the destination-less saddr form without glc, followed by s_endpgm
; (global_atomic_sub on gfx900/gfx1010, global_atomic_sub_u32 on gfx1100).
950 define amdgpu_ps void @global_sub_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
951 ; GFX9-LABEL: global_sub_saddr_i32_nortn:
953 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
954 ; GFX9-NEXT: global_atomic_sub v0, v1, s[2:3]
955 ; GFX9-NEXT: s_waitcnt vmcnt(0)
956 ; GFX9-NEXT: buffer_wbinvl1
957 ; GFX9-NEXT: s_endpgm
959 ; GFX10-LABEL: global_sub_saddr_i32_nortn:
961 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
962 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
963 ; GFX10-NEXT: global_atomic_sub v0, v1, s[2:3]
964 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
965 ; GFX10-NEXT: buffer_gl0_inv
966 ; GFX10-NEXT: buffer_gl1_inv
967 ; GFX10-NEXT: s_endpgm
969 ; GFX11-LABEL: global_sub_saddr_i32_nortn:
971 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
972 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
973 ; GFX11-NEXT: global_atomic_sub_u32 v0, v1, s[2:3]
974 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
975 ; GFX11-NEXT: buffer_gl0_inv
976 ; GFX11-NEXT: buffer_gl1_inv
977 ; GFX11-NEXT: s_endpgm
978 %zext.offset = zext i32 %voffset to i64
979 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
980 %unused = atomicrmw sub ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
; Non-returning 32-bit atomicrmw sub (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128. The result (%unused) is never read, and the
; checks expect the destination-less saddr form with "offset:-128" and no glc
; (global_atomic_sub on gfx900/gfx1010, global_atomic_sub_u32 on gfx1100).
984 define amdgpu_ps void @global_sub_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
985 ; GFX9-LABEL: global_sub_saddr_i32_nortn_neg128:
987 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
988 ; GFX9-NEXT: global_atomic_sub v0, v1, s[2:3] offset:-128
989 ; GFX9-NEXT: s_waitcnt vmcnt(0)
990 ; GFX9-NEXT: buffer_wbinvl1
991 ; GFX9-NEXT: s_endpgm
993 ; GFX10-LABEL: global_sub_saddr_i32_nortn_neg128:
995 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
996 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
997 ; GFX10-NEXT: global_atomic_sub v0, v1, s[2:3] offset:-128
998 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
999 ; GFX10-NEXT: buffer_gl0_inv
1000 ; GFX10-NEXT: buffer_gl1_inv
1001 ; GFX10-NEXT: s_endpgm
1003 ; GFX11-LABEL: global_sub_saddr_i32_nortn_neg128:
1005 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1006 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1007 ; GFX11-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] offset:-128
1008 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1009 ; GFX11-NEXT: buffer_gl0_inv
1010 ; GFX11-NEXT: buffer_gl1_inv
1011 ; GFX11-NEXT: s_endpgm
1012 %zext.offset = zext i32 %voffset to i64
1013 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1014 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1015 %unused = atomicrmw sub ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
; Returning 64-bit atomicrmw sub (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset); the result is bitcast to <2 x float> and returned.
; The checks expect the saddr form with the glc modifier
; (global_atomic_sub_x2 on gfx900/gfx1010, global_atomic_sub_u64 on gfx1100).
1019 define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1020 ; GFX9-LABEL: global_sub_saddr_i64_rtn:
1022 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1023 ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] glc
1024 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1025 ; GFX9-NEXT: buffer_wbinvl1
1026 ; GFX9-NEXT: ; return to shader part epilog
1028 ; GFX10-LABEL: global_sub_saddr_i64_rtn:
1030 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1031 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1032 ; GFX10-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] glc
1033 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1034 ; GFX10-NEXT: buffer_gl0_inv
1035 ; GFX10-NEXT: buffer_gl1_inv
1036 ; GFX10-NEXT: ; return to shader part epilog
1038 ; GFX11-LABEL: global_sub_saddr_i64_rtn:
1040 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1041 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1042 ; GFX11-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] glc
1043 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1044 ; GFX11-NEXT: buffer_gl0_inv
1045 ; GFX11-NEXT: buffer_gl1_inv
1046 ; GFX11-NEXT: ; return to shader part epilog
1047 %zext.offset = zext i32 %voffset to i64
1048 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1049 %rtn = atomicrmw sub ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
1050 %cast.rtn = bitcast i64 %rtn to <2 x float>
1051 ret <2 x float> %cast.rtn
; Returning 64-bit atomicrmw sub (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128; the result is bitcast to <2 x float> and
; returned. The checks expect the saddr form with "offset:-128" and glc
; (global_atomic_sub_x2 on gfx900/gfx1010, global_atomic_sub_u64 on gfx1100).
1054 define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1055 ; GFX9-LABEL: global_sub_saddr_i64_rtn_neg128:
1057 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1058 ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1059 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1060 ; GFX9-NEXT: buffer_wbinvl1
1061 ; GFX9-NEXT: ; return to shader part epilog
1063 ; GFX10-LABEL: global_sub_saddr_i64_rtn_neg128:
1065 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1066 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1067 ; GFX10-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1068 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1069 ; GFX10-NEXT: buffer_gl0_inv
1070 ; GFX10-NEXT: buffer_gl1_inv
1071 ; GFX10-NEXT: ; return to shader part epilog
1073 ; GFX11-LABEL: global_sub_saddr_i64_rtn_neg128:
1075 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1076 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1077 ; GFX11-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1078 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1079 ; GFX11-NEXT: buffer_gl0_inv
1080 ; GFX11-NEXT: buffer_gl1_inv
1081 ; GFX11-NEXT: ; return to shader part epilog
1082 %zext.offset = zext i32 %voffset to i64
1083 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1084 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1085 %rtn = atomicrmw sub ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
1086 %cast.rtn = bitcast i64 %rtn to <2 x float>
1087 ret <2 x float> %cast.rtn
; Non-returning 64-bit atomicrmw sub (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset). The result (%unused) is never read, and the checks
; expect the destination-less saddr form without glc, followed by s_endpgm
; (global_atomic_sub_x2 on gfx900/gfx1010, global_atomic_sub_u64 on gfx1100).
1090 define amdgpu_ps void @global_sub_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1091 ; GFX9-LABEL: global_sub_saddr_i64_nortn:
1093 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1094 ; GFX9-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3]
1095 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1096 ; GFX9-NEXT: buffer_wbinvl1
1097 ; GFX9-NEXT: s_endpgm
1099 ; GFX10-LABEL: global_sub_saddr_i64_nortn:
1101 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1102 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1103 ; GFX10-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3]
1104 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1105 ; GFX10-NEXT: buffer_gl0_inv
1106 ; GFX10-NEXT: buffer_gl1_inv
1107 ; GFX10-NEXT: s_endpgm
1109 ; GFX11-LABEL: global_sub_saddr_i64_nortn:
1111 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1112 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1113 ; GFX11-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3]
1114 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1115 ; GFX11-NEXT: buffer_gl0_inv
1116 ; GFX11-NEXT: buffer_gl1_inv
1117 ; GFX11-NEXT: s_endpgm
1118 %zext.offset = zext i32 %voffset to i64
1119 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1120 %unused = atomicrmw sub ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
; Non-returning 64-bit atomicrmw sub (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128. The result (%unused) is never read, and the
; checks expect the destination-less saddr form with "offset:-128" and no glc
; (global_atomic_sub_x2 on gfx900/gfx1010, global_atomic_sub_u64 on gfx1100).
1124 define amdgpu_ps void @global_sub_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1125 ; GFX9-LABEL: global_sub_saddr_i64_nortn_neg128:
1127 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1128 ; GFX9-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] offset:-128
1129 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1130 ; GFX9-NEXT: buffer_wbinvl1
1131 ; GFX9-NEXT: s_endpgm
1133 ; GFX10-LABEL: global_sub_saddr_i64_nortn_neg128:
1135 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1136 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1137 ; GFX10-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] offset:-128
1138 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1139 ; GFX10-NEXT: buffer_gl0_inv
1140 ; GFX10-NEXT: buffer_gl1_inv
1141 ; GFX10-NEXT: s_endpgm
1143 ; GFX11-LABEL: global_sub_saddr_i64_nortn_neg128:
1145 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1146 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1147 ; GFX11-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] offset:-128
1148 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1149 ; GFX11-NEXT: buffer_gl0_inv
1150 ; GFX11-NEXT: buffer_gl1_inv
1151 ; GFX11-NEXT: s_endpgm
1152 %zext.offset = zext i32 %voffset to i64
1153 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1154 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1155 %unused = atomicrmw sub ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
1159 ; --------------------------------------------------------------------------------
1161 ; --------------------------------------------------------------------------------
; Returning 32-bit atomicrmw and (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset); the result is bitcast to float, the function's
; return type. The checks expect the saddr form with the glc modifier
; (global_atomic_and on gfx900/gfx1010, global_atomic_and_b32 on gfx1100).
1163 define amdgpu_ps float @global_and_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1164 ; GFX9-LABEL: global_and_saddr_i32_rtn:
1166 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1167 ; GFX9-NEXT: global_atomic_and v0, v0, v1, s[2:3] glc
1168 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1169 ; GFX9-NEXT: buffer_wbinvl1
1170 ; GFX9-NEXT: ; return to shader part epilog
1172 ; GFX10-LABEL: global_and_saddr_i32_rtn:
1174 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1175 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1176 ; GFX10-NEXT: global_atomic_and v0, v0, v1, s[2:3] glc
1177 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1178 ; GFX10-NEXT: buffer_gl0_inv
1179 ; GFX10-NEXT: buffer_gl1_inv
1180 ; GFX10-NEXT: ; return to shader part epilog
1182 ; GFX11-LABEL: global_and_saddr_i32_rtn:
1184 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1185 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1186 ; GFX11-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] glc
1187 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1188 ; GFX11-NEXT: buffer_gl0_inv
1189 ; GFX11-NEXT: buffer_gl1_inv
1190 ; GFX11-NEXT: ; return to shader part epilog
1191 %zext.offset = zext i32 %voffset to i64
1192 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1193 %rtn = atomicrmw and ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
1194 %cast.rtn = bitcast i32 %rtn to float
; Returning 32-bit atomicrmw and (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128; the result is bitcast to float, the function's
; return type. The checks expect the saddr form with "offset:-128" and glc
; (global_atomic_and on gfx900/gfx1010, global_atomic_and_b32 on gfx1100).
1198 define amdgpu_ps float @global_and_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1199 ; GFX9-LABEL: global_and_saddr_i32_rtn_neg128:
1201 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1202 ; GFX9-NEXT: global_atomic_and v0, v0, v1, s[2:3] offset:-128 glc
1203 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1204 ; GFX9-NEXT: buffer_wbinvl1
1205 ; GFX9-NEXT: ; return to shader part epilog
1207 ; GFX10-LABEL: global_and_saddr_i32_rtn_neg128:
1209 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1210 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1211 ; GFX10-NEXT: global_atomic_and v0, v0, v1, s[2:3] offset:-128 glc
1212 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1213 ; GFX10-NEXT: buffer_gl0_inv
1214 ; GFX10-NEXT: buffer_gl1_inv
1215 ; GFX10-NEXT: ; return to shader part epilog
1217 ; GFX11-LABEL: global_and_saddr_i32_rtn_neg128:
1219 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1220 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1221 ; GFX11-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] offset:-128 glc
1222 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1223 ; GFX11-NEXT: buffer_gl0_inv
1224 ; GFX11-NEXT: buffer_gl1_inv
1225 ; GFX11-NEXT: ; return to shader part epilog
1226 %zext.offset = zext i32 %voffset to i64
1227 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1228 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1229 %rtn = atomicrmw and ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
1230 %cast.rtn = bitcast i32 %rtn to float
; Non-returning 32-bit atomicrmw and (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset). The result (%unused) is never read, and the checks
; expect the destination-less saddr form without glc, followed by s_endpgm
; (global_atomic_and on gfx900/gfx1010, global_atomic_and_b32 on gfx1100).
1234 define amdgpu_ps void @global_and_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1235 ; GFX9-LABEL: global_and_saddr_i32_nortn:
1237 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1238 ; GFX9-NEXT: global_atomic_and v0, v1, s[2:3]
1239 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1240 ; GFX9-NEXT: buffer_wbinvl1
1241 ; GFX9-NEXT: s_endpgm
1243 ; GFX10-LABEL: global_and_saddr_i32_nortn:
1245 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1246 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1247 ; GFX10-NEXT: global_atomic_and v0, v1, s[2:3]
1248 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1249 ; GFX10-NEXT: buffer_gl0_inv
1250 ; GFX10-NEXT: buffer_gl1_inv
1251 ; GFX10-NEXT: s_endpgm
1253 ; GFX11-LABEL: global_and_saddr_i32_nortn:
1255 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1256 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1257 ; GFX11-NEXT: global_atomic_and_b32 v0, v1, s[2:3]
1258 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1259 ; GFX11-NEXT: buffer_gl0_inv
1260 ; GFX11-NEXT: buffer_gl1_inv
1261 ; GFX11-NEXT: s_endpgm
1262 %zext.offset = zext i32 %voffset to i64
1263 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1264 %unused = atomicrmw and ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
; Non-returning 32-bit atomicrmw and (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128. The result (%unused) is never read, and the
; checks expect the destination-less saddr form with "offset:-128" and no glc
; (global_atomic_and on gfx900/gfx1010, global_atomic_and_b32 on gfx1100).
1268 define amdgpu_ps void @global_and_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1269 ; GFX9-LABEL: global_and_saddr_i32_nortn_neg128:
1271 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1272 ; GFX9-NEXT: global_atomic_and v0, v1, s[2:3] offset:-128
1273 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1274 ; GFX9-NEXT: buffer_wbinvl1
1275 ; GFX9-NEXT: s_endpgm
1277 ; GFX10-LABEL: global_and_saddr_i32_nortn_neg128:
1279 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1280 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1281 ; GFX10-NEXT: global_atomic_and v0, v1, s[2:3] offset:-128
1282 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1283 ; GFX10-NEXT: buffer_gl0_inv
1284 ; GFX10-NEXT: buffer_gl1_inv
1285 ; GFX10-NEXT: s_endpgm
1287 ; GFX11-LABEL: global_and_saddr_i32_nortn_neg128:
1289 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1290 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1291 ; GFX11-NEXT: global_atomic_and_b32 v0, v1, s[2:3] offset:-128
1292 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1293 ; GFX11-NEXT: buffer_gl0_inv
1294 ; GFX11-NEXT: buffer_gl1_inv
1295 ; GFX11-NEXT: s_endpgm
1296 %zext.offset = zext i32 %voffset to i64
1297 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1298 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1299 %unused = atomicrmw and ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
; Returning 64-bit atomicrmw and (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset); the result is bitcast to <2 x float> and returned.
; The checks expect the saddr form with the glc modifier
; (global_atomic_and_x2 on gfx900/gfx1010, global_atomic_and_b64 on gfx1100).
1303 define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1304 ; GFX9-LABEL: global_and_saddr_i64_rtn:
1306 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1307 ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] glc
1308 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1309 ; GFX9-NEXT: buffer_wbinvl1
1310 ; GFX9-NEXT: ; return to shader part epilog
1312 ; GFX10-LABEL: global_and_saddr_i64_rtn:
1314 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1315 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1316 ; GFX10-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] glc
1317 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1318 ; GFX10-NEXT: buffer_gl0_inv
1319 ; GFX10-NEXT: buffer_gl1_inv
1320 ; GFX10-NEXT: ; return to shader part epilog
1322 ; GFX11-LABEL: global_and_saddr_i64_rtn:
1324 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1325 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1326 ; GFX11-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] glc
1327 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1328 ; GFX11-NEXT: buffer_gl0_inv
1329 ; GFX11-NEXT: buffer_gl1_inv
1330 ; GFX11-NEXT: ; return to shader part epilog
1331 %zext.offset = zext i32 %voffset to i64
1332 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1333 %rtn = atomicrmw and ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
1334 %cast.rtn = bitcast i64 %rtn to <2 x float>
1335 ret <2 x float> %cast.rtn
; Returning 64-bit atomicrmw and (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128; the result is bitcast to <2 x float> and
; returned. The checks expect the saddr form with "offset:-128" and glc
; (global_atomic_and_x2 on gfx900/gfx1010, global_atomic_and_b64 on gfx1100).
1338 define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1339 ; GFX9-LABEL: global_and_saddr_i64_rtn_neg128:
1341 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1342 ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1343 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1344 ; GFX9-NEXT: buffer_wbinvl1
1345 ; GFX9-NEXT: ; return to shader part epilog
1347 ; GFX10-LABEL: global_and_saddr_i64_rtn_neg128:
1349 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1350 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1351 ; GFX10-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1352 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1353 ; GFX10-NEXT: buffer_gl0_inv
1354 ; GFX10-NEXT: buffer_gl1_inv
1355 ; GFX10-NEXT: ; return to shader part epilog
1357 ; GFX11-LABEL: global_and_saddr_i64_rtn_neg128:
1359 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1360 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1361 ; GFX11-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1362 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1363 ; GFX11-NEXT: buffer_gl0_inv
1364 ; GFX11-NEXT: buffer_gl1_inv
1365 ; GFX11-NEXT: ; return to shader part epilog
1366 %zext.offset = zext i32 %voffset to i64
1367 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1368 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1369 %rtn = atomicrmw and ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
1370 %cast.rtn = bitcast i64 %rtn to <2 x float>
1371 ret <2 x float> %cast.rtn
; Non-returning 64-bit atomicrmw and (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset). The result (%unused) is never read, and the checks
; expect the destination-less saddr form without glc, followed by s_endpgm
; (global_atomic_and_x2 on gfx900/gfx1010, global_atomic_and_b64 on gfx1100).
1374 define amdgpu_ps void @global_and_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1375 ; GFX9-LABEL: global_and_saddr_i64_nortn:
1377 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1378 ; GFX9-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3]
1379 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1380 ; GFX9-NEXT: buffer_wbinvl1
1381 ; GFX9-NEXT: s_endpgm
1383 ; GFX10-LABEL: global_and_saddr_i64_nortn:
1385 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1386 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1387 ; GFX10-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3]
1388 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1389 ; GFX10-NEXT: buffer_gl0_inv
1390 ; GFX10-NEXT: buffer_gl1_inv
1391 ; GFX10-NEXT: s_endpgm
1393 ; GFX11-LABEL: global_and_saddr_i64_nortn:
1395 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1396 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1397 ; GFX11-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3]
1398 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1399 ; GFX11-NEXT: buffer_gl0_inv
1400 ; GFX11-NEXT: buffer_gl1_inv
1401 ; GFX11-NEXT: s_endpgm
1402 %zext.offset = zext i32 %voffset to i64
1403 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1404 %unused = atomicrmw and ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
; Non-returning 64-bit atomicrmw and (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128. The result (%unused) is never read, and the
; checks expect the destination-less saddr form with "offset:-128" and no glc
; (global_atomic_and_x2 on gfx900/gfx1010, global_atomic_and_b64 on gfx1100).
1408 define amdgpu_ps void @global_and_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1409 ; GFX9-LABEL: global_and_saddr_i64_nortn_neg128:
1411 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1412 ; GFX9-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] offset:-128
1413 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1414 ; GFX9-NEXT: buffer_wbinvl1
1415 ; GFX9-NEXT: s_endpgm
1417 ; GFX10-LABEL: global_and_saddr_i64_nortn_neg128:
1419 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1420 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1421 ; GFX10-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] offset:-128
1422 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1423 ; GFX10-NEXT: buffer_gl0_inv
1424 ; GFX10-NEXT: buffer_gl1_inv
1425 ; GFX10-NEXT: s_endpgm
1427 ; GFX11-LABEL: global_and_saddr_i64_nortn_neg128:
1429 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1430 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1431 ; GFX11-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] offset:-128
1432 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1433 ; GFX11-NEXT: buffer_gl0_inv
1434 ; GFX11-NEXT: buffer_gl1_inv
1435 ; GFX11-NEXT: s_endpgm
1436 %zext.offset = zext i32 %voffset to i64
1437 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1438 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1439 %unused = atomicrmw and ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
1443 ; --------------------------------------------------------------------------------
1445 ; --------------------------------------------------------------------------------
; Returning 32-bit atomicrmw or (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset); the result is bitcast to float, the function's
; return type. The checks expect the saddr form with the glc modifier
; (global_atomic_or on gfx900/gfx1010, global_atomic_or_b32 on gfx1100).
1447 define amdgpu_ps float @global_or_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1448 ; GFX9-LABEL: global_or_saddr_i32_rtn:
1450 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1451 ; GFX9-NEXT: global_atomic_or v0, v0, v1, s[2:3] glc
1452 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1453 ; GFX9-NEXT: buffer_wbinvl1
1454 ; GFX9-NEXT: ; return to shader part epilog
1456 ; GFX10-LABEL: global_or_saddr_i32_rtn:
1458 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1459 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1460 ; GFX10-NEXT: global_atomic_or v0, v0, v1, s[2:3] glc
1461 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1462 ; GFX10-NEXT: buffer_gl0_inv
1463 ; GFX10-NEXT: buffer_gl1_inv
1464 ; GFX10-NEXT: ; return to shader part epilog
1466 ; GFX11-LABEL: global_or_saddr_i32_rtn:
1468 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1469 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1470 ; GFX11-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] glc
1471 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1472 ; GFX11-NEXT: buffer_gl0_inv
1473 ; GFX11-NEXT: buffer_gl1_inv
1474 ; GFX11-NEXT: ; return to shader part epilog
1475 %zext.offset = zext i32 %voffset to i64
1476 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1477 %rtn = atomicrmw or ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
1478 %cast.rtn = bitcast i32 %rtn to float
; Returning 32-bit atomicrmw or (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128; the result is bitcast to float, the function's
; return type. The checks expect the saddr form with "offset:-128" and glc
; (global_atomic_or on gfx900/gfx1010, global_atomic_or_b32 on gfx1100).
1482 define amdgpu_ps float @global_or_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1483 ; GFX9-LABEL: global_or_saddr_i32_rtn_neg128:
1485 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1486 ; GFX9-NEXT: global_atomic_or v0, v0, v1, s[2:3] offset:-128 glc
1487 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1488 ; GFX9-NEXT: buffer_wbinvl1
1489 ; GFX9-NEXT: ; return to shader part epilog
1491 ; GFX10-LABEL: global_or_saddr_i32_rtn_neg128:
1493 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1494 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1495 ; GFX10-NEXT: global_atomic_or v0, v0, v1, s[2:3] offset:-128 glc
1496 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1497 ; GFX10-NEXT: buffer_gl0_inv
1498 ; GFX10-NEXT: buffer_gl1_inv
1499 ; GFX10-NEXT: ; return to shader part epilog
1501 ; GFX11-LABEL: global_or_saddr_i32_rtn_neg128:
1503 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1504 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1505 ; GFX11-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] offset:-128 glc
1506 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1507 ; GFX11-NEXT: buffer_gl0_inv
1508 ; GFX11-NEXT: buffer_gl1_inv
1509 ; GFX11-NEXT: ; return to shader part epilog
1510 %zext.offset = zext i32 %voffset to i64
1511 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1512 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1513 %rtn = atomicrmw or ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
1514 %cast.rtn = bitcast i32 %rtn to float
; Non-returning 32-bit atomicrmw or (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset). The result (%unused) is never read, and the checks
; expect the destination-less saddr form without glc, followed by s_endpgm
; (global_atomic_or on gfx900/gfx1010, global_atomic_or_b32 on gfx1100).
1518 define amdgpu_ps void @global_or_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1519 ; GFX9-LABEL: global_or_saddr_i32_nortn:
1521 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1522 ; GFX9-NEXT: global_atomic_or v0, v1, s[2:3]
1523 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1524 ; GFX9-NEXT: buffer_wbinvl1
1525 ; GFX9-NEXT: s_endpgm
1527 ; GFX10-LABEL: global_or_saddr_i32_nortn:
1529 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1530 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1531 ; GFX10-NEXT: global_atomic_or v0, v1, s[2:3]
1532 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1533 ; GFX10-NEXT: buffer_gl0_inv
1534 ; GFX10-NEXT: buffer_gl1_inv
1535 ; GFX10-NEXT: s_endpgm
1537 ; GFX11-LABEL: global_or_saddr_i32_nortn:
1539 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1540 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1541 ; GFX11-NEXT: global_atomic_or_b32 v0, v1, s[2:3]
1542 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1543 ; GFX11-NEXT: buffer_gl0_inv
1544 ; GFX11-NEXT: buffer_gl1_inv
1545 ; GFX11-NEXT: s_endpgm
1546 %zext.offset = zext i32 %voffset to i64
1547 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1548 %unused = atomicrmw or ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
; Non-returning 32-bit atomicrmw or (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset) - 128. The result (%unused) is never read, and the
; checks expect the destination-less saddr form with "offset:-128" and no glc
; (global_atomic_or on gfx900/gfx1010, global_atomic_or_b32 on gfx1100).
1552 define amdgpu_ps void @global_or_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1553 ; GFX9-LABEL: global_or_saddr_i32_nortn_neg128:
1555 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1556 ; GFX9-NEXT: global_atomic_or v0, v1, s[2:3] offset:-128
1557 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1558 ; GFX9-NEXT: buffer_wbinvl1
1559 ; GFX9-NEXT: s_endpgm
1561 ; GFX10-LABEL: global_or_saddr_i32_nortn_neg128:
1563 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1564 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1565 ; GFX10-NEXT: global_atomic_or v0, v1, s[2:3] offset:-128
1566 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1567 ; GFX10-NEXT: buffer_gl0_inv
1568 ; GFX10-NEXT: buffer_gl1_inv
1569 ; GFX10-NEXT: s_endpgm
1571 ; GFX11-LABEL: global_or_saddr_i32_nortn_neg128:
1573 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1574 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1575 ; GFX11-NEXT: global_atomic_or_b32 v0, v1, s[2:3] offset:-128
1576 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1577 ; GFX11-NEXT: buffer_gl0_inv
1578 ; GFX11-NEXT: buffer_gl1_inv
1579 ; GFX11-NEXT: s_endpgm
1580 %zext.offset = zext i32 %voffset to i64
1581 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1582 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1583 %unused = atomicrmw or ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
; Returning 64-bit atomicrmw or (syncscope "agent", seq_cst) at
; %sbase + zext(%voffset); the result is bitcast to <2 x float> and returned.
; The checks expect the saddr form with the glc modifier
; (global_atomic_or_x2 on gfx900/gfx1010, global_atomic_or_b64 on gfx1100).
1587 define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1588 ; GFX9-LABEL: global_or_saddr_i64_rtn:
1590 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1591 ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] glc
1592 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1593 ; GFX9-NEXT: buffer_wbinvl1
1594 ; GFX9-NEXT: ; return to shader part epilog
1596 ; GFX10-LABEL: global_or_saddr_i64_rtn:
1598 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1599 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1600 ; GFX10-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] glc
1601 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1602 ; GFX10-NEXT: buffer_gl0_inv
1603 ; GFX10-NEXT: buffer_gl1_inv
1604 ; GFX10-NEXT: ; return to shader part epilog
1606 ; GFX11-LABEL: global_or_saddr_i64_rtn:
1608 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1609 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1610 ; GFX11-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] glc
1611 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1612 ; GFX11-NEXT: buffer_gl0_inv
1613 ; GFX11-NEXT: buffer_gl1_inv
1614 ; GFX11-NEXT: ; return to shader part epilog
1615 %zext.offset = zext i32 %voffset to i64
1616 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1617 %rtn = atomicrmw or ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
1618 %cast.rtn = bitcast i64 %rtn to <2 x float>
1619 ret <2 x float> %cast.rtn
; 64-bit atomicrmw or, agent scope, seq_cst, with the old value returned.
; A constant -128 byte GEP is applied on top of %sbase + zext(%voffset); the
; checks expect it to appear as offset:-128 on the saddr form with glc set.
1622 define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1623 ; GFX9-LABEL: global_or_saddr_i64_rtn_neg128:
1625 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1626 ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1627 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1628 ; GFX9-NEXT: buffer_wbinvl1
1629 ; GFX9-NEXT: ; return to shader part epilog
1631 ; GFX10-LABEL: global_or_saddr_i64_rtn_neg128:
1633 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1634 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1635 ; GFX10-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1636 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1637 ; GFX10-NEXT: buffer_gl0_inv
1638 ; GFX10-NEXT: buffer_gl1_inv
1639 ; GFX10-NEXT: ; return to shader part epilog
1641 ; GFX11-LABEL: global_or_saddr_i64_rtn_neg128:
1643 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1644 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1645 ; GFX11-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1646 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1647 ; GFX11-NEXT: buffer_gl0_inv
1648 ; GFX11-NEXT: buffer_gl1_inv
1649 ; GFX11-NEXT: ; return to shader part epilog
1650 %zext.offset = zext i32 %voffset to i64
1651 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1652 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1653 %rtn = atomicrmw or ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
1654 %cast.rtn = bitcast i64 %rtn to <2 x float>
1655 ret <2 x float> %cast.rtn
; 64-bit atomicrmw or, agent scope, seq_cst, whose result (%unused) is dropped.
; The address is the inreg %sbase plus the zero-extended %voffset; the checks
; expect the saddr form without glc and without a destination register.
1658 define amdgpu_ps void @global_or_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1659 ; GFX9-LABEL: global_or_saddr_i64_nortn:
1661 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1662 ; GFX9-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3]
1663 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1664 ; GFX9-NEXT: buffer_wbinvl1
1665 ; GFX9-NEXT: s_endpgm
1667 ; GFX10-LABEL: global_or_saddr_i64_nortn:
1669 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1670 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1671 ; GFX10-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3]
1672 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1673 ; GFX10-NEXT: buffer_gl0_inv
1674 ; GFX10-NEXT: buffer_gl1_inv
1675 ; GFX10-NEXT: s_endpgm
1677 ; GFX11-LABEL: global_or_saddr_i64_nortn:
1679 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1680 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1681 ; GFX11-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3]
1682 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1683 ; GFX11-NEXT: buffer_gl0_inv
1684 ; GFX11-NEXT: buffer_gl1_inv
1685 ; GFX11-NEXT: s_endpgm
1686 %zext.offset = zext i32 %voffset to i64
1687 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1688 %unused = atomicrmw or ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
; 64-bit atomicrmw or, agent scope, seq_cst, whose result (%unused) is dropped.
; A constant -128 byte GEP is applied on top of %sbase + zext(%voffset); the
; checks expect it to appear as offset:-128 on the saddr form without glc.
1692 define amdgpu_ps void @global_or_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1693 ; GFX9-LABEL: global_or_saddr_i64_nortn_neg128:
1695 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1696 ; GFX9-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] offset:-128
1697 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1698 ; GFX9-NEXT: buffer_wbinvl1
1699 ; GFX9-NEXT: s_endpgm
1701 ; GFX10-LABEL: global_or_saddr_i64_nortn_neg128:
1703 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1704 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1705 ; GFX10-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] offset:-128
1706 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1707 ; GFX10-NEXT: buffer_gl0_inv
1708 ; GFX10-NEXT: buffer_gl1_inv
1709 ; GFX10-NEXT: s_endpgm
1711 ; GFX11-LABEL: global_or_saddr_i64_nortn_neg128:
1713 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1714 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1715 ; GFX11-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] offset:-128
1716 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1717 ; GFX11-NEXT: buffer_gl0_inv
1718 ; GFX11-NEXT: buffer_gl1_inv
1719 ; GFX11-NEXT: s_endpgm
1720 %zext.offset = zext i32 %voffset to i64
1721 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1722 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1723 %unused = atomicrmw or ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
1727 ; --------------------------------------------------------------------------------
; atomicrmw xor
1729 ; --------------------------------------------------------------------------------
; 32-bit atomicrmw xor, agent scope, seq_cst, whose old value is consumed
; (bitcast to float). The address is the inreg %sbase plus the zero-extended
; %voffset; the checks expect the saddr form (s[2:3] base, v0 offset) with glc.
1731 define amdgpu_ps float @global_xor_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1732 ; GFX9-LABEL: global_xor_saddr_i32_rtn:
1734 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1735 ; GFX9-NEXT: global_atomic_xor v0, v0, v1, s[2:3] glc
1736 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1737 ; GFX9-NEXT: buffer_wbinvl1
1738 ; GFX9-NEXT: ; return to shader part epilog
1740 ; GFX10-LABEL: global_xor_saddr_i32_rtn:
1742 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1743 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1744 ; GFX10-NEXT: global_atomic_xor v0, v0, v1, s[2:3] glc
1745 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1746 ; GFX10-NEXT: buffer_gl0_inv
1747 ; GFX10-NEXT: buffer_gl1_inv
1748 ; GFX10-NEXT: ; return to shader part epilog
1750 ; GFX11-LABEL: global_xor_saddr_i32_rtn:
1752 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1753 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1754 ; GFX11-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] glc
1755 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1756 ; GFX11-NEXT: buffer_gl0_inv
1757 ; GFX11-NEXT: buffer_gl1_inv
1758 ; GFX11-NEXT: ; return to shader part epilog
1759 %zext.offset = zext i32 %voffset to i64
1760 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1761 %rtn = atomicrmw xor ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
1762 %cast.rtn = bitcast i32 %rtn to float
; 32-bit atomicrmw xor, agent scope, seq_cst, whose old value is consumed
; (bitcast to float). A constant -128 byte GEP is applied on top of
; %sbase + zext(%voffset); the checks expect offset:-128 on the saddr form with glc.
1766 define amdgpu_ps float @global_xor_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1767 ; GFX9-LABEL: global_xor_saddr_i32_rtn_neg128:
1769 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1770 ; GFX9-NEXT: global_atomic_xor v0, v0, v1, s[2:3] offset:-128 glc
1771 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1772 ; GFX9-NEXT: buffer_wbinvl1
1773 ; GFX9-NEXT: ; return to shader part epilog
1775 ; GFX10-LABEL: global_xor_saddr_i32_rtn_neg128:
1777 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1778 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1779 ; GFX10-NEXT: global_atomic_xor v0, v0, v1, s[2:3] offset:-128 glc
1780 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1781 ; GFX10-NEXT: buffer_gl0_inv
1782 ; GFX10-NEXT: buffer_gl1_inv
1783 ; GFX10-NEXT: ; return to shader part epilog
1785 ; GFX11-LABEL: global_xor_saddr_i32_rtn_neg128:
1787 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1788 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1789 ; GFX11-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] offset:-128 glc
1790 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1791 ; GFX11-NEXT: buffer_gl0_inv
1792 ; GFX11-NEXT: buffer_gl1_inv
1793 ; GFX11-NEXT: ; return to shader part epilog
1794 %zext.offset = zext i32 %voffset to i64
1795 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1796 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1797 %rtn = atomicrmw xor ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
1798 %cast.rtn = bitcast i32 %rtn to float
; 32-bit atomicrmw xor, agent scope, seq_cst, whose result (%unused) is dropped.
; The address is the inreg %sbase plus the zero-extended %voffset; the checks
; expect the saddr form without glc and without a destination register.
1802 define amdgpu_ps void @global_xor_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1803 ; GFX9-LABEL: global_xor_saddr_i32_nortn:
1805 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1806 ; GFX9-NEXT: global_atomic_xor v0, v1, s[2:3]
1807 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1808 ; GFX9-NEXT: buffer_wbinvl1
1809 ; GFX9-NEXT: s_endpgm
1811 ; GFX10-LABEL: global_xor_saddr_i32_nortn:
1813 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1814 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1815 ; GFX10-NEXT: global_atomic_xor v0, v1, s[2:3]
1816 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1817 ; GFX10-NEXT: buffer_gl0_inv
1818 ; GFX10-NEXT: buffer_gl1_inv
1819 ; GFX10-NEXT: s_endpgm
1821 ; GFX11-LABEL: global_xor_saddr_i32_nortn:
1823 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1824 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1825 ; GFX11-NEXT: global_atomic_xor_b32 v0, v1, s[2:3]
1826 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1827 ; GFX11-NEXT: buffer_gl0_inv
1828 ; GFX11-NEXT: buffer_gl1_inv
1829 ; GFX11-NEXT: s_endpgm
1830 %zext.offset = zext i32 %voffset to i64
1831 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1832 %unused = atomicrmw xor ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
; 32-bit atomicrmw xor, agent scope, seq_cst, whose result (%unused) is dropped.
; A constant -128 byte GEP is applied on top of %sbase + zext(%voffset); the
; checks expect it to appear as offset:-128 on the saddr form without glc.
1836 define amdgpu_ps void @global_xor_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
1837 ; GFX9-LABEL: global_xor_saddr_i32_nortn_neg128:
1839 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1840 ; GFX9-NEXT: global_atomic_xor v0, v1, s[2:3] offset:-128
1841 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1842 ; GFX9-NEXT: buffer_wbinvl1
1843 ; GFX9-NEXT: s_endpgm
1845 ; GFX10-LABEL: global_xor_saddr_i32_nortn_neg128:
1847 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1848 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1849 ; GFX10-NEXT: global_atomic_xor v0, v1, s[2:3] offset:-128
1850 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1851 ; GFX10-NEXT: buffer_gl0_inv
1852 ; GFX10-NEXT: buffer_gl1_inv
1853 ; GFX10-NEXT: s_endpgm
1855 ; GFX11-LABEL: global_xor_saddr_i32_nortn_neg128:
1857 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1858 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1859 ; GFX11-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] offset:-128
1860 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1861 ; GFX11-NEXT: buffer_gl0_inv
1862 ; GFX11-NEXT: buffer_gl1_inv
1863 ; GFX11-NEXT: s_endpgm
1864 %zext.offset = zext i32 %voffset to i64
1865 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1866 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1867 %unused = atomicrmw xor ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
; 64-bit atomicrmw xor, agent scope, seq_cst, with the old value returned.
; The address is the inreg %sbase plus the zero-extended %voffset; the checks
; expect the saddr form (s[2:3] base, v0 offset) with glc set.
1871 define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1872 ; GFX9-LABEL: global_xor_saddr_i64_rtn:
1874 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1875 ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] glc
1876 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1877 ; GFX9-NEXT: buffer_wbinvl1
1878 ; GFX9-NEXT: ; return to shader part epilog
1880 ; GFX10-LABEL: global_xor_saddr_i64_rtn:
1882 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1883 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1884 ; GFX10-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] glc
1885 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1886 ; GFX10-NEXT: buffer_gl0_inv
1887 ; GFX10-NEXT: buffer_gl1_inv
1888 ; GFX10-NEXT: ; return to shader part epilog
1890 ; GFX11-LABEL: global_xor_saddr_i64_rtn:
1892 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1893 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1894 ; GFX11-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] glc
1895 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1896 ; GFX11-NEXT: buffer_gl0_inv
1897 ; GFX11-NEXT: buffer_gl1_inv
1898 ; GFX11-NEXT: ; return to shader part epilog
1899 %zext.offset = zext i32 %voffset to i64
1900 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1901 %rtn = atomicrmw xor ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
1902 %cast.rtn = bitcast i64 %rtn to <2 x float>
1903 ret <2 x float> %cast.rtn
; 64-bit atomicrmw xor, agent scope, seq_cst, with the old value returned.
; A constant -128 byte GEP is applied on top of %sbase + zext(%voffset); the
; checks expect it to appear as offset:-128 on the saddr form with glc set.
1906 define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1907 ; GFX9-LABEL: global_xor_saddr_i64_rtn_neg128:
1909 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1910 ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1911 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1912 ; GFX9-NEXT: buffer_wbinvl1
1913 ; GFX9-NEXT: ; return to shader part epilog
1915 ; GFX10-LABEL: global_xor_saddr_i64_rtn_neg128:
1917 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1918 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1919 ; GFX10-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1920 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1921 ; GFX10-NEXT: buffer_gl0_inv
1922 ; GFX10-NEXT: buffer_gl1_inv
1923 ; GFX10-NEXT: ; return to shader part epilog
1925 ; GFX11-LABEL: global_xor_saddr_i64_rtn_neg128:
1927 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1928 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1929 ; GFX11-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
1930 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1931 ; GFX11-NEXT: buffer_gl0_inv
1932 ; GFX11-NEXT: buffer_gl1_inv
1933 ; GFX11-NEXT: ; return to shader part epilog
1934 %zext.offset = zext i32 %voffset to i64
1935 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1936 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
1937 %rtn = atomicrmw xor ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
1938 %cast.rtn = bitcast i64 %rtn to <2 x float>
1939 ret <2 x float> %cast.rtn
; 64-bit atomicrmw xor, agent scope, seq_cst, whose result (%unused) is dropped.
; The address is the inreg %sbase plus the zero-extended %voffset; the checks
; expect the saddr form without glc and without a destination register.
1942 define amdgpu_ps void @global_xor_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1943 ; GFX9-LABEL: global_xor_saddr_i64_nortn:
1945 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1946 ; GFX9-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3]
1947 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1948 ; GFX9-NEXT: buffer_wbinvl1
1949 ; GFX9-NEXT: s_endpgm
1951 ; GFX10-LABEL: global_xor_saddr_i64_nortn:
1953 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1954 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1955 ; GFX10-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3]
1956 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1957 ; GFX10-NEXT: buffer_gl0_inv
1958 ; GFX10-NEXT: buffer_gl1_inv
1959 ; GFX10-NEXT: s_endpgm
1961 ; GFX11-LABEL: global_xor_saddr_i64_nortn:
1963 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1964 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1965 ; GFX11-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3]
1966 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1967 ; GFX11-NEXT: buffer_gl0_inv
1968 ; GFX11-NEXT: buffer_gl1_inv
1969 ; GFX11-NEXT: s_endpgm
1970 %zext.offset = zext i32 %voffset to i64
1971 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
1972 %unused = atomicrmw xor ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
; 64-bit atomicrmw xor, agent scope, seq_cst, whose result (%unused) is dropped.
; A constant -128 byte GEP is applied on top of %sbase + zext(%voffset); the
; checks expect it to appear as offset:-128 on the saddr form without glc.
1976 define amdgpu_ps void @global_xor_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
1977 ; GFX9-LABEL: global_xor_saddr_i64_nortn_neg128:
1979 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1980 ; GFX9-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] offset:-128
1981 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1982 ; GFX9-NEXT: buffer_wbinvl1
1983 ; GFX9-NEXT: s_endpgm
1985 ; GFX10-LABEL: global_xor_saddr_i64_nortn_neg128:
1987 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1988 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1989 ; GFX10-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] offset:-128
1990 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1991 ; GFX10-NEXT: buffer_gl0_inv
1992 ; GFX10-NEXT: buffer_gl1_inv
1993 ; GFX10-NEXT: s_endpgm
1995 ; GFX11-LABEL: global_xor_saddr_i64_nortn_neg128:
1997 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1998 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1999 ; GFX11-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] offset:-128
2000 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2001 ; GFX11-NEXT: buffer_gl0_inv
2002 ; GFX11-NEXT: buffer_gl1_inv
2003 ; GFX11-NEXT: s_endpgm
2004 %zext.offset = zext i32 %voffset to i64
2005 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2006 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2007 %unused = atomicrmw xor ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
2011 ; --------------------------------------------------------------------------------
; atomicrmw max
2013 ; --------------------------------------------------------------------------------
; 32-bit atomicrmw max (signed: smax / max_i32 in the checks), workgroup scope,
; seq_cst, whose old value is consumed (bitcast to float). The address is the
; inreg %sbase plus the zero-extended %voffset; the checks expect the saddr form
; with glc, no buffer_wbinvl1 on gfx9 and only buffer_gl0_inv on gfx10/gfx11.
2015 define amdgpu_ps float @global_max_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2016 ; GFX9-LABEL: global_max_saddr_i32_rtn:
2018 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2019 ; GFX9-NEXT: global_atomic_smax v0, v0, v1, s[2:3] glc
2020 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2021 ; GFX9-NEXT: ; return to shader part epilog
2023 ; GFX10-LABEL: global_max_saddr_i32_rtn:
2025 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2026 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2027 ; GFX10-NEXT: global_atomic_smax v0, v0, v1, s[2:3] glc
2028 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2029 ; GFX10-NEXT: buffer_gl0_inv
2030 ; GFX10-NEXT: ; return to shader part epilog
2032 ; GFX11-LABEL: global_max_saddr_i32_rtn:
2034 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2035 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2036 ; GFX11-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] glc
2037 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2038 ; GFX11-NEXT: buffer_gl0_inv
2039 ; GFX11-NEXT: ; return to shader part epilog
2040 %zext.offset = zext i32 %voffset to i64
2041 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2042 %rtn = atomicrmw max ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst
2043 %cast.rtn = bitcast i32 %rtn to float
; 32-bit atomicrmw max (signed), workgroup scope, seq_cst, whose old value is
; consumed (bitcast to float). A constant -128 byte GEP is applied on top of
; %sbase + zext(%voffset); the checks expect offset:-128 on the saddr form with glc.
2047 define amdgpu_ps float @global_max_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2048 ; GFX9-LABEL: global_max_saddr_i32_rtn_neg128:
2050 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2051 ; GFX9-NEXT: global_atomic_smax v0, v0, v1, s[2:3] offset:-128 glc
2052 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2053 ; GFX9-NEXT: ; return to shader part epilog
2055 ; GFX10-LABEL: global_max_saddr_i32_rtn_neg128:
2057 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2058 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2059 ; GFX10-NEXT: global_atomic_smax v0, v0, v1, s[2:3] offset:-128 glc
2060 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2061 ; GFX10-NEXT: buffer_gl0_inv
2062 ; GFX10-NEXT: ; return to shader part epilog
2064 ; GFX11-LABEL: global_max_saddr_i32_rtn_neg128:
2066 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2067 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2068 ; GFX11-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] offset:-128 glc
2069 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2070 ; GFX11-NEXT: buffer_gl0_inv
2071 ; GFX11-NEXT: ; return to shader part epilog
2072 %zext.offset = zext i32 %voffset to i64
2073 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2074 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2075 %rtn = atomicrmw max ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst
2076 %cast.rtn = bitcast i32 %rtn to float
; 32-bit atomicrmw max (signed), workgroup scope, seq_cst, whose result
; (%unused) is dropped. The address is the inreg %sbase plus the zero-extended
; %voffset; the checks expect the saddr form without glc, no buffer_wbinvl1 on
; gfx9 and only buffer_gl0_inv on gfx10/gfx11.
2080 define amdgpu_ps void @global_max_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2081 ; GFX9-LABEL: global_max_saddr_i32_nortn:
2083 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2084 ; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3]
2085 ; GFX9-NEXT: s_endpgm
2087 ; GFX10-LABEL: global_max_saddr_i32_nortn:
2089 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2090 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2091 ; GFX10-NEXT: global_atomic_smax v0, v1, s[2:3]
2092 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2093 ; GFX10-NEXT: buffer_gl0_inv
2094 ; GFX10-NEXT: s_endpgm
2096 ; GFX11-LABEL: global_max_saddr_i32_nortn:
2098 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2099 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2100 ; GFX11-NEXT: global_atomic_max_i32 v0, v1, s[2:3]
2101 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2102 ; GFX11-NEXT: buffer_gl0_inv
2103 ; GFX11-NEXT: s_endpgm
2104 %zext.offset = zext i32 %voffset to i64
2105 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2106 %unused = atomicrmw max ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst
; 32-bit atomicrmw max (signed), workgroup scope, seq_cst, whose result
; (%unused) is dropped. A constant -128 byte GEP is applied on top of
; %sbase + zext(%voffset); the checks expect offset:-128 on the saddr form
; without glc.
2110 define amdgpu_ps void @global_max_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2111 ; GFX9-LABEL: global_max_saddr_i32_nortn_neg128:
2113 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2114 ; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3] offset:-128
2115 ; GFX9-NEXT: s_endpgm
2117 ; GFX10-LABEL: global_max_saddr_i32_nortn_neg128:
2119 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2120 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2121 ; GFX10-NEXT: global_atomic_smax v0, v1, s[2:3] offset:-128
2122 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2123 ; GFX10-NEXT: buffer_gl0_inv
2124 ; GFX10-NEXT: s_endpgm
2126 ; GFX11-LABEL: global_max_saddr_i32_nortn_neg128:
2128 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2129 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2130 ; GFX11-NEXT: global_atomic_max_i32 v0, v1, s[2:3] offset:-128
2131 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2132 ; GFX11-NEXT: buffer_gl0_inv
2133 ; GFX11-NEXT: s_endpgm
2134 %zext.offset = zext i32 %voffset to i64
2135 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2136 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2137 %unused = atomicrmw max ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst
; 64-bit atomicrmw max (signed: smax_x2 / max_i64 in the checks), workgroup
; scope, seq_cst, with the old value returned. The address is the inreg %sbase
; plus the zero-extended %voffset; the checks expect the saddr form with glc.
2141 define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2142 ; GFX9-LABEL: global_max_saddr_i64_rtn:
2144 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2145 ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] glc
2146 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2147 ; GFX9-NEXT: ; return to shader part epilog
2149 ; GFX10-LABEL: global_max_saddr_i64_rtn:
2151 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2152 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2153 ; GFX10-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] glc
2154 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2155 ; GFX10-NEXT: buffer_gl0_inv
2156 ; GFX10-NEXT: ; return to shader part epilog
2158 ; GFX11-LABEL: global_max_saddr_i64_rtn:
2160 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2161 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2162 ; GFX11-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] glc
2163 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2164 ; GFX11-NEXT: buffer_gl0_inv
2165 ; GFX11-NEXT: ; return to shader part epilog
2166 %zext.offset = zext i32 %voffset to i64
2167 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2168 %rtn = atomicrmw max ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst
2169 %cast.rtn = bitcast i64 %rtn to <2 x float>
2170 ret <2 x float> %cast.rtn
; 64-bit atomicrmw max (signed), workgroup scope, seq_cst, with the old value
; returned. A constant -128 byte GEP is applied on top of
; %sbase + zext(%voffset); the checks expect offset:-128 on the saddr form with glc.
2173 define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2174 ; GFX9-LABEL: global_max_saddr_i64_rtn_neg128:
2176 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2177 ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2178 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2179 ; GFX9-NEXT: ; return to shader part epilog
2181 ; GFX10-LABEL: global_max_saddr_i64_rtn_neg128:
2183 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2184 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2185 ; GFX10-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2186 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2187 ; GFX10-NEXT: buffer_gl0_inv
2188 ; GFX10-NEXT: ; return to shader part epilog
2190 ; GFX11-LABEL: global_max_saddr_i64_rtn_neg128:
2192 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2193 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2194 ; GFX11-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2195 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2196 ; GFX11-NEXT: buffer_gl0_inv
2197 ; GFX11-NEXT: ; return to shader part epilog
2198 %zext.offset = zext i32 %voffset to i64
2199 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2200 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2201 %rtn = atomicrmw max ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst
2202 %cast.rtn = bitcast i64 %rtn to <2 x float>
2203 ret <2 x float> %cast.rtn
; 64-bit atomicrmw max (signed), workgroup scope, seq_cst, whose result
; (%unused) is dropped. The address is the inreg %sbase plus the zero-extended
; %voffset; the checks expect the saddr form without glc and without a
; destination register.
2206 define amdgpu_ps void @global_max_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2207 ; GFX9-LABEL: global_max_saddr_i64_nortn:
2209 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2210 ; GFX9-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3]
2211 ; GFX9-NEXT: s_endpgm
2213 ; GFX10-LABEL: global_max_saddr_i64_nortn:
2215 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2216 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2217 ; GFX10-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3]
2218 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2219 ; GFX10-NEXT: buffer_gl0_inv
2220 ; GFX10-NEXT: s_endpgm
2222 ; GFX11-LABEL: global_max_saddr_i64_nortn:
2224 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2225 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2226 ; GFX11-NEXT: global_atomic_max_i64 v0, v[1:2], s[2:3]
2227 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2228 ; GFX11-NEXT: buffer_gl0_inv
2229 ; GFX11-NEXT: s_endpgm
2230 %zext.offset = zext i32 %voffset to i64
2231 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2232 %unused = atomicrmw max ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst
; 64-bit atomicrmw max (signed), workgroup scope, seq_cst, whose result
; (%unused) is dropped. A constant -128 byte GEP is applied on top of
; %sbase + zext(%voffset); the checks expect offset:-128 on the saddr form
; without glc.
2236 define amdgpu_ps void @global_max_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2237 ; GFX9-LABEL: global_max_saddr_i64_nortn_neg128:
2239 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2240 ; GFX9-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] offset:-128
2241 ; GFX9-NEXT: s_endpgm
2243 ; GFX10-LABEL: global_max_saddr_i64_nortn_neg128:
2245 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2246 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2247 ; GFX10-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] offset:-128
2248 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2249 ; GFX10-NEXT: buffer_gl0_inv
2250 ; GFX10-NEXT: s_endpgm
2252 ; GFX11-LABEL: global_max_saddr_i64_nortn_neg128:
2254 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2255 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2256 ; GFX11-NEXT: global_atomic_max_i64 v0, v[1:2], s[2:3] offset:-128
2257 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2258 ; GFX11-NEXT: buffer_gl0_inv
2259 ; GFX11-NEXT: s_endpgm
2260 %zext.offset = zext i32 %voffset to i64
2261 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2262 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2263 %unused = atomicrmw max ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst
2267 ; --------------------------------------------------------------------------------
; atomicrmw min
2269 ; --------------------------------------------------------------------------------
; 32-bit atomicrmw min (signed: smin / min_i32 in the checks), workgroup scope,
; seq_cst, whose old value is consumed (bitcast to float). The address is the
; inreg %sbase plus the zero-extended %voffset; the checks expect the saddr form
; with glc, no buffer_wbinvl1 on gfx9 and only buffer_gl0_inv on gfx10/gfx11.
2271 define amdgpu_ps float @global_min_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2272 ; GFX9-LABEL: global_min_saddr_i32_rtn:
2274 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2275 ; GFX9-NEXT: global_atomic_smin v0, v0, v1, s[2:3] glc
2276 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2277 ; GFX9-NEXT: ; return to shader part epilog
2279 ; GFX10-LABEL: global_min_saddr_i32_rtn:
2281 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2282 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2283 ; GFX10-NEXT: global_atomic_smin v0, v0, v1, s[2:3] glc
2284 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2285 ; GFX10-NEXT: buffer_gl0_inv
2286 ; GFX10-NEXT: ; return to shader part epilog
2288 ; GFX11-LABEL: global_min_saddr_i32_rtn:
2290 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2291 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2292 ; GFX11-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] glc
2293 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2294 ; GFX11-NEXT: buffer_gl0_inv
2295 ; GFX11-NEXT: ; return to shader part epilog
2296 %zext.offset = zext i32 %voffset to i64
2297 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2298 %rtn = atomicrmw min ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst
2299 %cast.rtn = bitcast i32 %rtn to float
; 32-bit atomicrmw min (signed), workgroup scope, seq_cst, whose old value is
; consumed (bitcast to float). A constant -128 byte GEP is applied on top of
; %sbase + zext(%voffset); the checks expect offset:-128 on the saddr form with glc.
2303 define amdgpu_ps float @global_min_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2304 ; GFX9-LABEL: global_min_saddr_i32_rtn_neg128:
2306 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2307 ; GFX9-NEXT: global_atomic_smin v0, v0, v1, s[2:3] offset:-128 glc
2308 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2309 ; GFX9-NEXT: ; return to shader part epilog
2311 ; GFX10-LABEL: global_min_saddr_i32_rtn_neg128:
2313 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2314 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2315 ; GFX10-NEXT: global_atomic_smin v0, v0, v1, s[2:3] offset:-128 glc
2316 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2317 ; GFX10-NEXT: buffer_gl0_inv
2318 ; GFX10-NEXT: ; return to shader part epilog
2320 ; GFX11-LABEL: global_min_saddr_i32_rtn_neg128:
2322 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2323 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2324 ; GFX11-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] offset:-128 glc
2325 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2326 ; GFX11-NEXT: buffer_gl0_inv
2327 ; GFX11-NEXT: ; return to shader part epilog
2328 %zext.offset = zext i32 %voffset to i64
2329 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2330 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2331 %rtn = atomicrmw min ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst
2332 %cast.rtn = bitcast i32 %rtn to float
; 32-bit atomicrmw min (signed), workgroup scope, seq_cst, whose result
; (%unused) is dropped. The address is the inreg %sbase plus the zero-extended
; %voffset; the checks expect the saddr form without glc, no buffer_wbinvl1 on
; gfx9 and only buffer_gl0_inv on gfx10/gfx11.
2336 define amdgpu_ps void @global_min_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2337 ; GFX9-LABEL: global_min_saddr_i32_nortn:
2339 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2340 ; GFX9-NEXT: global_atomic_smin v0, v1, s[2:3]
2341 ; GFX9-NEXT: s_endpgm
2343 ; GFX10-LABEL: global_min_saddr_i32_nortn:
2345 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2346 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2347 ; GFX10-NEXT: global_atomic_smin v0, v1, s[2:3]
2348 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2349 ; GFX10-NEXT: buffer_gl0_inv
2350 ; GFX10-NEXT: s_endpgm
2352 ; GFX11-LABEL: global_min_saddr_i32_nortn:
2354 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2355 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2356 ; GFX11-NEXT: global_atomic_min_i32 v0, v1, s[2:3]
2357 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2358 ; GFX11-NEXT: buffer_gl0_inv
2359 ; GFX11-NEXT: s_endpgm
2360 %zext.offset = zext i32 %voffset to i64
2361 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2362 %unused = atomicrmw min ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst
; 32-bit atomicrmw min (signed), workgroup scope, seq_cst, whose result
; (%unused) is dropped. A constant -128 byte GEP is applied on top of
; %sbase + zext(%voffset); the checks expect offset:-128 on the saddr form
; without glc.
2366 define amdgpu_ps void @global_min_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2367 ; GFX9-LABEL: global_min_saddr_i32_nortn_neg128:
2369 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2370 ; GFX9-NEXT: global_atomic_smin v0, v1, s[2:3] offset:-128
2371 ; GFX9-NEXT: s_endpgm
2373 ; GFX10-LABEL: global_min_saddr_i32_nortn_neg128:
2375 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2376 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2377 ; GFX10-NEXT: global_atomic_smin v0, v1, s[2:3] offset:-128
2378 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2379 ; GFX10-NEXT: buffer_gl0_inv
2380 ; GFX10-NEXT: s_endpgm
2382 ; GFX11-LABEL: global_min_saddr_i32_nortn_neg128:
2384 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2385 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2386 ; GFX11-NEXT: global_atomic_min_i32 v0, v1, s[2:3] offset:-128
2387 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2388 ; GFX11-NEXT: buffer_gl0_inv
2389 ; GFX11-NEXT: s_endpgm
2390 %zext.offset = zext i32 %voffset to i64
2391 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2392 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2393 %unused = atomicrmw min ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst
; 64-bit atomicrmw min (signed: smin_x2 / min_i64 in the checks), workgroup
; scope, seq_cst, with the old value returned. The address is the inreg %sbase
; plus the zero-extended %voffset; the checks expect the saddr form with glc.
2397 define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2398 ; GFX9-LABEL: global_min_saddr_i64_rtn:
2400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2401 ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] glc
2402 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2403 ; GFX9-NEXT: ; return to shader part epilog
2405 ; GFX10-LABEL: global_min_saddr_i64_rtn:
2407 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2408 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2409 ; GFX10-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] glc
2410 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2411 ; GFX10-NEXT: buffer_gl0_inv
2412 ; GFX10-NEXT: ; return to shader part epilog
2414 ; GFX11-LABEL: global_min_saddr_i64_rtn:
2416 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2417 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2418 ; GFX11-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] glc
2419 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2420 ; GFX11-NEXT: buffer_gl0_inv
2421 ; GFX11-NEXT: ; return to shader part epilog
2422 %zext.offset = zext i32 %voffset to i64
2423 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2424 %rtn = atomicrmw min ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst
2425 %cast.rtn = bitcast i64 %rtn to <2 x float>
2426 ret <2 x float> %cast.rtn
; Signed i64 atomic min at %sbase + zext(%voffset) - 128 bytes, workgroup
; scope, seq_cst, returning the atomicrmw result as <2 x float>. The checks
; expect the returning 64-bit signed-min instruction (glc modifier) with the
; -128 displacement folded into its offset field and s[2:3] as scalar base.
2429 define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2430 ; GFX9-LABEL: global_min_saddr_i64_rtn_neg128:
2432 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2433 ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2434 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2435 ; GFX9-NEXT: ; return to shader part epilog
2437 ; GFX10-LABEL: global_min_saddr_i64_rtn_neg128:
2439 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2440 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2441 ; GFX10-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2442 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2443 ; GFX10-NEXT: buffer_gl0_inv
2444 ; GFX10-NEXT: ; return to shader part epilog
2446 ; GFX11-LABEL: global_min_saddr_i64_rtn_neg128:
2448 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2449 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2450 ; GFX11-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2451 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2452 ; GFX11-NEXT: buffer_gl0_inv
2453 ; GFX11-NEXT: ; return to shader part epilog
2454 %zext.offset = zext i32 %voffset to i64
2455 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2456 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2457 %rtn = atomicrmw min ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst
2458 %cast.rtn = bitcast i64 %rtn to <2 x float>
2459 ret <2 x float> %cast.rtn
; Signed i64 atomic min with the result discarded, workgroup scope, seq_cst,
; at %sbase + zext(%voffset). The checks expect the no-return 64-bit
; signed-min instruction (no glc modifier) with s[2:3] as the scalar base.
2462 define amdgpu_ps void @global_min_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2463 ; GFX9-LABEL: global_min_saddr_i64_nortn:
2465 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2466 ; GFX9-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3]
2467 ; GFX9-NEXT: s_endpgm
2469 ; GFX10-LABEL: global_min_saddr_i64_nortn:
2471 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2472 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2473 ; GFX10-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3]
2474 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2475 ; GFX10-NEXT: buffer_gl0_inv
2476 ; GFX10-NEXT: s_endpgm
2478 ; GFX11-LABEL: global_min_saddr_i64_nortn:
2480 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2481 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2482 ; GFX11-NEXT: global_atomic_min_i64 v0, v[1:2], s[2:3]
2483 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2484 ; GFX11-NEXT: buffer_gl0_inv
2485 ; GFX11-NEXT: s_endpgm
2486 %zext.offset = zext i32 %voffset to i64
2487 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2488 %unused = atomicrmw min ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst
; Signed i64 atomic min with the result discarded, workgroup scope, seq_cst,
; at %sbase + zext(%voffset) - 128 bytes. The checks expect the no-return
; 64-bit signed-min instruction with the -128 displacement folded into its
; offset field and s[2:3] as the scalar base operand.
2492 define amdgpu_ps void @global_min_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2493 ; GFX9-LABEL: global_min_saddr_i64_nortn_neg128:
2495 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2496 ; GFX9-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] offset:-128
2497 ; GFX9-NEXT: s_endpgm
2499 ; GFX10-LABEL: global_min_saddr_i64_nortn_neg128:
2501 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2502 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2503 ; GFX10-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] offset:-128
2504 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2505 ; GFX10-NEXT: buffer_gl0_inv
2506 ; GFX10-NEXT: s_endpgm
2508 ; GFX11-LABEL: global_min_saddr_i64_nortn_neg128:
2510 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2511 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2512 ; GFX11-NEXT: global_atomic_min_i64 v0, v[1:2], s[2:3] offset:-128
2513 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2514 ; GFX11-NEXT: buffer_gl0_inv
2515 ; GFX11-NEXT: s_endpgm
2516 %zext.offset = zext i32 %voffset to i64
2517 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2518 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2519 %unused = atomicrmw min ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst
2523 ; --------------------------------------------------------------------------------
2525 ; --------------------------------------------------------------------------------
; Unsigned i32 atomic max at %sbase + zext(%voffset), workgroup scope,
; seq_cst. The atomicrmw result is bitcast to float, the function's return
; type. The checks expect the returning unsigned-max instruction (glc
; modifier, result in v0) with s[2:3] as the scalar base operand.
2527 define amdgpu_ps float @global_umax_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2528 ; GFX9-LABEL: global_umax_saddr_i32_rtn:
2530 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2531 ; GFX9-NEXT: global_atomic_umax v0, v0, v1, s[2:3] glc
2532 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2533 ; GFX9-NEXT: ; return to shader part epilog
2535 ; GFX10-LABEL: global_umax_saddr_i32_rtn:
2537 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2538 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2539 ; GFX10-NEXT: global_atomic_umax v0, v0, v1, s[2:3] glc
2540 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2541 ; GFX10-NEXT: buffer_gl0_inv
2542 ; GFX10-NEXT: ; return to shader part epilog
2544 ; GFX11-LABEL: global_umax_saddr_i32_rtn:
2546 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2547 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2548 ; GFX11-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] glc
2549 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2550 ; GFX11-NEXT: buffer_gl0_inv
2551 ; GFX11-NEXT: ; return to shader part epilog
2552 %zext.offset = zext i32 %voffset to i64
2553 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2554 %rtn = atomicrmw umax ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst
2555 %cast.rtn = bitcast i32 %rtn to float
; Unsigned i32 atomic max at %sbase + zext(%voffset) - 128 bytes, workgroup
; scope, seq_cst; the atomicrmw result is bitcast to float. The checks expect
; the returning unsigned-max instruction (glc modifier) with the -128
; displacement folded into its offset field and s[2:3] as scalar base.
2559 define amdgpu_ps float @global_umax_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2560 ; GFX9-LABEL: global_umax_saddr_i32_rtn_neg128:
2562 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2563 ; GFX9-NEXT: global_atomic_umax v0, v0, v1, s[2:3] offset:-128 glc
2564 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2565 ; GFX9-NEXT: ; return to shader part epilog
2567 ; GFX10-LABEL: global_umax_saddr_i32_rtn_neg128:
2569 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2570 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2571 ; GFX10-NEXT: global_atomic_umax v0, v0, v1, s[2:3] offset:-128 glc
2572 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2573 ; GFX10-NEXT: buffer_gl0_inv
2574 ; GFX10-NEXT: ; return to shader part epilog
2576 ; GFX11-LABEL: global_umax_saddr_i32_rtn_neg128:
2578 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2579 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2580 ; GFX11-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] offset:-128 glc
2581 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2582 ; GFX11-NEXT: buffer_gl0_inv
2583 ; GFX11-NEXT: ; return to shader part epilog
2584 %zext.offset = zext i32 %voffset to i64
2585 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2586 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2587 %rtn = atomicrmw umax ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst
2588 %cast.rtn = bitcast i32 %rtn to float
; Unsigned i32 atomic max with the result discarded, workgroup scope,
; seq_cst, at %sbase + zext(%voffset). The checks expect the no-return
; unsigned-max instruction (no glc modifier) with s[2:3] as the scalar base.
2592 define amdgpu_ps void @global_umax_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2593 ; GFX9-LABEL: global_umax_saddr_i32_nortn:
2595 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2596 ; GFX9-NEXT: global_atomic_umax v0, v1, s[2:3]
2597 ; GFX9-NEXT: s_endpgm
2599 ; GFX10-LABEL: global_umax_saddr_i32_nortn:
2601 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2602 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2603 ; GFX10-NEXT: global_atomic_umax v0, v1, s[2:3]
2604 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2605 ; GFX10-NEXT: buffer_gl0_inv
2606 ; GFX10-NEXT: s_endpgm
2608 ; GFX11-LABEL: global_umax_saddr_i32_nortn:
2610 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2611 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2612 ; GFX11-NEXT: global_atomic_max_u32 v0, v1, s[2:3]
2613 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2614 ; GFX11-NEXT: buffer_gl0_inv
2615 ; GFX11-NEXT: s_endpgm
2616 %zext.offset = zext i32 %voffset to i64
2617 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2618 %unused = atomicrmw umax ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst
; Unsigned i32 atomic max with the result discarded, workgroup scope,
; seq_cst, at %sbase + zext(%voffset) - 128 bytes. The checks expect the
; no-return unsigned-max instruction with the -128 displacement folded into
; its offset field and s[2:3] as the scalar base operand.
2622 define amdgpu_ps void @global_umax_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2623 ; GFX9-LABEL: global_umax_saddr_i32_nortn_neg128:
2625 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2626 ; GFX9-NEXT: global_atomic_umax v0, v1, s[2:3] offset:-128
2627 ; GFX9-NEXT: s_endpgm
2629 ; GFX10-LABEL: global_umax_saddr_i32_nortn_neg128:
2631 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2632 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2633 ; GFX10-NEXT: global_atomic_umax v0, v1, s[2:3] offset:-128
2634 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2635 ; GFX10-NEXT: buffer_gl0_inv
2636 ; GFX10-NEXT: s_endpgm
2638 ; GFX11-LABEL: global_umax_saddr_i32_nortn_neg128:
2640 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2641 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2642 ; GFX11-NEXT: global_atomic_max_u32 v0, v1, s[2:3] offset:-128
2643 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2644 ; GFX11-NEXT: buffer_gl0_inv
2645 ; GFX11-NEXT: s_endpgm
2646 %zext.offset = zext i32 %voffset to i64
2647 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2648 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2649 %unused = atomicrmw umax ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst
; Unsigned i64 atomic max at %sbase + zext(%voffset), workgroup scope,
; seq_cst. The atomicrmw result is bitcast to <2 x float> and returned. The
; checks expect the returning 64-bit unsigned-max instruction (glc modifier,
; result in v[0:1]) with s[2:3] as the scalar base operand.
2653 define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2654 ; GFX9-LABEL: global_umax_saddr_i64_rtn:
2656 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2657 ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] glc
2658 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2659 ; GFX9-NEXT: ; return to shader part epilog
2661 ; GFX10-LABEL: global_umax_saddr_i64_rtn:
2663 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2664 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2665 ; GFX10-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] glc
2666 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2667 ; GFX10-NEXT: buffer_gl0_inv
2668 ; GFX10-NEXT: ; return to shader part epilog
2670 ; GFX11-LABEL: global_umax_saddr_i64_rtn:
2672 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2673 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2674 ; GFX11-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] glc
2675 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2676 ; GFX11-NEXT: buffer_gl0_inv
2677 ; GFX11-NEXT: ; return to shader part epilog
2678 %zext.offset = zext i32 %voffset to i64
2679 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2680 %rtn = atomicrmw umax ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst
2681 %cast.rtn = bitcast i64 %rtn to <2 x float>
2682 ret <2 x float> %cast.rtn
; Unsigned i64 atomic max at %sbase + zext(%voffset) - 128 bytes, workgroup
; scope, seq_cst, returning the atomicrmw result as <2 x float>. The checks
; expect the returning 64-bit unsigned-max instruction (glc modifier) with the
; -128 displacement folded into its offset field and s[2:3] as scalar base.
2685 define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2686 ; GFX9-LABEL: global_umax_saddr_i64_rtn_neg128:
2688 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2689 ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2690 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2691 ; GFX9-NEXT: ; return to shader part epilog
2693 ; GFX10-LABEL: global_umax_saddr_i64_rtn_neg128:
2695 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2696 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2697 ; GFX10-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2698 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2699 ; GFX10-NEXT: buffer_gl0_inv
2700 ; GFX10-NEXT: ; return to shader part epilog
2702 ; GFX11-LABEL: global_umax_saddr_i64_rtn_neg128:
2704 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2705 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2706 ; GFX11-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2707 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2708 ; GFX11-NEXT: buffer_gl0_inv
2709 ; GFX11-NEXT: ; return to shader part epilog
2710 %zext.offset = zext i32 %voffset to i64
2711 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2712 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2713 %rtn = atomicrmw umax ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst
2714 %cast.rtn = bitcast i64 %rtn to <2 x float>
2715 ret <2 x float> %cast.rtn
; Unsigned i64 atomic max with the result discarded, workgroup scope,
; seq_cst, at %sbase + zext(%voffset). The checks expect the no-return 64-bit
; unsigned-max instruction (no glc modifier) with s[2:3] as the scalar base.
2718 define amdgpu_ps void @global_umax_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2719 ; GFX9-LABEL: global_umax_saddr_i64_nortn:
2721 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2722 ; GFX9-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3]
2723 ; GFX9-NEXT: s_endpgm
2725 ; GFX10-LABEL: global_umax_saddr_i64_nortn:
2727 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2728 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2729 ; GFX10-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3]
2730 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2731 ; GFX10-NEXT: buffer_gl0_inv
2732 ; GFX10-NEXT: s_endpgm
2734 ; GFX11-LABEL: global_umax_saddr_i64_nortn:
2736 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2737 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2738 ; GFX11-NEXT: global_atomic_max_u64 v0, v[1:2], s[2:3]
2739 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2740 ; GFX11-NEXT: buffer_gl0_inv
2741 ; GFX11-NEXT: s_endpgm
2742 %zext.offset = zext i32 %voffset to i64
2743 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2744 %unused = atomicrmw umax ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst
; Unsigned i64 atomic max with the result discarded, workgroup scope,
; seq_cst, at %sbase + zext(%voffset) - 128 bytes. The checks expect the
; no-return 64-bit unsigned-max instruction with the -128 displacement folded
; into its offset field and s[2:3] as the scalar base operand.
2748 define amdgpu_ps void @global_umax_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2749 ; GFX9-LABEL: global_umax_saddr_i64_nortn_neg128:
2751 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2752 ; GFX9-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] offset:-128
2753 ; GFX9-NEXT: s_endpgm
2755 ; GFX10-LABEL: global_umax_saddr_i64_nortn_neg128:
2757 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2758 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2759 ; GFX10-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] offset:-128
2760 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2761 ; GFX10-NEXT: buffer_gl0_inv
2762 ; GFX10-NEXT: s_endpgm
2764 ; GFX11-LABEL: global_umax_saddr_i64_nortn_neg128:
2766 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2767 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2768 ; GFX11-NEXT: global_atomic_max_u64 v0, v[1:2], s[2:3] offset:-128
2769 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2770 ; GFX11-NEXT: buffer_gl0_inv
2771 ; GFX11-NEXT: s_endpgm
2772 %zext.offset = zext i32 %voffset to i64
2773 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2774 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2775 %unused = atomicrmw umax ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst
2779 ; --------------------------------------------------------------------------------
2781 ; --------------------------------------------------------------------------------
; Unsigned i32 atomic min at %sbase + zext(%voffset), workgroup scope,
; seq_cst. The atomicrmw result is bitcast to float, the function's return
; type. The checks expect the returning unsigned-min instruction (glc
; modifier, result in v0) with s[2:3] as the scalar base operand.
2783 define amdgpu_ps float @global_umin_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2784 ; GFX9-LABEL: global_umin_saddr_i32_rtn:
2786 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2787 ; GFX9-NEXT: global_atomic_umin v0, v0, v1, s[2:3] glc
2788 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2789 ; GFX9-NEXT: ; return to shader part epilog
2791 ; GFX10-LABEL: global_umin_saddr_i32_rtn:
2793 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2794 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2795 ; GFX10-NEXT: global_atomic_umin v0, v0, v1, s[2:3] glc
2796 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2797 ; GFX10-NEXT: buffer_gl0_inv
2798 ; GFX10-NEXT: ; return to shader part epilog
2800 ; GFX11-LABEL: global_umin_saddr_i32_rtn:
2802 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2803 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2804 ; GFX11-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] glc
2805 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2806 ; GFX11-NEXT: buffer_gl0_inv
2807 ; GFX11-NEXT: ; return to shader part epilog
2808 %zext.offset = zext i32 %voffset to i64
2809 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2810 %rtn = atomicrmw umin ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst
2811 %cast.rtn = bitcast i32 %rtn to float
; Unsigned i32 atomic min at %sbase + zext(%voffset) - 128 bytes, workgroup
; scope, seq_cst; the atomicrmw result is bitcast to float. The checks expect
; the returning unsigned-min instruction (glc modifier) with the -128
; displacement folded into its offset field and s[2:3] as scalar base.
2815 define amdgpu_ps float @global_umin_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2816 ; GFX9-LABEL: global_umin_saddr_i32_rtn_neg128:
2818 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2819 ; GFX9-NEXT: global_atomic_umin v0, v0, v1, s[2:3] offset:-128 glc
2820 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2821 ; GFX9-NEXT: ; return to shader part epilog
2823 ; GFX10-LABEL: global_umin_saddr_i32_rtn_neg128:
2825 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2826 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2827 ; GFX10-NEXT: global_atomic_umin v0, v0, v1, s[2:3] offset:-128 glc
2828 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2829 ; GFX10-NEXT: buffer_gl0_inv
2830 ; GFX10-NEXT: ; return to shader part epilog
2832 ; GFX11-LABEL: global_umin_saddr_i32_rtn_neg128:
2834 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2835 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2836 ; GFX11-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] offset:-128 glc
2837 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2838 ; GFX11-NEXT: buffer_gl0_inv
2839 ; GFX11-NEXT: ; return to shader part epilog
2840 %zext.offset = zext i32 %voffset to i64
2841 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2842 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2843 %rtn = atomicrmw umin ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst
2844 %cast.rtn = bitcast i32 %rtn to float
; Unsigned i32 atomic min with the result discarded, workgroup scope,
; seq_cst, at %sbase + zext(%voffset). The checks expect the no-return
; unsigned-min instruction (no glc modifier) with s[2:3] as the scalar base.
2848 define amdgpu_ps void @global_umin_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2849 ; GFX9-LABEL: global_umin_saddr_i32_nortn:
2851 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2852 ; GFX9-NEXT: global_atomic_umin v0, v1, s[2:3]
2853 ; GFX9-NEXT: s_endpgm
2855 ; GFX10-LABEL: global_umin_saddr_i32_nortn:
2857 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2858 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2859 ; GFX10-NEXT: global_atomic_umin v0, v1, s[2:3]
2860 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2861 ; GFX10-NEXT: buffer_gl0_inv
2862 ; GFX10-NEXT: s_endpgm
2864 ; GFX11-LABEL: global_umin_saddr_i32_nortn:
2866 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2867 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2868 ; GFX11-NEXT: global_atomic_min_u32 v0, v1, s[2:3]
2869 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2870 ; GFX11-NEXT: buffer_gl0_inv
2871 ; GFX11-NEXT: s_endpgm
2872 %zext.offset = zext i32 %voffset to i64
2873 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2874 %unused = atomicrmw umin ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst
; Unsigned i32 atomic min with the result discarded, workgroup scope,
; seq_cst, at %sbase + zext(%voffset) - 128 bytes. The checks expect the
; no-return unsigned-min instruction with the -128 displacement folded into
; its offset field and s[2:3] as the scalar base operand.
2878 define amdgpu_ps void @global_umin_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
2879 ; GFX9-LABEL: global_umin_saddr_i32_nortn_neg128:
2881 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2882 ; GFX9-NEXT: global_atomic_umin v0, v1, s[2:3] offset:-128
2883 ; GFX9-NEXT: s_endpgm
2885 ; GFX10-LABEL: global_umin_saddr_i32_nortn_neg128:
2887 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2888 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2889 ; GFX10-NEXT: global_atomic_umin v0, v1, s[2:3] offset:-128
2890 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2891 ; GFX10-NEXT: buffer_gl0_inv
2892 ; GFX10-NEXT: s_endpgm
2894 ; GFX11-LABEL: global_umin_saddr_i32_nortn_neg128:
2896 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2897 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2898 ; GFX11-NEXT: global_atomic_min_u32 v0, v1, s[2:3] offset:-128
2899 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2900 ; GFX11-NEXT: buffer_gl0_inv
2901 ; GFX11-NEXT: s_endpgm
2902 %zext.offset = zext i32 %voffset to i64
2903 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2904 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2905 %unused = atomicrmw umin ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst
; Unsigned i64 atomic min at %sbase + zext(%voffset), workgroup scope,
; seq_cst. The atomicrmw result is bitcast to <2 x float> and returned. The
; checks expect the returning 64-bit unsigned-min instruction (glc modifier,
; result in v[0:1]) with s[2:3] as the scalar base operand.
2909 define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2910 ; GFX9-LABEL: global_umin_saddr_i64_rtn:
2912 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2913 ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] glc
2914 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2915 ; GFX9-NEXT: ; return to shader part epilog
2917 ; GFX10-LABEL: global_umin_saddr_i64_rtn:
2919 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2920 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2921 ; GFX10-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] glc
2922 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2923 ; GFX10-NEXT: buffer_gl0_inv
2924 ; GFX10-NEXT: ; return to shader part epilog
2926 ; GFX11-LABEL: global_umin_saddr_i64_rtn:
2928 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2929 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2930 ; GFX11-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] glc
2931 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2932 ; GFX11-NEXT: buffer_gl0_inv
2933 ; GFX11-NEXT: ; return to shader part epilog
2934 %zext.offset = zext i32 %voffset to i64
2935 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2936 %rtn = atomicrmw umin ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst
2937 %cast.rtn = bitcast i64 %rtn to <2 x float>
2938 ret <2 x float> %cast.rtn
; Unsigned i64 atomic min at %sbase + zext(%voffset) - 128 bytes, workgroup
; scope, seq_cst, returning the atomicrmw result as <2 x float>. The checks
; expect the returning 64-bit unsigned-min instruction (glc modifier) with the
; -128 displacement folded into its offset field and s[2:3] as scalar base.
2941 define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2942 ; GFX9-LABEL: global_umin_saddr_i64_rtn_neg128:
2944 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2945 ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2946 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2947 ; GFX9-NEXT: ; return to shader part epilog
2949 ; GFX10-LABEL: global_umin_saddr_i64_rtn_neg128:
2951 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2952 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2953 ; GFX10-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2954 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2955 ; GFX10-NEXT: buffer_gl0_inv
2956 ; GFX10-NEXT: ; return to shader part epilog
2958 ; GFX11-LABEL: global_umin_saddr_i64_rtn_neg128:
2960 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2961 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2962 ; GFX11-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
2963 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2964 ; GFX11-NEXT: buffer_gl0_inv
2965 ; GFX11-NEXT: ; return to shader part epilog
2966 %zext.offset = zext i32 %voffset to i64
2967 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
2968 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
2969 %rtn = atomicrmw umin ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst
2970 %cast.rtn = bitcast i64 %rtn to <2 x float>
2971 ret <2 x float> %cast.rtn
; Unsigned i64 atomic min with the result discarded, workgroup scope,
; seq_cst, at %sbase + zext(%voffset). The checks expect the no-return 64-bit
; unsigned-min instruction (no glc modifier) with s[2:3] as the scalar base.
2974 define amdgpu_ps void @global_umin_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
2975 ; GFX9-LABEL: global_umin_saddr_i64_nortn:
2977 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2978 ; GFX9-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3]
2979 ; GFX9-NEXT: s_endpgm
2981 ; GFX10-LABEL: global_umin_saddr_i64_nortn:
2983 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2984 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2985 ; GFX10-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3]
2986 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2987 ; GFX10-NEXT: buffer_gl0_inv
2988 ; GFX10-NEXT: s_endpgm
2990 ; GFX11-LABEL: global_umin_saddr_i64_nortn:
2992 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2993 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2994 ; GFX11-NEXT: global_atomic_min_u64 v0, v[1:2], s[2:3]
2995 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2996 ; GFX11-NEXT: buffer_gl0_inv
2997 ; GFX11-NEXT: s_endpgm
2998 %zext.offset = zext i32 %voffset to i64
2999 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3000 %unused = atomicrmw umin ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst
; Unsigned i64 atomic min with the result discarded, workgroup scope,
; seq_cst, at %sbase + zext(%voffset) - 128 bytes. The checks expect the
; no-return 64-bit unsigned-min instruction with the -128 displacement folded
; into its offset field and s[2:3] as the scalar base operand.
3004 define amdgpu_ps void @global_umin_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3005 ; GFX9-LABEL: global_umin_saddr_i64_nortn_neg128:
3007 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3008 ; GFX9-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] offset:-128
3009 ; GFX9-NEXT: s_endpgm
3011 ; GFX10-LABEL: global_umin_saddr_i64_nortn_neg128:
3013 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3014 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3015 ; GFX10-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] offset:-128
3016 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3017 ; GFX10-NEXT: buffer_gl0_inv
3018 ; GFX10-NEXT: s_endpgm
3020 ; GFX11-LABEL: global_umin_saddr_i64_nortn_neg128:
3022 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3023 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3024 ; GFX11-NEXT: global_atomic_min_u64 v0, v[1:2], s[2:3] offset:-128
3025 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3026 ; GFX11-NEXT: buffer_gl0_inv
3027 ; GFX11-NEXT: s_endpgm
3028 %zext.offset = zext i32 %voffset to i64
3029 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3030 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3031 %unused = atomicrmw umin ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst
3035 ; --------------------------------------------------------------------------------
3037 ; --------------------------------------------------------------------------------
; i32 compare-and-swap at %sbase + zext(%voffset), seq_cst/seq_cst with no
; syncscope given. Field 0 of the cmpxchg result (the loaded value) is
; extracted and bitcast to float, the function's return type. The checks
; expect v1 to be copied into v3 so that v[2:3] forms the packed operand of
; the returning cmpswap (presumably %data in v2 and %cmp in v3 - confirm
; against the calling convention), followed by cache invalidates.
3039 define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %cmp, i32 %data) {
3040 ; GFX9-LABEL: global_cmpxchg_saddr_i32_rtn:
3042 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
3043 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3044 ; GFX9-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] glc
3045 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3046 ; GFX9-NEXT: buffer_wbinvl1
3047 ; GFX9-NEXT: ; return to shader part epilog
3049 ; GFX10-LABEL: global_cmpxchg_saddr_i32_rtn:
3051 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
3052 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3053 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3054 ; GFX10-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] glc
3055 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3056 ; GFX10-NEXT: buffer_gl0_inv
3057 ; GFX10-NEXT: buffer_gl1_inv
3058 ; GFX10-NEXT: ; return to shader part epilog
3060 ; GFX11-LABEL: global_cmpxchg_saddr_i32_rtn:
3062 ; GFX11-NEXT: v_mov_b32_e32 v3, v1
3063 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3064 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3065 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] glc
3066 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3067 ; GFX11-NEXT: buffer_gl0_inv
3068 ; GFX11-NEXT: buffer_gl1_inv
3069 ; GFX11-NEXT: ; return to shader part epilog
3070 %zext.offset = zext i32 %voffset to i64
3071 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3072 %cmpxchg = cmpxchg ptr addrspace(1) %gep0, i32 %cmp, i32 %data seq_cst seq_cst
3073 %rtn = extractvalue { i32, i1 } %cmpxchg, 0
3074 %cast.rtn = bitcast i32 %rtn to float
; i32 compare-and-swap at %sbase + zext(%voffset) - 128 bytes, seq_cst/seq_cst
; with no syncscope given. Field 0 of the cmpxchg result (the loaded value) is
; extracted and bitcast to float. The checks expect v1 copied into v3 to form
; the packed v[2:3] operand, the returning cmpswap with the -128 displacement
; folded into its offset field, and cache invalidates afterwards.
3078 define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %cmp, i32 %data) {
3079 ; GFX9-LABEL: global_cmpxchg_saddr_i32_rtn_neg128:
3081 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
3082 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3083 ; GFX9-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] offset:-128 glc
3084 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3085 ; GFX9-NEXT: buffer_wbinvl1
3086 ; GFX9-NEXT: ; return to shader part epilog
3088 ; GFX10-LABEL: global_cmpxchg_saddr_i32_rtn_neg128:
3090 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
3091 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3092 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3093 ; GFX10-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] offset:-128 glc
3094 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3095 ; GFX10-NEXT: buffer_gl0_inv
3096 ; GFX10-NEXT: buffer_gl1_inv
3097 ; GFX10-NEXT: ; return to shader part epilog
3099 ; GFX11-LABEL: global_cmpxchg_saddr_i32_rtn_neg128:
3101 ; GFX11-NEXT: v_mov_b32_e32 v3, v1
3102 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3103 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3104 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] offset:-128 glc
3105 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3106 ; GFX11-NEXT: buffer_gl0_inv
3107 ; GFX11-NEXT: buffer_gl1_inv
3108 ; GFX11-NEXT: ; return to shader part epilog
3109 %zext.offset = zext i32 %voffset to i64
3110 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3111 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3112 %cmpxchg = cmpxchg ptr addrspace(1) %gep1, i32 %cmp, i32 %data seq_cst seq_cst
3113 %rtn = extractvalue { i32, i1 } %cmpxchg, 0
3114 %cast.rtn = bitcast i32 %rtn to float
; i32 compare-and-swap with the result discarded, at %sbase + zext(%voffset),
; seq_cst/seq_cst with no syncscope given. The checks expect v1 copied into v3
; to form the packed v[2:3] operand, the no-return cmpswap (no glc modifier)
; with s[2:3] as the scalar base, and cache invalidates afterwards.
3118 define amdgpu_ps void @global_cmpxchg_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %cmp, i32 %data) {
3119 ; GFX9-LABEL: global_cmpxchg_saddr_i32_nortn:
3121 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
3122 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3123 ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3]
3124 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3125 ; GFX9-NEXT: buffer_wbinvl1
3126 ; GFX9-NEXT: s_endpgm
3128 ; GFX10-LABEL: global_cmpxchg_saddr_i32_nortn:
3130 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
3131 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3132 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3133 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3]
3134 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3135 ; GFX10-NEXT: buffer_gl0_inv
3136 ; GFX10-NEXT: buffer_gl1_inv
3137 ; GFX10-NEXT: s_endpgm
3139 ; GFX11-LABEL: global_cmpxchg_saddr_i32_nortn:
3141 ; GFX11-NEXT: v_mov_b32_e32 v3, v1
3142 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3143 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3144 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3]
3145 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3146 ; GFX11-NEXT: buffer_gl0_inv
3147 ; GFX11-NEXT: buffer_gl1_inv
3148 ; GFX11-NEXT: s_endpgm
3149 %zext.offset = zext i32 %voffset to i64
3150 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3151 %unused = cmpxchg ptr addrspace(1) %gep0, i32 %cmp, i32 %data seq_cst seq_cst
; i32 compare-and-swap with the result discarded, at %sbase + zext(%voffset)
; - 128 bytes, seq_cst/seq_cst with no syncscope given. The checks expect v1
; copied into v3 to form the packed v[2:3] operand, the no-return cmpswap with
; the -128 displacement folded into its offset field, and cache invalidates.
3155 define amdgpu_ps void @global_cmpxchg_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %cmp, i32 %data) {
3156 ; GFX9-LABEL: global_cmpxchg_saddr_i32_nortn_neg128:
3158 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
3159 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3160 ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] offset:-128
3161 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3162 ; GFX9-NEXT: buffer_wbinvl1
3163 ; GFX9-NEXT: s_endpgm
3165 ; GFX10-LABEL: global_cmpxchg_saddr_i32_nortn_neg128:
3167 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
3168 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3169 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3170 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] offset:-128
3171 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3172 ; GFX10-NEXT: buffer_gl0_inv
3173 ; GFX10-NEXT: buffer_gl1_inv
3174 ; GFX10-NEXT: s_endpgm
3176 ; GFX11-LABEL: global_cmpxchg_saddr_i32_nortn_neg128:
3178 ; GFX11-NEXT: v_mov_b32_e32 v3, v1
3179 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3180 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3181 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] offset:-128
3182 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3183 ; GFX11-NEXT: buffer_gl0_inv
3184 ; GFX11-NEXT: buffer_gl1_inv
3185 ; GFX11-NEXT: s_endpgm
3186 %zext.offset = zext i32 %voffset to i64
3187 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3188 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3189 %unused = cmpxchg ptr addrspace(1) %gep1, i32 %cmp, i32 %data seq_cst seq_cst
; i64 compare-and-swap at %sbase + zext(%voffset), seq_cst/seq_cst with no
; syncscope given. Field 0 of the cmpxchg result (the loaded value) is
; extracted, bitcast to <2 x float> and returned. The checks expect v1 and v2
; to be copied into v5 and v6 so that v[3:6] forms the packed four-register
; operand of the returning 64-bit cmpswap (presumably %data in v[3:4] and
; %cmp in v[5:6] - confirm against the calling convention), followed by
; cache invalidates.
3193 define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %cmp, i64 %data) {
3194 ; GFX9-LABEL: global_cmpxchg_saddr_i64_rtn:
3196 ; GFX9-NEXT: v_mov_b32_e32 v6, v2
3197 ; GFX9-NEXT: v_mov_b32_e32 v5, v1
3198 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3199 ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] glc
3200 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3201 ; GFX9-NEXT: buffer_wbinvl1
3202 ; GFX9-NEXT: ; return to shader part epilog
3204 ; GFX10-LABEL: global_cmpxchg_saddr_i64_rtn:
3206 ; GFX10-NEXT: v_mov_b32_e32 v6, v2
3207 ; GFX10-NEXT: v_mov_b32_e32 v5, v1
3208 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3209 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3210 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] glc
3211 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3212 ; GFX10-NEXT: buffer_gl0_inv
3213 ; GFX10-NEXT: buffer_gl1_inv
3214 ; GFX10-NEXT: ; return to shader part epilog
3216 ; GFX11-LABEL: global_cmpxchg_saddr_i64_rtn:
3218 ; GFX11-NEXT: v_mov_b32_e32 v6, v2
3219 ; GFX11-NEXT: v_mov_b32_e32 v5, v1
3220 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3221 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3222 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] glc
3223 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3224 ; GFX11-NEXT: buffer_gl0_inv
3225 ; GFX11-NEXT: buffer_gl1_inv
3226 ; GFX11-NEXT: ; return to shader part epilog
3227 %zext.offset = zext i32 %voffset to i64
3228 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3229 %cmpxchg = cmpxchg ptr addrspace(1) %gep0, i64 %cmp, i64 %data seq_cst seq_cst
3230 %rtn = extractvalue { i64, i1 } %cmpxchg, 0
3231 %cast.rtn = bitcast i64 %rtn to <2 x float>
3232 ret <2 x float> %cast.rtn
; 64-bit cmpxchg (seq_cst/seq_cst, no explicit syncscope) on %sbase + zext(%voffset) - 128.
; Field 0 of the { i64, i1 } result (the loaded value) is bitcast to <2 x float> and returned.
; All prefixes expect two v_movs (v2 -> v6, v1 -> v5) ahead of a glc cmpswap that reads
; v[3:6], writes v[0:1], uses s[2:3] as base and carries the -128 as offset:-128.
3235 define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %cmp, i64 %data) {
3236 ; GFX9-LABEL: global_cmpxchg_saddr_i64_rtn_neg128:
3238 ; GFX9-NEXT: v_mov_b32_e32 v6, v2
3239 ; GFX9-NEXT: v_mov_b32_e32 v5, v1
3240 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3241 ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc
3242 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3243 ; GFX9-NEXT: buffer_wbinvl1
3244 ; GFX9-NEXT: ; return to shader part epilog
3246 ; GFX10-LABEL: global_cmpxchg_saddr_i64_rtn_neg128:
3248 ; GFX10-NEXT: v_mov_b32_e32 v6, v2
3249 ; GFX10-NEXT: v_mov_b32_e32 v5, v1
3250 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3251 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3252 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc
3253 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3254 ; GFX10-NEXT: buffer_gl0_inv
3255 ; GFX10-NEXT: buffer_gl1_inv
3256 ; GFX10-NEXT: ; return to shader part epilog
3258 ; GFX11-LABEL: global_cmpxchg_saddr_i64_rtn_neg128:
3260 ; GFX11-NEXT: v_mov_b32_e32 v6, v2
3261 ; GFX11-NEXT: v_mov_b32_e32 v5, v1
3262 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3263 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3264 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc
3265 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3266 ; GFX11-NEXT: buffer_gl0_inv
3267 ; GFX11-NEXT: buffer_gl1_inv
3268 ; GFX11-NEXT: ; return to shader part epilog
3269 %zext.offset = zext i32 %voffset to i64
3270 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3271 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3272 %cmpxchg = cmpxchg ptr addrspace(1) %gep1, i64 %cmp, i64 %data seq_cst seq_cst
3273 %rtn = extractvalue { i64, i1 } %cmpxchg, 0
3274 %cast.rtn = bitcast i64 %rtn to <2 x float>
3275 ret <2 x float> %cast.rtn
; 64-bit cmpxchg (seq_cst/seq_cst, no explicit syncscope) on %sbase + zext(%voffset);
; the cmpxchg result is bound to %unused and not consumed.
; All prefixes expect two v_movs (v2 -> v6, v1 -> v5) and then a cmpswap with no destination
; register and no glc, reading v[3:6] with s[2:3] as base and no offset.
3278 define amdgpu_ps void @global_cmpxchg_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %cmp, i64 %data) {
3279 ; GFX9-LABEL: global_cmpxchg_saddr_i64_nortn:
3281 ; GFX9-NEXT: v_mov_b32_e32 v6, v2
3282 ; GFX9-NEXT: v_mov_b32_e32 v5, v1
3283 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3284 ; GFX9-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3]
3285 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3286 ; GFX9-NEXT: buffer_wbinvl1
3287 ; GFX9-NEXT: s_endpgm
3289 ; GFX10-LABEL: global_cmpxchg_saddr_i64_nortn:
3291 ; GFX10-NEXT: v_mov_b32_e32 v6, v2
3292 ; GFX10-NEXT: v_mov_b32_e32 v5, v1
3293 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3294 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3295 ; GFX10-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3]
3296 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3297 ; GFX10-NEXT: buffer_gl0_inv
3298 ; GFX10-NEXT: buffer_gl1_inv
3299 ; GFX10-NEXT: s_endpgm
3301 ; GFX11-LABEL: global_cmpxchg_saddr_i64_nortn:
3303 ; GFX11-NEXT: v_mov_b32_e32 v6, v2
3304 ; GFX11-NEXT: v_mov_b32_e32 v5, v1
3305 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3306 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3307 ; GFX11-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3]
3308 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3309 ; GFX11-NEXT: buffer_gl0_inv
3310 ; GFX11-NEXT: buffer_gl1_inv
3311 ; GFX11-NEXT: s_endpgm
3312 %zext.offset = zext i32 %voffset to i64
3313 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3314 %unused = cmpxchg ptr addrspace(1) %gep0, i64 %cmp, i64 %data seq_cst seq_cst
; 64-bit cmpxchg (seq_cst/seq_cst, no explicit syncscope) on %sbase + zext(%voffset) - 128;
; the cmpxchg result is bound to %unused and not consumed.
; All prefixes expect two v_movs (v2 -> v6, v1 -> v5) and then a cmpswap with no destination
; register and no glc, reading v[3:6] with s[2:3] as base and offset:-128.
3318 define amdgpu_ps void @global_cmpxchg_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %cmp, i64 %data) {
3319 ; GFX9-LABEL: global_cmpxchg_saddr_i64_nortn_neg128:
3321 ; GFX9-NEXT: v_mov_b32_e32 v6, v2
3322 ; GFX9-NEXT: v_mov_b32_e32 v5, v1
3323 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3324 ; GFX9-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] offset:-128
3325 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3326 ; GFX9-NEXT: buffer_wbinvl1
3327 ; GFX9-NEXT: s_endpgm
3329 ; GFX10-LABEL: global_cmpxchg_saddr_i64_nortn_neg128:
3331 ; GFX10-NEXT: v_mov_b32_e32 v6, v2
3332 ; GFX10-NEXT: v_mov_b32_e32 v5, v1
3333 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3334 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3335 ; GFX10-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] offset:-128
3336 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3337 ; GFX10-NEXT: buffer_gl0_inv
3338 ; GFX10-NEXT: buffer_gl1_inv
3339 ; GFX10-NEXT: s_endpgm
3341 ; GFX11-LABEL: global_cmpxchg_saddr_i64_nortn_neg128:
3343 ; GFX11-NEXT: v_mov_b32_e32 v6, v2
3344 ; GFX11-NEXT: v_mov_b32_e32 v5, v1
3345 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3346 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3347 ; GFX11-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] offset:-128
3348 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
3349 ; GFX11-NEXT: buffer_gl0_inv
3350 ; GFX11-NEXT: buffer_gl1_inv
3351 ; GFX11-NEXT: s_endpgm
3352 %zext.offset = zext i32 %voffset to i64
3353 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3354 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3355 %unused = cmpxchg ptr addrspace(1) %gep1, i64 %cmp, i64 %data seq_cst seq_cst
3359 ; --------------------------------------------------------------------------------
3361 ; --------------------------------------------------------------------------------
; atomicrmw uinc_wrap (i32, syncscope "agent", monotonic) on %sbase + zext(%voffset);
; the value produced by the atomicrmw is bitcast to float.
; The GCN prefix is shared by the gfx900 and gfx1010 RUN lines; GFX11 differs only in the
; mnemonic (global_atomic_inc_u32). Both expect the glc form writing v0, then s_waitcnt vmcnt(0).
3363 define amdgpu_ps float @global_inc_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
3364 ; GCN-LABEL: global_inc_saddr_i32_rtn:
3366 ; GCN-NEXT: global_atomic_inc v0, v0, v1, s[2:3] glc
3367 ; GCN-NEXT: s_waitcnt vmcnt(0)
3368 ; GCN-NEXT: ; return to shader part epilog
3370 ; GFX11-LABEL: global_inc_saddr_i32_rtn:
3372 ; GFX11-NEXT: global_atomic_inc_u32 v0, v0, v1, s[2:3] glc
3373 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3374 ; GFX11-NEXT: ; return to shader part epilog
3375 %zext.offset = zext i32 %voffset to i64
3376 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3377 %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
3378 %cast.rtn = bitcast i32 %rtn to float
; atomicrmw uinc_wrap (i32, syncscope "agent", monotonic) on %sbase + zext(%voffset) - 128;
; the value produced by the atomicrmw is bitcast to float.
; Both GCN (gfx900 and gfx1010 RUN lines) and GFX11 expect the glc form writing v0 with the
; -128 carried as offset:-128, then s_waitcnt vmcnt(0).
3382 define amdgpu_ps float @global_inc_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
3383 ; GCN-LABEL: global_inc_saddr_i32_rtn_neg128:
3385 ; GCN-NEXT: global_atomic_inc v0, v0, v1, s[2:3] offset:-128 glc
3386 ; GCN-NEXT: s_waitcnt vmcnt(0)
3387 ; GCN-NEXT: ; return to shader part epilog
3389 ; GFX11-LABEL: global_inc_saddr_i32_rtn_neg128:
3391 ; GFX11-NEXT: global_atomic_inc_u32 v0, v0, v1, s[2:3] offset:-128 glc
3392 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3393 ; GFX11-NEXT: ; return to shader part epilog
3394 %zext.offset = zext i32 %voffset to i64
3395 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3396 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3397 %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
3398 %cast.rtn = bitcast i32 %rtn to float
; atomicrmw uinc_wrap (i32, syncscope "agent", monotonic) on %sbase + zext(%voffset);
; the result is bound to %unused and not consumed.
; The checks expect the atomic without a destination register or glc, immediately followed by
; s_endpgm on GCN; GFX11 additionally expects s_nop 0 and s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; between the atomic and s_endpgm.
3402 define amdgpu_ps void @global_inc_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
3403 ; GCN-LABEL: global_inc_saddr_i32_nortn:
3405 ; GCN-NEXT: global_atomic_inc v0, v1, s[2:3]
3406 ; GCN-NEXT: s_endpgm
3408 ; GFX11-LABEL: global_inc_saddr_i32_nortn:
3410 ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, s[2:3]
3411 ; GFX11-NEXT: s_nop 0
3412 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3413 ; GFX11-NEXT: s_endpgm
3414 %zext.offset = zext i32 %voffset to i64
3415 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3416 %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
; atomicrmw uinc_wrap (i32, syncscope "agent", monotonic) on %sbase + zext(%voffset) - 128;
; the result is bound to %unused and not consumed.
; The checks expect the atomic without a destination register or glc and with offset:-128;
; GFX11 additionally expects s_nop 0 and s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.
3420 define amdgpu_ps void @global_inc_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
3421 ; GCN-LABEL: global_inc_saddr_i32_nortn_neg128:
3423 ; GCN-NEXT: global_atomic_inc v0, v1, s[2:3] offset:-128
3424 ; GCN-NEXT: s_endpgm
3426 ; GFX11-LABEL: global_inc_saddr_i32_nortn_neg128:
3428 ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, s[2:3] offset:-128
3429 ; GFX11-NEXT: s_nop 0
3430 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3431 ; GFX11-NEXT: s_endpgm
3432 %zext.offset = zext i32 %voffset to i64
3433 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3434 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3435 %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
; atomicrmw uinc_wrap (i64, syncscope "agent", monotonic) on %sbase + zext(%voffset);
; the value produced by the atomicrmw is bitcast to <2 x float> and returned.
; GCN (gfx900 and gfx1010 RUN lines) expects global_atomic_inc_x2 and GFX11 expects
; global_atomic_inc_u64; both use the glc form reading v[1:2] and writing v[0:1].
3439 define amdgpu_ps <2 x float> @global_inc_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3440 ; GCN-LABEL: global_inc_saddr_i64_rtn:
3442 ; GCN-NEXT: global_atomic_inc_x2 v[0:1], v0, v[1:2], s[2:3] glc
3443 ; GCN-NEXT: s_waitcnt vmcnt(0)
3444 ; GCN-NEXT: ; return to shader part epilog
3446 ; GFX11-LABEL: global_inc_saddr_i64_rtn:
3448 ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v0, v[1:2], s[2:3] glc
3449 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3450 ; GFX11-NEXT: ; return to shader part epilog
3451 %zext.offset = zext i32 %voffset to i64
3452 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3453 %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
3454 %cast.rtn = bitcast i64 %rtn to <2 x float>
3455 ret <2 x float> %cast.rtn
; atomicrmw uinc_wrap (i64, syncscope "agent", monotonic) on %sbase + zext(%voffset) - 128;
; the value produced by the atomicrmw is bitcast to <2 x float> and returned.
; Both prefixes expect the glc form reading v[1:2] and writing v[0:1], with the -128 carried
; as offset:-128 on the instruction.
3458 define amdgpu_ps <2 x float> @global_inc_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3459 ; GCN-LABEL: global_inc_saddr_i64_rtn_neg128:
3461 ; GCN-NEXT: global_atomic_inc_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
3462 ; GCN-NEXT: s_waitcnt vmcnt(0)
3463 ; GCN-NEXT: ; return to shader part epilog
3465 ; GFX11-LABEL: global_inc_saddr_i64_rtn_neg128:
3467 ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
3468 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3469 ; GFX11-NEXT: ; return to shader part epilog
3470 %zext.offset = zext i32 %voffset to i64
3471 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3472 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3473 %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
3474 %cast.rtn = bitcast i64 %rtn to <2 x float>
3475 ret <2 x float> %cast.rtn
; atomicrmw uinc_wrap (i64, syncscope "agent", monotonic) on %sbase + zext(%voffset);
; the result is bound to %unused and not consumed.
; The checks expect the atomic without a destination register or glc, reading v[1:2];
; GFX11 additionally expects s_nop 0 and s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.
3478 define amdgpu_ps void @global_inc_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3479 ; GCN-LABEL: global_inc_saddr_i64_nortn:
3481 ; GCN-NEXT: global_atomic_inc_x2 v0, v[1:2], s[2:3]
3482 ; GCN-NEXT: s_endpgm
3484 ; GFX11-LABEL: global_inc_saddr_i64_nortn:
3486 ; GFX11-NEXT: global_atomic_inc_u64 v0, v[1:2], s[2:3]
3487 ; GFX11-NEXT: s_nop 0
3488 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3489 ; GFX11-NEXT: s_endpgm
3490 %zext.offset = zext i32 %voffset to i64
3491 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3492 %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
; atomicrmw uinc_wrap (i64, syncscope "agent", monotonic) on %sbase + zext(%voffset) - 128;
; the result is bound to %unused and not consumed.
; The checks expect the atomic without a destination register or glc, reading v[1:2] with
; offset:-128; GFX11 additionally expects s_nop 0 and s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; before s_endpgm.
3496 define amdgpu_ps void @global_inc_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3497 ; GCN-LABEL: global_inc_saddr_i64_nortn_neg128:
3499 ; GCN-NEXT: global_atomic_inc_x2 v0, v[1:2], s[2:3] offset:-128
3500 ; GCN-NEXT: s_endpgm
3502 ; GFX11-LABEL: global_inc_saddr_i64_nortn_neg128:
3504 ; GFX11-NEXT: global_atomic_inc_u64 v0, v[1:2], s[2:3] offset:-128
3505 ; GFX11-NEXT: s_nop 0
3506 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3507 ; GFX11-NEXT: s_endpgm
3508 %zext.offset = zext i32 %voffset to i64
3509 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3510 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3511 %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
3515 ; --------------------------------------------------------------------------------
3517 ; --------------------------------------------------------------------------------
; atomicrmw udec_wrap (i32, syncscope "agent", monotonic) on %sbase + zext(%voffset);
; the value produced by the atomicrmw is bitcast to float.
; The GCN prefix is shared by the gfx900 and gfx1010 RUN lines; GFX11 differs only in the
; mnemonic (global_atomic_dec_u32). Both expect the glc form writing v0, then s_waitcnt vmcnt(0).
3520 define amdgpu_ps float @global_dec_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
3521 ; GCN-LABEL: global_dec_saddr_i32_rtn:
3523 ; GCN-NEXT: global_atomic_dec v0, v0, v1, s[2:3] glc
3524 ; GCN-NEXT: s_waitcnt vmcnt(0)
3525 ; GCN-NEXT: ; return to shader part epilog
3527 ; GFX11-LABEL: global_dec_saddr_i32_rtn:
3529 ; GFX11-NEXT: global_atomic_dec_u32 v0, v0, v1, s[2:3] glc
3530 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3531 ; GFX11-NEXT: ; return to shader part epilog
3532 %zext.offset = zext i32 %voffset to i64
3533 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3534 %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
3535 %cast.rtn = bitcast i32 %rtn to float
; atomicrmw udec_wrap (i32, syncscope "agent", monotonic) on %sbase + zext(%voffset) - 128;
; the value produced by the atomicrmw is bitcast to float.
; Both GCN (gfx900 and gfx1010 RUN lines) and GFX11 expect the glc form writing v0 with the
; -128 carried as offset:-128, then s_waitcnt vmcnt(0).
3539 define amdgpu_ps float @global_dec_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
3540 ; GCN-LABEL: global_dec_saddr_i32_rtn_neg128:
3542 ; GCN-NEXT: global_atomic_dec v0, v0, v1, s[2:3] offset:-128 glc
3543 ; GCN-NEXT: s_waitcnt vmcnt(0)
3544 ; GCN-NEXT: ; return to shader part epilog
3546 ; GFX11-LABEL: global_dec_saddr_i32_rtn_neg128:
3548 ; GFX11-NEXT: global_atomic_dec_u32 v0, v0, v1, s[2:3] offset:-128 glc
3549 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3550 ; GFX11-NEXT: ; return to shader part epilog
3551 %zext.offset = zext i32 %voffset to i64
3552 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3553 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3554 %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
3555 %cast.rtn = bitcast i32 %rtn to float
; atomicrmw udec_wrap (i32, syncscope "agent", monotonic) on %sbase + zext(%voffset);
; the result is bound to %unused and not consumed.
; The checks expect the atomic without a destination register or glc, immediately followed by
; s_endpgm on GCN; GFX11 additionally expects s_nop 0 and s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; between the atomic and s_endpgm.
3559 define amdgpu_ps void @global_dec_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
3560 ; GCN-LABEL: global_dec_saddr_i32_nortn:
3562 ; GCN-NEXT: global_atomic_dec v0, v1, s[2:3]
3563 ; GCN-NEXT: s_endpgm
3565 ; GFX11-LABEL: global_dec_saddr_i32_nortn:
3567 ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, s[2:3]
3568 ; GFX11-NEXT: s_nop 0
3569 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3570 ; GFX11-NEXT: s_endpgm
3571 %zext.offset = zext i32 %voffset to i64
3572 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3573 %unused = atomicrmw udec_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
; atomicrmw udec_wrap (i32, syncscope "agent", monotonic) on %sbase + zext(%voffset) - 128;
; the result is bound to %unused and not consumed.
; The checks expect the atomic without a destination register or glc and with offset:-128;
; GFX11 additionally expects s_nop 0 and s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.
3577 define amdgpu_ps void @global_dec_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
3578 ; GCN-LABEL: global_dec_saddr_i32_nortn_neg128:
3580 ; GCN-NEXT: global_atomic_dec v0, v1, s[2:3] offset:-128
3581 ; GCN-NEXT: s_endpgm
3583 ; GFX11-LABEL: global_dec_saddr_i32_nortn_neg128:
3585 ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, s[2:3] offset:-128
3586 ; GFX11-NEXT: s_nop 0
3587 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3588 ; GFX11-NEXT: s_endpgm
3589 %zext.offset = zext i32 %voffset to i64
3590 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3591 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3592 %unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
; atomicrmw udec_wrap (i64, syncscope "agent", monotonic) on %sbase + zext(%voffset);
; the value produced by the atomicrmw is bitcast to <2 x float> and returned.
; GCN (gfx900 and gfx1010 RUN lines) expects global_atomic_dec_x2 and GFX11 expects
; global_atomic_dec_u64; both use the glc form reading v[1:2] and writing v[0:1].
3596 define amdgpu_ps <2 x float> @global_dec_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3597 ; GCN-LABEL: global_dec_saddr_i64_rtn:
3599 ; GCN-NEXT: global_atomic_dec_x2 v[0:1], v0, v[1:2], s[2:3] glc
3600 ; GCN-NEXT: s_waitcnt vmcnt(0)
3601 ; GCN-NEXT: ; return to shader part epilog
3603 ; GFX11-LABEL: global_dec_saddr_i64_rtn:
3605 ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v0, v[1:2], s[2:3] glc
3606 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3607 ; GFX11-NEXT: ; return to shader part epilog
3608 %zext.offset = zext i32 %voffset to i64
3609 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3610 %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
3611 %cast.rtn = bitcast i64 %rtn to <2 x float>
3612 ret <2 x float> %cast.rtn
; atomicrmw udec_wrap (i64, syncscope "agent", monotonic) on %sbase + zext(%voffset) - 128;
; the value produced by the atomicrmw is bitcast to <2 x float> and returned.
; Both prefixes expect the glc form reading v[1:2] and writing v[0:1], with the -128 carried
; as offset:-128 on the instruction.
3615 define amdgpu_ps <2 x float> @global_dec_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3616 ; GCN-LABEL: global_dec_saddr_i64_rtn_neg128:
3618 ; GCN-NEXT: global_atomic_dec_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
3619 ; GCN-NEXT: s_waitcnt vmcnt(0)
3620 ; GCN-NEXT: ; return to shader part epilog
3622 ; GFX11-LABEL: global_dec_saddr_i64_rtn_neg128:
3624 ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
3625 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3626 ; GFX11-NEXT: ; return to shader part epilog
3627 %zext.offset = zext i32 %voffset to i64
3628 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3629 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3630 %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
3631 %cast.rtn = bitcast i64 %rtn to <2 x float>
3632 ret <2 x float> %cast.rtn
; atomicrmw udec_wrap (i64, syncscope "agent", monotonic) on %sbase + zext(%voffset);
; the result is bound to %unused and not consumed.
; The checks expect the atomic without a destination register or glc, reading v[1:2];
; GFX11 additionally expects s_nop 0 and s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.
3635 define amdgpu_ps void @global_dec_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3636 ; GCN-LABEL: global_dec_saddr_i64_nortn:
3638 ; GCN-NEXT: global_atomic_dec_x2 v0, v[1:2], s[2:3]
3639 ; GCN-NEXT: s_endpgm
3641 ; GFX11-LABEL: global_dec_saddr_i64_nortn:
3643 ; GFX11-NEXT: global_atomic_dec_u64 v0, v[1:2], s[2:3]
3644 ; GFX11-NEXT: s_nop 0
3645 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3646 ; GFX11-NEXT: s_endpgm
3647 %zext.offset = zext i32 %voffset to i64
3648 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3649 %unused = atomicrmw udec_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
; atomicrmw udec_wrap (i64, syncscope "agent", monotonic) on %sbase + zext(%voffset) - 128;
; the result is bound to %unused and not consumed.
; The checks expect the atomic without a destination register or glc, reading v[1:2] with
; offset:-128; GFX11 additionally expects s_nop 0 and s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; before s_endpgm.
3653 define amdgpu_ps void @global_dec_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
3654 ; GCN-LABEL: global_dec_saddr_i64_nortn_neg128:
3656 ; GCN-NEXT: global_atomic_dec_x2 v0, v[1:2], s[2:3] offset:-128
3657 ; GCN-NEXT: s_endpgm
3659 ; GFX11-LABEL: global_dec_saddr_i64_nortn_neg128:
3661 ; GFX11-NEXT: global_atomic_dec_u64 v0, v[1:2], s[2:3] offset:-128
3662 ; GFX11-NEXT: s_nop 0
3663 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3664 ; GFX11-NEXT: s_endpgm
3665 %zext.offset = zext i32 %voffset to i64
3666 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
3667 %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
3668 %unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
; Attribute group #0: argmemonly, nounwind, willreturn.
; NOTE(review): none of the function definitions visible around here reference #0 - confirm
; whether anything elsewhere in the file still uses this group before keeping or removing it.
3672 attributes #0 = { argmemonly nounwind willreturn }