1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A
3 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs | FileCheck %s -check-prefix=GFX940
; Declarations of the f64 atomic intrinsics. The buffer intrinsics are declared
; in two resource forms (a 4 x i32 vector and a ptr addrspace(8)); each struct
; form takes one more i32 operand than the matching raw form. The global, flat
; and ds declarations have no caller in the visible part of this file.
5 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
6 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
7 declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
8 declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
9 declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
10 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
11 declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg)
12 declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
13 declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
14 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
15 declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg)
16 declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
17 declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
18 declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
19 declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
20 declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
21 declare double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data)
22 declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
23 declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
24 declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1)
; Kernel test: calls llvm.amdgcn.raw.buffer.atomic.fadd.f64 with a vector
; resource and makes no visible use of %ret. Both targets are expected to emit
; buffer_atomic_add_f64 in `offen` form without a glc/sc0 modifier.
; NOTE(review): %vindex is passed as the third operand, which the checks show
; as the `offen` VGPR (v2), not an `idxen` index - the name may mislead.
26 define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
27 ; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64:
28 ; GFX90A: ; %bb.0: ; %main_body
29 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
30 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
31 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
32 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
33 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
34 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
35 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen
36 ; GFX90A-NEXT: s_endpgm
38 ; GFX940-LABEL: raw_buffer_atomic_add_noret_f64:
39 ; GFX940: ; %bb.0: ; %main_body
40 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
41 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
42 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
43 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
44 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
45 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
46 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen
47 ; GFX940-NEXT: s_endpgm
49 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
; Shader (amdgpu_ps) test: %rsrc is passed inreg and the value returned by
; llvm.amdgcn.raw.buffer.atomic.fadd.f64 is stored through `ptr undef`. The
; checks expect the atomic to carry glc on gfx90a and sc0 on gfx940, followed
; by a flat_store_dwordx2 of the returned value.
53 define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
54 ; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64:
55 ; GFX90A: ; %bb.0: ; %main_body
56 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
57 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
58 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
59 ; GFX90A-NEXT: s_endpgm
61 ; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64:
62 ; GFX940: ; %bb.0: ; %main_body
63 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
64 ; GFX940-NEXT: s_waitcnt vmcnt(0)
65 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
66 ; GFX940-NEXT: s_endpgm
68 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
69 store double %ret, ptr undef
; Kernel test: raw buffer fadd with the last two operands set to 4 and 2, and
; the returned value stored to %out. The checks expect the literal 4 right
; after the resource operand, with `glc slc` on gfx90a and `sc0 nt` on gfx940.
73 define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
74 ; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
75 ; GFX90A: ; %bb.0: ; %main_body
76 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
77 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
78 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
79 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
80 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
81 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
82 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
83 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc
84 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
85 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
86 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
87 ; GFX90A-NEXT: s_endpgm
89 ; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
90 ; GFX940: ; %bb.0: ; %main_body
91 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
92 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
93 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
94 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
95 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
96 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
97 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
98 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
99 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
100 ; GFX940-NEXT: s_waitcnt vmcnt(0)
101 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
102 ; GFX940-NEXT: s_endpgm
104 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
105 store double %ret, ptr addrspace(1) %out, align 8
; Kernel test: ptr addrspace(8) resource form of the raw buffer fadd, with no
; visible use of %ret. The expected instruction is the same `offen`
; buffer_atomic_add_f64, without glc/sc0, as in the vector-resource test.
; NOTE(review): %vindex ends up as the `offen` VGPR in the checks, not an index.
109 define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
110 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
111 ; GFX90A: ; %bb.0: ; %main_body
112 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
113 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
114 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
115 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
116 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
117 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
118 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen
119 ; GFX90A-NEXT: s_endpgm
121 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
122 ; GFX940: ; %bb.0: ; %main_body
123 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
124 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
125 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
126 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
127 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
128 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
129 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen
130 ; GFX940-NEXT: s_endpgm
132 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
; Shader (amdgpu_ps) test: ptr addrspace(8) resource passed inreg; the value
; returned by the raw.ptr buffer fadd is stored through `ptr undef`. The checks
; expect glc on gfx90a and sc0 on gfx940 on the atomic, then a flat store.
136 define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
137 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
138 ; GFX90A: ; %bb.0: ; %main_body
139 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
140 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
141 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
142 ; GFX90A-NEXT: s_endpgm
144 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
145 ; GFX940: ; %bb.0: ; %main_body
146 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
147 ; GFX940-NEXT: s_waitcnt vmcnt(0)
148 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
149 ; GFX940-NEXT: s_endpgm
151 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
152 store double %ret, ptr undef
; Kernel test: raw.ptr buffer fadd with the last two operands set to 4 and 2;
; the returned value is stored to %out. The checks expect the literal 4 right
; after the resource operand, with `glc slc` on gfx90a and `sc0 nt` on gfx940.
156 define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
157 ; GFX90A-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
158 ; GFX90A: ; %bb.0: ; %main_body
159 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
160 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
161 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
162 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
163 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
164 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
165 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
166 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc
167 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
168 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
169 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
170 ; GFX90A-NEXT: s_endpgm
172 ; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
173 ; GFX940: ; %bb.0: ; %main_body
174 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
175 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
176 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
177 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
178 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
179 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
180 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
181 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
182 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
183 ; GFX940-NEXT: s_waitcnt vmcnt(0)
184 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
185 ; GFX940-NEXT: s_endpgm
187 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
188 store double %ret, ptr addrspace(1) %out, align 8
; Kernel test: calls llvm.amdgcn.struct.buffer.atomic.fadd.f64 with %vindex
; and three zero operands, and makes no visible use of %ret. The checks expect
; buffer_atomic_add_f64 in `idxen` form without a glc/sc0 modifier.
192 define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
193 ; GFX90A-LABEL: struct_buffer_atomic_add_noret_f64:
194 ; GFX90A: ; %bb.0: ; %main_body
195 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
196 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
197 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
198 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
199 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
200 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
201 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen
202 ; GFX90A-NEXT: s_endpgm
204 ; GFX940-LABEL: struct_buffer_atomic_add_noret_f64:
205 ; GFX940: ; %bb.0: ; %main_body
206 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
207 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
208 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
209 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
210 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
211 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
212 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen
213 ; GFX940-NEXT: s_endpgm
215 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
; Shader (amdgpu_ps) test: struct buffer fadd whose returned value is stored
; through `ptr undef`. The checks expect the `idxen` atomic to carry glc on
; gfx90a and sc0 on gfx940, followed by a flat_store_dwordx2 of the result.
219 define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
220 ; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64:
221 ; GFX90A: ; %bb.0: ; %main_body
222 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen glc
223 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
224 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
225 ; GFX90A-NEXT: s_endpgm
227 ; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64:
228 ; GFX940: ; %bb.0: ; %main_body
229 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
230 ; GFX940-NEXT: s_waitcnt vmcnt(0)
231 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
232 ; GFX940-NEXT: s_endpgm
234 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
235 store double %ret, ptr undef
; Kernel test: struct buffer fadd with trailing operands 4, 0 and 2; the
; returned value is stored to %out. The checks expect `0 idxen offset:4`, with
; `glc slc` on gfx90a and `sc0 nt` on gfx940.
239 define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
240 ; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
241 ; GFX90A: ; %bb.0: ; %main_body
242 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
243 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
244 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
245 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
246 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
247 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
248 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
249 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
250 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
251 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
252 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
253 ; GFX90A-NEXT: s_endpgm
255 ; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
256 ; GFX940: ; %bb.0: ; %main_body
257 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
258 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
259 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
260 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
261 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
262 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
263 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
264 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
265 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
266 ; GFX940-NEXT: s_waitcnt vmcnt(0)
267 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
268 ; GFX940-NEXT: s_endpgm
270 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
271 store double %ret, ptr addrspace(1) %out, align 8
; Kernel test: ptr addrspace(8) resource form of the struct buffer fadd, with
; no visible use of %ret. The checks expect buffer_atomic_add_f64 in `idxen`
; form without a glc/sc0 modifier on both targets.
275 define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
276 ; GFX90A-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
277 ; GFX90A: ; %bb.0: ; %main_body
278 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
279 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
280 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
281 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
282 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
283 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
284 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen
285 ; GFX90A-NEXT: s_endpgm
287 ; GFX940-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
288 ; GFX940: ; %bb.0: ; %main_body
289 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
290 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
291 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
292 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
293 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
294 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
295 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen
296 ; GFX940-NEXT: s_endpgm
298 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
; Shader (amdgpu_ps) test: struct.ptr buffer fadd with the resource passed
; inreg; the returned value is stored through `ptr undef`. The checks expect
; glc on gfx90a and sc0 on gfx940 on the `idxen` atomic, then a flat store.
302 define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
303 ; GFX90A-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
304 ; GFX90A: ; %bb.0: ; %main_body
305 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen glc
306 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
307 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
308 ; GFX90A-NEXT: s_endpgm
310 ; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
311 ; GFX940: ; %bb.0: ; %main_body
312 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
313 ; GFX940-NEXT: s_waitcnt vmcnt(0)
314 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
315 ; GFX940-NEXT: s_endpgm
317 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
318 store double %ret, ptr undef
; Kernel test: struct.ptr buffer fadd with trailing operands 4, 0 and 2; the
; returned value is stored to %out. The checks expect `0 idxen offset:4`, with
; `glc slc` on gfx90a and `sc0 nt` on gfx940.
322 define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
323 ; GFX90A-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
324 ; GFX90A: ; %bb.0: ; %main_body
325 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
326 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
327 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
328 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
329 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
330 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
331 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
332 ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
333 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
334 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
335 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
336 ; GFX90A-NEXT: s_endpgm
338 ; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
339 ; GFX940: ; %bb.0: ; %main_body
340 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
341 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
342 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
343 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
344 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
345 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
346 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
347 ; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
348 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
349 ; GFX940-NEXT: s_waitcnt vmcnt(0)
350 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
351 ; GFX940-NEXT: s_endpgm
353 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
354 store double %ret, ptr addrspace(1) %out, align 8
; Kernel test: calls llvm.amdgcn.raw.buffer.atomic.fmin.f64 with a vector
; resource and makes no visible use of %ret. Both targets are expected to emit
; buffer_atomic_min_f64 in `offen` form without a glc/sc0 modifier.
; NOTE(review): %vindex ends up as the `offen` VGPR in the checks, not an index.
358 define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
359 ; GFX90A-LABEL: raw_buffer_atomic_min_noret_f64:
360 ; GFX90A: ; %bb.0: ; %main_body
361 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
362 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
363 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
364 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
365 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
366 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
367 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen
368 ; GFX90A-NEXT: s_endpgm
370 ; GFX940-LABEL: raw_buffer_atomic_min_noret_f64:
371 ; GFX940: ; %bb.0: ; %main_body
372 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
373 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
374 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
375 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
376 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
377 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
378 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen
379 ; GFX940-NEXT: s_endpgm
381 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
; Shader (amdgpu_ps) test: raw buffer fmin whose returned value is stored
; through `ptr undef`. The checks expect the `offen` atomic to carry glc on
; gfx90a and sc0 on gfx940, followed by a flat_store_dwordx2 of the result.
385 define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
386 ; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64:
387 ; GFX90A: ; %bb.0: ; %main_body
388 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc
389 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
390 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
391 ; GFX90A-NEXT: s_endpgm
393 ; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64:
394 ; GFX940: ; %bb.0: ; %main_body
395 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
396 ; GFX940-NEXT: s_waitcnt vmcnt(0)
397 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
398 ; GFX940-NEXT: s_endpgm
400 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
401 store double %ret, ptr undef
; Kernel test: raw buffer fmin with the last two operands set to 4 and 2; the
; returned value is stored to %out. The checks expect the literal 4 right
; after the resource operand, with `glc slc` on gfx90a and `sc0 nt` on gfx940.
405 define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
406 ; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
407 ; GFX90A: ; %bb.0: ; %main_body
408 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
409 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
410 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
411 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
412 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
413 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
414 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
415 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 4 offen glc slc
416 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
417 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
418 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
419 ; GFX90A-NEXT: s_endpgm
421 ; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
422 ; GFX940: ; %bb.0: ; %main_body
423 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
424 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
425 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
426 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
427 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
428 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
429 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
430 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
431 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
432 ; GFX940-NEXT: s_waitcnt vmcnt(0)
433 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
434 ; GFX940-NEXT: s_endpgm
436 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
437 store double %ret, ptr addrspace(1) %out, align 8
; Kernel test: ptr addrspace(8) resource form of the raw buffer fmin, with no
; visible use of %ret. The checks expect buffer_atomic_min_f64 in `offen` form
; without a glc/sc0 modifier on both targets.
; NOTE(review): %vindex ends up as the `offen` VGPR in the checks, not an index.
441 define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
442 ; GFX90A-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
443 ; GFX90A: ; %bb.0: ; %main_body
444 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
445 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
446 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
447 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
448 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
449 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
450 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen
451 ; GFX90A-NEXT: s_endpgm
453 ; GFX940-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
454 ; GFX940: ; %bb.0: ; %main_body
455 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
456 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
457 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
458 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
459 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
460 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
461 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen
462 ; GFX940-NEXT: s_endpgm
464 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
; Shader (amdgpu_ps) test: raw.ptr buffer fmin with the resource passed inreg;
; the returned value is stored through `ptr undef`. The checks expect glc on
; gfx90a and sc0 on gfx940 on the `offen` atomic, then a flat store.
468 define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
469 ; GFX90A-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
470 ; GFX90A: ; %bb.0: ; %main_body
471 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc
472 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
473 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
474 ; GFX90A-NEXT: s_endpgm
476 ; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
477 ; GFX940: ; %bb.0: ; %main_body
478 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
479 ; GFX940-NEXT: s_waitcnt vmcnt(0)
480 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
481 ; GFX940-NEXT: s_endpgm
483 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
484 store double %ret, ptr undef
; Kernel test: raw.ptr buffer fmin with the last two operands set to 4 and 2;
; the returned value is stored to %out. The checks expect the literal 4 right
; after the resource operand, with `glc slc` on gfx90a and `sc0 nt` on gfx940.
488 define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
489 ; GFX90A-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
490 ; GFX90A: ; %bb.0: ; %main_body
491 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
492 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
493 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
494 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
495 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
496 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
497 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
498 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 4 offen glc slc
499 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
500 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
501 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
502 ; GFX90A-NEXT: s_endpgm
504 ; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
505 ; GFX940: ; %bb.0: ; %main_body
506 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
507 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
508 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
509 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
510 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
511 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
512 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
513 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
514 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
515 ; GFX940-NEXT: s_waitcnt vmcnt(0)
516 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
517 ; GFX940-NEXT: s_endpgm
519 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
520 store double %ret, ptr addrspace(1) %out, align 8
; Kernel test: calls llvm.amdgcn.struct.buffer.atomic.fmin.f64 with %vindex
; and three zero operands, and makes no visible use of %ret. The checks expect
; buffer_atomic_min_f64 in `idxen` form without a glc/sc0 modifier.
524 define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
525 ; GFX90A-LABEL: struct_buffer_atomic_min_noret_f64:
526 ; GFX90A: ; %bb.0: ; %main_body
527 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
528 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
529 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
530 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
531 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
532 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
533 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen
534 ; GFX90A-NEXT: s_endpgm
536 ; GFX940-LABEL: struct_buffer_atomic_min_noret_f64:
537 ; GFX940: ; %bb.0: ; %main_body
538 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
539 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
540 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
541 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
542 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
543 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
544 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen
545 ; GFX940-NEXT: s_endpgm
547 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
; Shader (amdgpu_ps) test: struct buffer fmin whose returned value is stored
; through `ptr undef`. The checks expect the `idxen` atomic to carry glc on
; gfx90a and sc0 on gfx940, followed by a flat_store_dwordx2 of the result.
551 define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
552 ; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64:
553 ; GFX90A: ; %bb.0: ; %main_body
554 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen glc
555 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
556 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
557 ; GFX90A-NEXT: s_endpgm
559 ; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64:
560 ; GFX940: ; %bb.0: ; %main_body
561 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
562 ; GFX940-NEXT: s_waitcnt vmcnt(0)
563 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
564 ; GFX940-NEXT: s_endpgm
566 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
567 store double %ret, ptr undef
; Kernel test: struct buffer fmin with trailing operands 4, 0 and 2; the
; returned value is stored to %out. The checks expect `0 idxen offset:4`, with
; `glc slc` on gfx90a and `sc0 nt` on gfx940.
571 define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
572 ; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
573 ; GFX90A: ; %bb.0: ; %main_body
574 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
575 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
576 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
577 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
578 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
579 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
580 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
581 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
582 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
583 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
584 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
585 ; GFX90A-NEXT: s_endpgm
587 ; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
588 ; GFX940: ; %bb.0: ; %main_body
589 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
590 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
591 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
592 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
593 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
594 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
595 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
596 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
597 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
598 ; GFX940-NEXT: s_waitcnt vmcnt(0)
599 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
600 ; GFX940-NEXT: s_endpgm
602 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
603 store double %ret, ptr addrspace(1) %out, align 8
; Kernel test: ptr addrspace(8) resource form of the struct buffer fmin, with
; no visible use of %ret. The checks expect buffer_atomic_min_f64 in `idxen`
; form without a glc/sc0 modifier on both targets.
607 define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
608 ; GFX90A-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
609 ; GFX90A: ; %bb.0: ; %main_body
610 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
611 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
612 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
613 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
614 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
615 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
616 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen
617 ; GFX90A-NEXT: s_endpgm
619 ; GFX940-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
620 ; GFX940: ; %bb.0: ; %main_body
621 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
622 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
623 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
624 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
625 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
626 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
627 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen
628 ; GFX940-NEXT: s_endpgm
630 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
; Shader (amdgpu_ps) test: struct.ptr buffer fmin with the resource passed
; inreg; the returned value is stored through `ptr undef`. The checks expect
; glc on gfx90a and sc0 on gfx940 on the `idxen` atomic, then a flat store.
634 define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
635 ; GFX90A-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
636 ; GFX90A: ; %bb.0: ; %main_body
637 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen glc
638 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
639 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
640 ; GFX90A-NEXT: s_endpgm
642 ; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
643 ; GFX940: ; %bb.0: ; %main_body
644 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
645 ; GFX940-NEXT: s_waitcnt vmcnt(0)
646 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
647 ; GFX940-NEXT: s_endpgm
649 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
650 store double %ret, ptr undef
; Kernel test: struct.ptr buffer fmin with trailing operands 4, 0 and 2; the
; returned value is stored to %out. The checks expect `0 idxen offset:4`, with
; `glc slc` on gfx90a and `sc0 nt` on gfx940.
654 define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
655 ; GFX90A-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
656 ; GFX90A: ; %bb.0: ; %main_body
657 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
658 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
659 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
660 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
661 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
662 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
663 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
664 ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
665 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
666 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
667 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
668 ; GFX90A-NEXT: s_endpgm
670 ; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
671 ; GFX940: ; %bb.0: ; %main_body
672 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
673 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
674 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
675 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
676 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
677 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
678 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
679 ; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
680 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
681 ; GFX940-NEXT: s_waitcnt vmcnt(0)
682 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
683 ; GFX940-NEXT: s_endpgm
685 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
686 store double %ret, ptr addrspace(1) %out, align 8
; No-return form of the raw buffer fmax intrinsic on f64 with a <4 x i32>
; resource: %ret is never used, and the expected buffer_atomic_max_f64 carries
; no glc/sc0 modifier. The third call operand (named %vindex here) is the
; register used with offen in the expected instruction.
690 define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
691 ; GFX90A-LABEL: raw_buffer_atomic_max_noret_f64:
692 ; GFX90A: ; %bb.0: ; %main_body
693 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
694 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
695 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
696 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
697 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
698 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
699 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen
700 ; GFX90A-NEXT: s_endpgm
702 ; GFX940-LABEL: raw_buffer_atomic_max_noret_f64:
703 ; GFX940: ; %bb.0: ; %main_body
704 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
705 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
706 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
707 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
708 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
709 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
710 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen
711 ; GFX940-NEXT: s_endpgm
713 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
; Returning form of the raw buffer fmax intrinsic on f64 (amdgpu_ps, inreg
; <4 x i32> resource). %ret is stored so the result stays live; the expected
; offen buffer_atomic_max_f64 carries glc (gfx90a) or sc0 (gfx940).
717 define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
718 ; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64:
719 ; GFX90A: ; %bb.0: ; %main_body
720 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc
721 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
722 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
723 ; GFX90A-NEXT: s_endpgm
725 ; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64:
726 ; GFX940: ; %bb.0: ; %main_body
727 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
728 ; GFX940-NEXT: s_waitcnt vmcnt(0)
729 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
730 ; GFX940-NEXT: s_endpgm
732 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
733 store double %ret, ptr undef
; Kernel variant of the returning raw buffer fmax test; the result is stored
; to %out. The call passes 4 and 2 as its trailing i32 operands: in the
; expected instruction the 4 appears directly after the resource (before
; offen) rather than as offset:4, and the modifiers are glc slc (gfx90a) or
; sc0 nt (gfx940).
737 define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
738 ; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
739 ; GFX90A: ; %bb.0: ; %main_body
740 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
741 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
742 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
743 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
744 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
745 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
746 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
747 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 4 offen glc slc
748 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
749 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
750 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
751 ; GFX90A-NEXT: s_endpgm
753 ; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
754 ; GFX940: ; %bb.0: ; %main_body
755 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
756 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
757 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
758 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
759 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
760 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
761 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
762 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
763 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
764 ; GFX940-NEXT: s_waitcnt vmcnt(0)
765 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
766 ; GFX940-NEXT: s_endpgm
768 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
769 store double %ret, ptr addrspace(1) %out, align 8
; No-return form of the raw.ptr buffer fmax intrinsic on f64, taking the
; resource as ptr addrspace(8). %ret is never used, and the expected offen
; buffer_atomic_max_f64 carries no glc/sc0 modifier on either target.
773 define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
774 ; GFX90A-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
775 ; GFX90A: ; %bb.0: ; %main_body
776 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
777 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
778 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
779 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
780 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
781 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
782 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen
783 ; GFX90A-NEXT: s_endpgm
785 ; GFX940-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
786 ; GFX940: ; %bb.0: ; %main_body
787 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
788 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
789 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
790 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
791 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
792 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
793 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen
794 ; GFX940-NEXT: s_endpgm
796 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
; Returning form of the raw.ptr buffer fmax intrinsic on f64 (amdgpu_ps,
; inreg ptr addrspace(8) resource). %ret is stored so the result stays live;
; the expected offen buffer_atomic_max_f64 carries glc (gfx90a) or sc0
; (gfx940).
800 define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
801 ; GFX90A-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
802 ; GFX90A: ; %bb.0: ; %main_body
803 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc
804 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
805 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
806 ; GFX90A-NEXT: s_endpgm
808 ; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
809 ; GFX940: ; %bb.0: ; %main_body
810 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
811 ; GFX940-NEXT: s_waitcnt vmcnt(0)
812 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
813 ; GFX940-NEXT: s_endpgm
815 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
816 store double %ret, ptr undef
; Kernel variant of the returning raw.ptr buffer fmax test; the result is
; stored to %out. The call passes 4 and 2 as its trailing i32 operands: in the
; expected instruction the 4 appears directly after the resource (before
; offen), and the modifiers are glc slc (gfx90a) or sc0 nt (gfx940).
820 define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
821 ; GFX90A-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
822 ; GFX90A: ; %bb.0: ; %main_body
823 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
824 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
825 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
826 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
827 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
828 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
829 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
830 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 4 offen glc slc
831 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
832 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
833 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
834 ; GFX90A-NEXT: s_endpgm
836 ; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
837 ; GFX940: ; %bb.0: ; %main_body
838 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
839 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
840 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
841 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
842 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
843 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
844 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
845 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
846 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
847 ; GFX940-NEXT: s_waitcnt vmcnt(0)
848 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
849 ; GFX940-NEXT: s_endpgm
851 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
852 store double %ret, ptr addrspace(1) %out, align 8
; No-return form of the struct buffer fmax intrinsic on f64 with a <4 x i32>
; resource. %ret is never used, and the expected idxen buffer_atomic_max_f64
; carries no glc/sc0 modifier on either target.
856 define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
857 ; GFX90A-LABEL: struct_buffer_atomic_max_noret_f64:
858 ; GFX90A: ; %bb.0: ; %main_body
859 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
860 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
861 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
862 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
863 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
864 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
865 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen
866 ; GFX90A-NEXT: s_endpgm
868 ; GFX940-LABEL: struct_buffer_atomic_max_noret_f64:
869 ; GFX940: ; %bb.0: ; %main_body
870 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
871 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
872 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
873 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
874 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
875 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
876 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen
877 ; GFX940-NEXT: s_endpgm
879 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
; Returning form of the struct buffer fmax intrinsic on f64 (amdgpu_ps, inreg
; <4 x i32> resource). %ret is stored so the result stays live; the expected
; idxen buffer_atomic_max_f64 carries glc (gfx90a) or sc0 (gfx940).
883 define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
884 ; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64:
885 ; GFX90A: ; %bb.0: ; %main_body
886 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen glc
887 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
888 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
889 ; GFX90A-NEXT: s_endpgm
891 ; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64:
892 ; GFX940: ; %bb.0: ; %main_body
893 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
894 ; GFX940-NEXT: s_waitcnt vmcnt(0)
895 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
896 ; GFX940-NEXT: s_endpgm
898 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
899 store double %ret, ptr undef
; Kernel variant of the returning struct buffer fmax test; the result is
; stored to %out. The call passes 4, 0, 2 as its trailing i32 operands; the
; expected instruction shows offset:4 together with glc slc (gfx90a) or
; sc0 nt (gfx940).
903 define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
904 ; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
905 ; GFX90A: ; %bb.0: ; %main_body
906 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
907 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
908 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
909 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
910 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
911 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
912 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
913 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
914 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
915 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
916 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
917 ; GFX90A-NEXT: s_endpgm
919 ; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
920 ; GFX940: ; %bb.0: ; %main_body
921 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
922 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
923 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
924 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
925 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
926 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
927 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
928 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
929 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
930 ; GFX940-NEXT: s_waitcnt vmcnt(0)
931 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
932 ; GFX940-NEXT: s_endpgm
934 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
935 store double %ret, ptr addrspace(1) %out, align 8
; No-return form of the struct.ptr buffer fmax intrinsic on f64, taking the
; resource as ptr addrspace(8). %ret is never used, and the expected idxen
; buffer_atomic_max_f64 carries no glc/sc0 modifier on either target.
939 define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
940 ; GFX90A-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
941 ; GFX90A: ; %bb.0: ; %main_body
942 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
943 ; GFX90A-NEXT: s_load_dword s8, s[2:3], 0x3c
944 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
945 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
946 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
947 ; GFX90A-NEXT: v_mov_b32_e32 v2, s8
948 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen
949 ; GFX90A-NEXT: s_endpgm
951 ; GFX940-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
952 ; GFX940: ; %bb.0: ; %main_body
953 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
954 ; GFX940-NEXT: s_load_dword s8, s[2:3], 0x3c
955 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
956 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
957 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
958 ; GFX940-NEXT: v_mov_b32_e32 v2, s8
959 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen
960 ; GFX940-NEXT: s_endpgm
962 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
; Returning form of the struct.ptr buffer fmax intrinsic on f64 (amdgpu_ps,
; inreg ptr addrspace(8) resource). %ret is stored so the result stays live;
; the expected idxen buffer_atomic_max_f64 carries glc (gfx90a) or sc0
; (gfx940).
966 define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
967 ; GFX90A-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
968 ; GFX90A: ; %bb.0: ; %main_body
969 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen glc
970 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
971 ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
972 ; GFX90A-NEXT: s_endpgm
974 ; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
975 ; GFX940: ; %bb.0: ; %main_body
976 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
977 ; GFX940-NEXT: s_waitcnt vmcnt(0)
978 ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
979 ; GFX940-NEXT: s_endpgm
981 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
982 store double %ret, ptr undef
; Kernel variant of the returning struct.ptr buffer fmax test; the result is
; stored to %out. The call passes 4, 0, 2 as its trailing i32 operands; the
; expected instruction shows offset:4 together with glc slc (gfx90a) or
; sc0 nt (gfx940).
986 define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
987 ; GFX90A-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
988 ; GFX90A: ; %bb.0: ; %main_body
989 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
990 ; GFX90A-NEXT: s_load_dword s10, s[2:3], 0x3c
991 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
992 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
993 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
994 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
995 ; GFX90A-NEXT: v_mov_b32_e32 v2, s10
996 ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
997 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
998 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
999 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
1000 ; GFX90A-NEXT: s_endpgm
1002 ; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
1003 ; GFX940: ; %bb.0: ; %main_body
1004 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
1005 ; GFX940-NEXT: s_load_dword s10, s[2:3], 0x3c
1006 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1007 ; GFX940-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
1008 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1009 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
1010 ; GFX940-NEXT: v_mov_b32_e32 v2, s10
1011 ; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
1012 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1013 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1014 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
1015 ; GFX940-NEXT: s_endpgm
1017 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
1018 store double %ret, ptr addrspace(1) %out, align 8
; No-return use of the llvm.amdgcn.global.atomic.fadd.f64 intrinsic from a
; kernel: %ret is never used. Both targets are expected to emit
; global_atomic_add_f64 without a glc/sc0 modifier.
1022 define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, double %data) {
1023 ; GFX90A-LABEL: global_atomic_fadd_f64_noret:
1024 ; GFX90A: ; %bb.0: ; %main_body
1025 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1026 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1027 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1028 ; GFX90A-NEXT: v_mov_b32_e32 v0, s6
1029 ; GFX90A-NEXT: v_mov_b32_e32 v1, s7
1030 ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
1031 ; GFX90A-NEXT: s_endpgm
1033 ; GFX940-LABEL: global_atomic_fadd_f64_noret:
1034 ; GFX940: ; %bb.0: ; %main_body
1035 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1036 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1037 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1038 ; GFX940-NEXT: v_mov_b32_e32 v0, s6
1039 ; GFX940-NEXT: v_mov_b32_e32 v1, s7
1040 ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
1041 ; GFX940-NEXT: s_endpgm
1043 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
; No-return use of the llvm.amdgcn.global.atomic.fmin.f64 intrinsic from a
; kernel: %ret is never used. Both targets are expected to emit
; global_atomic_min_f64 without a glc/sc0 modifier.
1047 define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) {
1048 ; GFX90A-LABEL: global_atomic_fmin_f64_noret:
1049 ; GFX90A: ; %bb.0: ; %main_body
1050 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1051 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1052 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1053 ; GFX90A-NEXT: v_mov_b32_e32 v0, s6
1054 ; GFX90A-NEXT: v_mov_b32_e32 v1, s7
1055 ; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5]
1056 ; GFX90A-NEXT: s_endpgm
1058 ; GFX940-LABEL: global_atomic_fmin_f64_noret:
1059 ; GFX940: ; %bb.0: ; %main_body
1060 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1061 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1062 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1063 ; GFX940-NEXT: v_mov_b32_e32 v0, s6
1064 ; GFX940-NEXT: v_mov_b32_e32 v1, s7
1065 ; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5]
1066 ; GFX940-NEXT: s_endpgm
1068 %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
; No-return use of the llvm.amdgcn.global.atomic.fmax.f64 intrinsic from a
; kernel: %ret is never used. Both targets are expected to emit
; global_atomic_max_f64 without a glc/sc0 modifier.
1072 define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) {
1073 ; GFX90A-LABEL: global_atomic_fmax_f64_noret:
1074 ; GFX90A: ; %bb.0: ; %main_body
1075 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1076 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1077 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1078 ; GFX90A-NEXT: v_mov_b32_e32 v0, s6
1079 ; GFX90A-NEXT: v_mov_b32_e32 v1, s7
1080 ; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5]
1081 ; GFX90A-NEXT: s_endpgm
1083 ; GFX940-LABEL: global_atomic_fmax_f64_noret:
1084 ; GFX940: ; %bb.0: ; %main_body
1085 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1086 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1087 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1088 ; GFX940-NEXT: v_mov_b32_e32 v0, s6
1089 ; GFX940-NEXT: v_mov_b32_e32 v1, s7
1090 ; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5]
1091 ; GFX940-NEXT: s_endpgm
1093 %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
; atomicrmw fadd of the constant 4.0 on a global pointer, seq_cst with no
; syncscope given, result unused. The gfx90a expectation is a
; global_atomic_cmpswap_x2 retry loop; the gfx940 expectation is a single
; global_atomic_add_f64 with sc1. In both, the atomic is guarded by
; v_mbcnt / v_cmp_eq 0 / s_and_saveexec and the added value is 4.0 multiplied
; by s_bcnt1 of the saved exec mask.
1097 define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 {
1098 ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat:
1099 ; GFX90A: ; %bb.0: ; %main_body
1100 ; GFX90A-NEXT: s_mov_b64 s[4:5], exec
1101 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0
1102 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0
1103 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1104 ; GFX90A-NEXT: s_and_saveexec_b64 s[0:1], vcc
1105 ; GFX90A-NEXT: s_cbranch_execz .LBB39_3
1106 ; GFX90A-NEXT: ; %bb.1:
1107 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
1108 ; GFX90A-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
1109 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s4
1110 ; GFX90A-NEXT: s_mov_b64 s[2:3], 0
1111 ; GFX90A-NEXT: v_mul_f64 v[4:5], v[0:1], 4.0
1112 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1113 ; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
1114 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1115 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1116 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
1117 ; GFX90A-NEXT: .LBB39_2: ; %atomicrmw.start
1118 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1119 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
1120 ; GFX90A-NEXT: buffer_wbl2
1121 ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] glc
1122 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1123 ; GFX90A-NEXT: buffer_invl2
1124 ; GFX90A-NEXT: buffer_wbinvl1_vol
1125 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
1126 ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
1127 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1128 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
1129 ; GFX90A-NEXT: s_cbranch_execnz .LBB39_2
1130 ; GFX90A-NEXT: .LBB39_3:
1131 ; GFX90A-NEXT: s_endpgm
1133 ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat:
1134 ; GFX940: ; %bb.0: ; %main_body
1135 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
1136 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1137 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1138 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1139 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1140 ; GFX940-NEXT: s_cbranch_execz .LBB39_2
1141 ; GFX940-NEXT: ; %bb.1:
1142 ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1143 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1144 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1145 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1146 ; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1147 ; GFX940-NEXT: buffer_wbl2 sc0 sc1
1148 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1149 ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] sc1
1150 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1151 ; GFX940-NEXT: buffer_inv sc0 sc1
1152 ; GFX940-NEXT: .LBB39_2:
1153 ; GFX940-NEXT: s_endpgm
1155 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst
; atomicrmw fadd of the constant 4.0 on a global pointer at
; syncscope("agent") seq_cst, result unused. Both targets are expected to emit
; a single global_atomic_add_f64 (no cmpswap loop), guarded by
; v_mbcnt / v_cmp_eq 0 / s_and_saveexec, with 4.0 multiplied by s_bcnt1 of the
; saved exec mask. The gfx940 cache operations carry sc1 only.
1159 define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) #1 {
1160 ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent:
1161 ; GFX90A: ; %bb.0: ; %main_body
1162 ; GFX90A-NEXT: s_mov_b64 s[0:1], exec
1163 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1164 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1165 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1166 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
1167 ; GFX90A-NEXT: s_cbranch_execz .LBB40_2
1168 ; GFX90A-NEXT: ; %bb.1:
1169 ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1170 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1171 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1172 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1173 ; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1174 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1175 ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
1176 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1177 ; GFX90A-NEXT: buffer_wbinvl1_vol
1178 ; GFX90A-NEXT: .LBB40_2:
1179 ; GFX90A-NEXT: s_endpgm
1181 ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent:
1182 ; GFX940: ; %bb.0: ; %main_body
1183 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
1184 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1185 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1186 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1187 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1188 ; GFX940-NEXT: s_cbranch_execz .LBB40_2
1189 ; GFX940-NEXT: ; %bb.1:
1190 ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1191 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1192 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1193 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1194 ; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1195 ; GFX940-NEXT: buffer_wbl2 sc1
1196 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1197 ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
1198 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1199 ; GFX940-NEXT: buffer_inv sc1
1200 ; GFX940-NEXT: .LBB40_2:
1201 ; GFX940-NEXT: s_endpgm
1203 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
; atomicrmw fadd of the constant 4.0 on a global pointer at
; syncscope("one-as") seq_cst, result unused. The gfx90a expectation is a
; global_atomic_cmpswap_x2 retry loop; the gfx940 expectation is a single
; global_atomic_add_f64 with sc1, bracketed by buffer_wbl2 / buffer_inv with
; sc0 sc1.
1207 define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) #1 {
1208 ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system:
1209 ; GFX90A: ; %bb.0: ; %main_body
1210 ; GFX90A-NEXT: s_mov_b64 s[4:5], exec
1211 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0
1212 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0
1213 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1214 ; GFX90A-NEXT: s_and_saveexec_b64 s[0:1], vcc
1215 ; GFX90A-NEXT: s_cbranch_execz .LBB41_3
1216 ; GFX90A-NEXT: ; %bb.1:
1217 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
1218 ; GFX90A-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
1219 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s4
1220 ; GFX90A-NEXT: s_mov_b64 s[2:3], 0
1221 ; GFX90A-NEXT: v_mul_f64 v[4:5], v[0:1], 4.0
1222 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1223 ; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
1224 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1225 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1226 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
1227 ; GFX90A-NEXT: .LBB41_2: ; %atomicrmw.start
1228 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1229 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
1230 ; GFX90A-NEXT: buffer_wbl2
1231 ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] glc
1232 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1233 ; GFX90A-NEXT: buffer_invl2
1234 ; GFX90A-NEXT: buffer_wbinvl1_vol
1235 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
1236 ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
1237 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1238 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
1239 ; GFX90A-NEXT: s_cbranch_execnz .LBB41_2
1240 ; GFX90A-NEXT: .LBB41_3:
1241 ; GFX90A-NEXT: s_endpgm
1243 ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system:
1244 ; GFX940: ; %bb.0: ; %main_body
1245 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
1246 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1247 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1248 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1249 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1250 ; GFX940-NEXT: s_cbranch_execz .LBB41_2
1251 ; GFX940-NEXT: ; %bb.1:
1252 ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1253 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1254 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1255 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1256 ; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1257 ; GFX940-NEXT: buffer_wbl2 sc0 sc1
1258 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1259 ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] sc1
1260 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1261 ; GFX940-NEXT: buffer_inv sc0 sc1
1262 ; GFX940-NEXT: .LBB41_2:
1263 ; GFX940-NEXT: s_endpgm
1265 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst
; atomicrmw fadd of the constant 4.0 on a global pointer at
; syncscope("agent") seq_cst, result unused, using attribute group #0 where
; the other *_pat tests here use #1.
; NOTE(review): presumably #0 selects the denormal-flushing configuration the
; name implies; confirm against the attribute definitions.
; Both targets are expected to emit a single global_atomic_add_f64 with no
; cmpswap loop.
1269 define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) #0 {
1270 ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush:
1271 ; GFX90A: ; %bb.0: ; %main_body
1272 ; GFX90A-NEXT: s_mov_b64 s[0:1], exec
1273 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1274 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1275 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1276 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
1277 ; GFX90A-NEXT: s_cbranch_execz .LBB42_2
1278 ; GFX90A-NEXT: ; %bb.1:
1279 ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1280 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1281 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1282 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1283 ; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1284 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1285 ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
1286 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1287 ; GFX90A-NEXT: buffer_wbinvl1_vol
1288 ; GFX90A-NEXT: .LBB42_2:
1289 ; GFX90A-NEXT: s_endpgm
1291 ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush:
1292 ; GFX940: ; %bb.0: ; %main_body
1293 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
1294 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1295 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1296 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1297 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1298 ; GFX940-NEXT: s_cbranch_execz .LBB42_2
1299 ; GFX940-NEXT: ; %bb.1:
1300 ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1301 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1302 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1303 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1304 ; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1305 ; GFX940-NEXT: buffer_wbl2 sc1
1306 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1307 ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
1308 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1309 ; GFX940-NEXT: buffer_inv sc1
1310 ; GFX940-NEXT: .LBB42_2:
1311 ; GFX940-NEXT: s_endpgm
1313 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
; Returning use of the llvm.amdgcn.global.atomic.fadd.f64 intrinsic in a
; non-kernel function with a double return type. The expected
; global_atomic_add_f64 uses the "off" addressing form and carries glc
; (gfx90a) or sc0 (gfx940), followed by s_setpc_b64.
1317 define double @global_atomic_fadd_f64_rtn(ptr addrspace(1) %ptr, double %data) {
1318 ; GFX90A-LABEL: global_atomic_fadd_f64_rtn:
1319 ; GFX90A: ; %bb.0: ; %main_body
1320 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321 ; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
1322 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1323 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1325 ; GFX940-LABEL: global_atomic_fadd_f64_rtn:
1326 ; GFX940: ; %bb.0: ; %main_body
1327 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1328 ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
1329 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1330 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1332 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
; atomicrmw fadd of the constant 4.0 (the %data parameter is not used by the
; atomicrmw) at seq_cst with no syncscope given, in a function returning
; double. The gfx90a expectation is a global_load followed by a
; global_atomic_cmpswap_x2 retry loop; the gfx940 expectation is a single
; global_atomic_add_f64 with sc0 sc1.
1336 define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) #1 {
1337 ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat:
1338 ; GFX90A: ; %bb.0: ; %main_body
1339 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1340 ; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
1341 ; GFX90A-NEXT: s_mov_b64 s[4:5], 0
1342 ; GFX90A-NEXT: .LBB44_1: ; %atomicrmw.start
1343 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1344 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1345 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1346 ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
1347 ; GFX90A-NEXT: buffer_wbl2
1348 ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
1349 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1350 ; GFX90A-NEXT: buffer_invl2
1351 ; GFX90A-NEXT: buffer_wbinvl1_vol
1352 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
1353 ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1354 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
1355 ; GFX90A-NEXT: s_cbranch_execnz .LBB44_1
1356 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
1357 ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
1358 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
1359 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
1360 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1362 ; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat:
1363 ; GFX940: ; %bb.0: ; %main_body
1364 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
1366 ; GFX940-NEXT: buffer_wbl2 sc0 sc1
1367 ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
1368 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1369 ; GFX940-NEXT: buffer_inv sc0 sc1
1370 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1372 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst
; atomicrmw fadd of the constant 4.0 (the %data parameter is not used by the
; atomicrmw) at syncscope("agent") seq_cst, in a function returning double.
; Both targets are expected to emit a single global_atomic_add_f64 with no
; cmpswap loop: glc on gfx90a, sc0 on gfx940. On gfx90a the constant is
; materialised as the register pair 0 / 0x40100000.
1376 define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) #1 {
1377 ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
1378 ; GFX90A: ; %bb.0: ; %main_body
1379 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1381 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1382 ; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
1383 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1384 ; GFX90A-NEXT: buffer_wbinvl1_vol
1385 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1387 ; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
1388 ; GFX940: ; %bb.0: ; %main_body
1389 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1390 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
1391 ; GFX940-NEXT: buffer_wbl2 sc1
1392 ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
1393 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1394 ; GFX940-NEXT: buffer_inv sc1
1395 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1397 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
; atomicrmw fadd of the constant 4.0 (the %data parameter is not used by the
; atomicrmw) at syncscope("one-as") seq_cst, in a function returning double.
; The gfx90a expectation is a global_load followed by a
; global_atomic_cmpswap_x2 retry loop; the gfx940 expectation is a single
; global_atomic_add_f64 with sc0 sc1.
1401 define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) #1 {
1402 ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system:
1403 ; GFX90A: ; %bb.0: ; %main_body
1404 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1405 ; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
1406 ; GFX90A-NEXT: s_mov_b64 s[4:5], 0
1407 ; GFX90A-NEXT: .LBB46_1: ; %atomicrmw.start
1408 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1409 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1410 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1411 ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
1412 ; GFX90A-NEXT: buffer_wbl2
1413 ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
1414 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1415 ; GFX90A-NEXT: buffer_invl2
1416 ; GFX90A-NEXT: buffer_wbinvl1_vol
1417 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
1418 ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1419 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
1420 ; GFX90A-NEXT: s_cbranch_execnz .LBB46_1
1421 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
1422 ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
1423 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
1424 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
1425 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1427 ; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_system:
1428 ; GFX940: ; %bb.0: ; %main_body
1429 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
1431 ; GFX940-NEXT: buffer_wbl2 sc0 sc1
1432 ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
1433 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1434 ; GFX940-NEXT: buffer_inv sc0 sc1
1435 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1437 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst
; Value-returning call to the llvm.amdgcn.global.atomic.fmax.f64.p1.f64
; intrinsic with %ptr and %data. The checks for both targets expect a single
; global_atomic_max_f64 with a destination register (glc on gfx90a, sc0 on
; gfx940) followed by a wait on vmcnt.
1441 define double @global_atomic_fmax_f64_rtn(ptr addrspace(1) %ptr, double %data) {
1442 ; GFX90A-LABEL: global_atomic_fmax_f64_rtn:
1443 ; GFX90A: ; %bb.0: ; %main_body
1444 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445 ; GFX90A-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off glc
1446 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1447 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1449 ; GFX940-LABEL: global_atomic_fmax_f64_rtn:
1450 ; GFX940: ; %bb.0: ; %main_body
1451 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1452 ; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
1453 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1454 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1456 %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
; Value-returning call to the llvm.amdgcn.global.atomic.fmin.f64.p1.f64
; intrinsic with %ptr and %data. The checks for both targets expect a single
; global_atomic_min_f64 with a destination register (glc on gfx90a, sc0 on
; gfx940) followed by a wait on vmcnt.
1460 define double @global_atomic_fmin_f64_rtn(ptr addrspace(1) %ptr, double %data) {
1461 ; GFX90A-LABEL: global_atomic_fmin_f64_rtn:
1462 ; GFX90A: ; %bb.0: ; %main_body
1463 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1464 ; GFX90A-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc
1465 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1466 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1468 ; GFX940-LABEL: global_atomic_fmin_f64_rtn:
1469 ; GFX940: ; %bb.0: ; %main_body
1470 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1471 ; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
1472 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1473 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1475 %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
; Kernel performing atomicrmw fadd of the constant 4.0 on a global
; (addrspace(1)) pointer with syncscope("agent") and seq_cst ordering. Unlike
; the neighbouring "_pat" tests, this define line carries no attribute group.
; Both targets' checks guard the atomic behind a v_mbcnt == 0 branch and
; multiply 4.0 by the s_bcnt1 count of the saved exec mask. The gfx90a checks
; then expect a global_atomic_cmpswap_x2 retry loop, while the gfx940 checks
; expect a single global_atomic_add_f64.
1479 define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrspace(1) %ptr) {
1480 ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
1481 ; GFX90A: ; %bb.0: ; %main_body
1482 ; GFX90A-NEXT: s_mov_b64 s[4:5], exec
1483 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0
1484 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0
1485 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1486 ; GFX90A-NEXT: s_and_saveexec_b64 s[0:1], vcc
1487 ; GFX90A-NEXT: s_cbranch_execz .LBB49_3
1488 ; GFX90A-NEXT: ; %bb.1:
1489 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
1490 ; GFX90A-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
1491 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s4
1492 ; GFX90A-NEXT: s_mov_b64 s[2:3], 0
1493 ; GFX90A-NEXT: v_mul_f64 v[4:5], v[0:1], 4.0
1494 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1495 ; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
1496 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1497 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1498 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
1499 ; GFX90A-NEXT: .LBB49_2: ; %atomicrmw.start
1500 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1501 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
1502 ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] glc
1503 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1504 ; GFX90A-NEXT: buffer_wbinvl1_vol
1505 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
1506 ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
1507 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1508 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
1509 ; GFX90A-NEXT: s_cbranch_execnz .LBB49_2
1510 ; GFX90A-NEXT: .LBB49_3:
1511 ; GFX90A-NEXT: s_endpgm
1513 ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
1514 ; GFX940: ; %bb.0: ; %main_body
1515 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
1516 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1517 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1518 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1519 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1520 ; GFX940-NEXT: s_cbranch_execz .LBB49_2
1521 ; GFX940-NEXT: ; %bb.1:
1522 ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1523 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1524 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1525 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1526 ; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1527 ; GFX940-NEXT: buffer_wbl2 sc1
1528 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1529 ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
1530 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1531 ; GFX940-NEXT: buffer_inv sc1
1532 ; GFX940-NEXT: .LBB49_2:
1533 ; GFX940-NEXT: s_endpgm
1535 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
; Kernel performing atomicrmw fadd of the constant 4.0 on a flat (default
; address space) pointer with seq_cst ordering and no explicit syncscope.
; The gfx90a checks expect a flat_atomic_cmpswap_x2 retry loop around
; v_add_f64, bracketed by buffer_wbl2 / buffer_invl2; the gfx940 checks expect
; a single flat_atomic_add_f64 carrying sc1.
1539 define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
1540 ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat:
1541 ; GFX90A: ; %bb.0: ; %main_body
1542 ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1543 ; GFX90A-NEXT: s_mov_b64 s[0:1], 0
1544 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1545 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
1546 ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1547 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
1548 ; GFX90A-NEXT: .LBB50_1: ; %atomicrmw.start
1549 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1550 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1551 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
1552 ; GFX90A-NEXT: buffer_wbl2
1553 ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
1554 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1555 ; GFX90A-NEXT: buffer_invl2
1556 ; GFX90A-NEXT: buffer_wbinvl1_vol
1557 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
1558 ; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
1559 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1560 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1]
1561 ; GFX90A-NEXT: s_cbranch_execnz .LBB50_1
1562 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
1563 ; GFX90A-NEXT: s_endpgm
1565 ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat:
1566 ; GFX940: ; %bb.0: ; %main_body
1567 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
1568 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0
1569 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1570 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
1571 ; GFX940-NEXT: buffer_wbl2 sc0 sc1
1572 ; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1
1573 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1574 ; GFX940-NEXT: buffer_inv sc0 sc1
1575 ; GFX940-NEXT: s_endpgm
1577 %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst
; Kernel performing atomicrmw fadd of the constant 4.0 on a flat pointer with
; syncscope("agent") and seq_cst ordering. The checks for both gfx90a and
; gfx940 expect a single flat_atomic_add_f64 with no destination register and
; no compare-and-swap loop.
1581 define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
1582 ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
1583 ; GFX90A: ; %bb.0: ; %main_body
1584 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
1585 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1586 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000
1587 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1588 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
1589 ; GFX90A-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
1590 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1591 ; GFX90A-NEXT: buffer_wbinvl1_vol
1592 ; GFX90A-NEXT: s_endpgm
1594 ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
1595 ; GFX940: ; %bb.0: ; %main_body
1596 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
1597 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0
1598 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1599 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
1600 ; GFX940-NEXT: buffer_wbl2 sc1
1601 ; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
1602 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1603 ; GFX940-NEXT: buffer_inv sc1
1604 ; GFX940-NEXT: s_endpgm
1606 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst
; Kernel performing atomicrmw fadd of the constant 4.0 on a flat pointer with
; syncscope("one-as") and seq_cst ordering. The gfx90a checks expect a
; flat_atomic_cmpswap_x2 retry loop in which the post-atomic wait is split
; into separate vmcnt and lgkmcnt waits; the gfx940 checks expect a single
; flat_atomic_add_f64 carrying sc1, followed by a vmcnt-only wait.
1610 define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
1611 ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system:
1612 ; GFX90A: ; %bb.0: ; %main_body
1613 ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1614 ; GFX90A-NEXT: s_mov_b64 s[0:1], 0
1615 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1616 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
1617 ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1618 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
1619 ; GFX90A-NEXT: .LBB52_1: ; %atomicrmw.start
1620 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1621 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1622 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
1623 ; GFX90A-NEXT: buffer_wbl2
1624 ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
1625 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1626 ; GFX90A-NEXT: buffer_invl2
1627 ; GFX90A-NEXT: buffer_wbinvl1_vol
1628 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1629 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
1630 ; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
1631 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1632 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1]
1633 ; GFX90A-NEXT: s_cbranch_execnz .LBB52_1
1634 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
1635 ; GFX90A-NEXT: s_endpgm
1637 ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system:
1638 ; GFX940: ; %bb.0: ; %main_body
1639 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
1640 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0
1641 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1642 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
1643 ; GFX940-NEXT: buffer_wbl2 sc0 sc1
1644 ; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1
1645 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1646 ; GFX940-NEXT: buffer_inv sc0 sc1
1647 ; GFX940-NEXT: s_endpgm
1649 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst
; Value-returning atomicrmw fadd of the constant 4.0 on a flat pointer with
; seq_cst ordering and no explicit syncscope. The gfx90a checks expect a
; flat_atomic_cmpswap_x2 retry loop whose result is copied into v0/v1; the
; gfx940 checks expect a single flat_atomic_add_f64 with a destination
; register and sc0 sc1.
1653 define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
1654 ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat:
1655 ; GFX90A: ; %bb.0: ; %main_body
1656 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1657 ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1658 ; GFX90A-NEXT: s_mov_b64 s[4:5], 0
1659 ; GFX90A-NEXT: .LBB53_1: ; %atomicrmw.start
1660 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1661 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1662 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1663 ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
1664 ; GFX90A-NEXT: buffer_wbl2
1665 ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
1666 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1667 ; GFX90A-NEXT: buffer_invl2
1668 ; GFX90A-NEXT: buffer_wbinvl1_vol
1669 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
1670 ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1671 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
1672 ; GFX90A-NEXT: s_cbranch_execnz .LBB53_1
1673 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
1674 ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
1675 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
1676 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
1677 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1679 ; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat:
1680 ; GFX940: ; %bb.0: ; %main_body
1681 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
1683 ; GFX940-NEXT: buffer_wbl2 sc0 sc1
1684 ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
1685 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1686 ; GFX940-NEXT: buffer_inv sc0 sc1
1687 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1689 %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst
; Value-returning atomicrmw fadd of the constant 4.0 on a flat pointer with
; syncscope("agent") and seq_cst ordering. The checks for both targets expect
; a single flat_atomic_add_f64 with a destination register (glc on gfx90a,
; sc0 on gfx940) and no compare-and-swap loop.
1693 define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
1694 ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
1695 ; GFX90A: ; %bb.0: ; %main_body
1696 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1697 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1698 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1699 ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
1700 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1701 ; GFX90A-NEXT: buffer_wbinvl1_vol
1702 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1704 ; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
1705 ; GFX940: ; %bb.0: ; %main_body
1706 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1707 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
1708 ; GFX940-NEXT: buffer_wbl2 sc1
1709 ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
1710 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1711 ; GFX940-NEXT: buffer_inv sc1
1712 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1714 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst
; Value-returning atomicrmw fadd of the constant 4.0 on a flat pointer with
; syncscope("one-as") and seq_cst ordering. The gfx90a checks expect a
; flat_atomic_cmpswap_x2 retry loop; the gfx940 checks expect a single
; flat_atomic_add_f64 with a destination register and sc0 sc1. On both
; targets the wait after the atomic is split into a vmcnt wait before the
; cache invalidate and an lgkmcnt wait after it.
1718 define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
1719 ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
1720 ; GFX90A: ; %bb.0: ; %main_body
1721 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1722 ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1723 ; GFX90A-NEXT: s_mov_b64 s[4:5], 0
1724 ; GFX90A-NEXT: .LBB55_1: ; %atomicrmw.start
1725 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1726 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1727 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1728 ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
1729 ; GFX90A-NEXT: buffer_wbl2
1730 ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
1731 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1732 ; GFX90A-NEXT: buffer_invl2
1733 ; GFX90A-NEXT: buffer_wbinvl1_vol
1734 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1735 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
1736 ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1737 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
1738 ; GFX90A-NEXT: s_cbranch_execnz .LBB55_1
1739 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
1740 ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
1741 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
1742 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
1743 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1745 ; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
1746 ; GFX940: ; %bb.0: ; %main_body
1747 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1748 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
1749 ; GFX940-NEXT: buffer_wbl2 sc0 sc1
1750 ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
1751 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1752 ; GFX940-NEXT: buffer_inv sc0 sc1
1753 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1754 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1756 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst
; Kernel calling the llvm.amdgcn.flat.atomic.fadd.f64.p0.f64 intrinsic with
; %ptr and %data. The checks for both targets expect the kernel arguments to
; be copied into VGPRs and a single flat_atomic_add_f64 with no destination
; register and no cache-control modifiers.
1760 define amdgpu_kernel void @flat_atomic_fadd_f64_noret(ptr %ptr, double %data) {
1761 ; GFX90A-LABEL: flat_atomic_fadd_f64_noret:
1762 ; GFX90A: ; %bb.0: ; %main_body
1763 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1764 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1765 ; GFX90A-NEXT: v_mov_b32_e32 v0, s4
1766 ; GFX90A-NEXT: v_mov_b32_e32 v1, s5
1767 ; GFX90A-NEXT: v_mov_b32_e32 v2, s6
1768 ; GFX90A-NEXT: v_mov_b32_e32 v3, s7
1769 ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1770 ; GFX90A-NEXT: s_endpgm
1772 ; GFX940-LABEL: flat_atomic_fadd_f64_noret:
1773 ; GFX940: ; %bb.0: ; %main_body
1774 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1775 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1776 ; GFX940-NEXT: v_mov_b32_e32 v0, s4
1777 ; GFX940-NEXT: v_mov_b32_e32 v1, s5
1778 ; GFX940-NEXT: v_mov_b32_e32 v2, s6
1779 ; GFX940-NEXT: v_mov_b32_e32 v3, s7
1780 ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1781 ; GFX940-NEXT: s_endpgm
1783 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
; Value-returning call to the llvm.amdgcn.flat.atomic.fadd.f64.p0.f64
; intrinsic with %ptr and %data. The checks for both targets expect a single
; flat_atomic_add_f64 with a destination register (glc on gfx90a, sc0 on
; gfx940) followed by a combined vmcnt/lgkmcnt wait.
1787 define double @flat_atomic_fadd_f64_rtn(ptr %ptr, double %data) {
1788 ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn:
1789 ; GFX90A: ; %bb.0: ; %main_body
1790 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1791 ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
1792 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1793 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1795 ; GFX940-LABEL: flat_atomic_fadd_f64_rtn:
1796 ; GFX940: ; %bb.0: ; %main_body
1797 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1798 ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
1799 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1800 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1802 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
; Kernel performing atomicrmw fadd of the constant 4.0 on a flat pointer with
; syncscope("agent") and seq_cst ordering. Unlike
; flat_atomic_fadd_f64_noret_pat_agent, this define line carries no attribute
; group. The gfx90a checks expect a flat_atomic_cmpswap_x2 retry loop; the
; gfx940 checks expect a single flat_atomic_add_f64.
1806 define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
1807 ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
1808 ; GFX90A: ; %bb.0: ; %main_body
1809 ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
1810 ; GFX90A-NEXT: s_mov_b64 s[0:1], 0
1811 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1812 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
1813 ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1814 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
1815 ; GFX90A-NEXT: .LBB58_1: ; %atomicrmw.start
1816 ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1817 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1818 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
1819 ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
1820 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1821 ; GFX90A-NEXT: buffer_wbinvl1_vol
1822 ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
1823 ; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
1824 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1825 ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1]
1826 ; GFX90A-NEXT: s_cbranch_execnz .LBB58_1
1827 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
1828 ; GFX90A-NEXT: s_endpgm
1830 ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
1831 ; GFX940: ; %bb.0: ; %main_body
1832 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
1833 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0
1834 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1835 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
1836 ; GFX940-NEXT: buffer_wbl2 sc1
1837 ; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
1838 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1839 ; GFX940-NEXT: buffer_inv sc1
1840 ; GFX940-NEXT: s_endpgm
1842 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst
; Kernel calling the llvm.amdgcn.flat.atomic.fmin.f64.p0.f64 intrinsic with
; %ptr and %data. The checks for both targets expect the kernel arguments to
; be copied into VGPRs and a single flat_atomic_min_f64 with no destination
; register and no cache-control modifiers.
1846 define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) {
1847 ; GFX90A-LABEL: flat_atomic_fmin_f64_noret:
1848 ; GFX90A: ; %bb.0: ; %main_body
1849 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1850 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1851 ; GFX90A-NEXT: v_mov_b32_e32 v0, s4
1852 ; GFX90A-NEXT: v_mov_b32_e32 v1, s5
1853 ; GFX90A-NEXT: v_mov_b32_e32 v2, s6
1854 ; GFX90A-NEXT: v_mov_b32_e32 v3, s7
1855 ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
1856 ; GFX90A-NEXT: s_endpgm
1858 ; GFX940-LABEL: flat_atomic_fmin_f64_noret:
1859 ; GFX940: ; %bb.0: ; %main_body
1860 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1861 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1862 ; GFX940-NEXT: v_mov_b32_e32 v0, s4
1863 ; GFX940-NEXT: v_mov_b32_e32 v1, s5
1864 ; GFX940-NEXT: v_mov_b32_e32 v2, s6
1865 ; GFX940-NEXT: v_mov_b32_e32 v3, s7
1866 ; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
1867 ; GFX940-NEXT: s_endpgm
1869 %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
; Value-returning call to the llvm.amdgcn.flat.atomic.fmin.f64.p0.f64
; intrinsic with %ptr and %data. The checks for both targets expect a single
; flat_atomic_min_f64 with a destination register (glc on gfx90a, sc0 on
; gfx940) followed by a combined vmcnt/lgkmcnt wait.
1873 define double @flat_atomic_fmin_f64_rtn(ptr %ptr, double %data) {
1874 ; GFX90A-LABEL: flat_atomic_fmin_f64_rtn:
1875 ; GFX90A: ; %bb.0: ; %main_body
1876 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1877 ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
1878 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1879 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1881 ; GFX940-LABEL: flat_atomic_fmin_f64_rtn:
1882 ; GFX940: ; %bb.0: ; %main_body
1883 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1884 ; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
1885 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1886 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1888 %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
; Kernel calling the llvm.amdgcn.flat.atomic.fmax.f64.p0.f64 intrinsic with
; %ptr and %data. The checks for both targets expect the kernel arguments to
; be copied into VGPRs and a single flat_atomic_max_f64 with no destination
; register and no cache-control modifiers.
1892 define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) {
1893 ; GFX90A-LABEL: flat_atomic_fmax_f64_noret:
1894 ; GFX90A: ; %bb.0: ; %main_body
1895 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1896 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1897 ; GFX90A-NEXT: v_mov_b32_e32 v0, s4
1898 ; GFX90A-NEXT: v_mov_b32_e32 v1, s5
1899 ; GFX90A-NEXT: v_mov_b32_e32 v2, s6
1900 ; GFX90A-NEXT: v_mov_b32_e32 v3, s7
1901 ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
1902 ; GFX90A-NEXT: s_endpgm
1904 ; GFX940-LABEL: flat_atomic_fmax_f64_noret:
1905 ; GFX940: ; %bb.0: ; %main_body
1906 ; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
1907 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1908 ; GFX940-NEXT: v_mov_b32_e32 v0, s4
1909 ; GFX940-NEXT: v_mov_b32_e32 v1, s5
1910 ; GFX940-NEXT: v_mov_b32_e32 v2, s6
1911 ; GFX940-NEXT: v_mov_b32_e32 v3, s7
1912 ; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
1913 ; GFX940-NEXT: s_endpgm
1915 %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
; Value-returning call to the llvm.amdgcn.flat.atomic.fmax.f64.p0.f64
; intrinsic with %ptr and %data. The checks for both targets expect a single
; flat_atomic_max_f64 with a destination register (glc on gfx90a, sc0 on
; gfx940) followed by a combined vmcnt/lgkmcnt wait.
1919 define double @flat_atomic_fmax_f64_rtn(ptr %ptr, double %data) {
1920 ; GFX90A-LABEL: flat_atomic_fmax_f64_rtn:
1921 ; GFX90A: ; %bb.0: ; %main_body
1922 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1923 ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
1924 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1925 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1927 ; GFX940-LABEL: flat_atomic_fmax_f64_rtn:
1928 ; GFX940: ; %bb.0: ; %main_body
1929 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1930 ; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
1931 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1932 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1934 %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
; Kernel calling the llvm.amdgcn.ds.fadd.f64 intrinsic on a local
; (addrspace(3)) pointer with %data and the trailing operands i32 0, i32 0,
; i1 0. The checks for both targets guard the atomic behind a v_mbcnt == 0
; branch, multiply %data by the s_bcnt1 count of the saved exec mask, and
; then expect a single ds_add_f64.
1938 define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, double %data) {
1939 ; GFX90A-LABEL: local_atomic_fadd_f64_noret:
1940 ; GFX90A: ; %bb.0: ; %main_body
1941 ; GFX90A-NEXT: s_mov_b64 s[0:1], exec
1942 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1943 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1944 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1945 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
1946 ; GFX90A-NEXT: s_cbranch_execz .LBB63_2
1947 ; GFX90A-NEXT: ; %bb.1:
1948 ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x2c
1949 ; GFX90A-NEXT: s_load_dword s6, s[2:3], 0x24
1950 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1951 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1952 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1953 ; GFX90A-NEXT: v_mul_f64 v[0:1], s[4:5], v[0:1]
1954 ; GFX90A-NEXT: v_mov_b32_e32 v2, s6
1955 ; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
1956 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1957 ; GFX90A-NEXT: .LBB63_2:
1958 ; GFX90A-NEXT: s_endpgm
1960 ; GFX940-LABEL: local_atomic_fadd_f64_noret:
1961 ; GFX940: ; %bb.0: ; %main_body
1962 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
1963 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
1964 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
1965 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1966 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1967 ; GFX940-NEXT: s_cbranch_execz .LBB63_2
1968 ; GFX940-NEXT: ; %bb.1:
1969 ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x2c
1970 ; GFX940-NEXT: s_load_dword s6, s[2:3], 0x24
1971 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1972 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1973 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1974 ; GFX940-NEXT: v_mul_f64 v[0:1], s[4:5], v[0:1]
1975 ; GFX940-NEXT: v_mov_b32_e32 v2, s6
1976 ; GFX940-NEXT: ds_add_f64 v2, v[0:1]
1977 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1978 ; GFX940-NEXT: .LBB63_2:
1979 ; GFX940-NEXT: s_endpgm
1981 %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
; Value-returning call to the llvm.amdgcn.ds.fadd.f64 intrinsic on a local
; (addrspace(3)) pointer with %data and the trailing operands i32 0, i32 0,
; i1 0. The checks for both targets move %data from v1/v2 into v[2:3] and
; expect a single ds_add_rtn_f64 writing v[0:1].
1985 define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
1986 ; GFX90A-LABEL: local_atomic_fadd_f64_rtn:
1987 ; GFX90A: ; %bb.0: ; %main_body
1988 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1989 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2
1990 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
1991 ; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
1992 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1993 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1995 ; GFX940-LABEL: local_atomic_fadd_f64_rtn:
1996 ; GFX940: ; %bb.0: ; %main_body
1997 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998 ; GFX940-NEXT: v_mov_b32_e32 v3, v2
1999 ; GFX940-NEXT: v_mov_b32_e32 v2, v1
2000 ; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2001 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2002 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2004 %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
; Kernel calling the flat fadd intrinsic in its addrspace(3) form
; (llvm.amdgcn.flat.atomic.fadd.f64.p3.f64) with %ptr and %data. The checks
; for both targets expect a single ds_add_f64 issued directly, without the
; v_mbcnt branch or s_bcnt1 scaling seen in local_atomic_fadd_f64_noret.
2008 define amdgpu_kernel void @local_atomic_fadd_f64_noret_from_flat_intrinsic(ptr addrspace(3) %ptr, double %data) {
2009 ; GFX90A-LABEL: local_atomic_fadd_f64_noret_from_flat_intrinsic:
2010 ; GFX90A: ; %bb.0: ; %main_body
2011 ; GFX90A-NEXT: s_load_dword s4, s[2:3], 0x24
2012 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2c
2013 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2014 ; GFX90A-NEXT: v_mov_b32_e32 v2, s4
2015 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
2016 ; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
2017 ; GFX90A-NEXT: s_endpgm
2019 ; GFX940-LABEL: local_atomic_fadd_f64_noret_from_flat_intrinsic:
2020 ; GFX940: ; %bb.0: ; %main_body
2021 ; GFX940-NEXT: s_load_dword s4, s[2:3], 0x24
2022 ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2c
2023 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2024 ; GFX940-NEXT: v_mov_b32_e32 v2, s4
2025 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
2026 ; GFX940-NEXT: ds_add_f64 v2, v[0:1]
2027 ; GFX940-NEXT: s_endpgm
2029 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data)
; Value-returning call to the flat fadd intrinsic in its addrspace(3) form
; (llvm.amdgcn.flat.atomic.fadd.f64.p3.f64) with %ptr and %data. The checks
; for both targets expect the same sequence as local_atomic_fadd_f64_rtn:
; %data moved into v[2:3], then a single ds_add_rtn_f64 writing v[0:1].
2033 define double @local_atomic_fadd_f64_rtn_from_flat_intrinsic(ptr addrspace(3) %ptr, double %data) {
2034 ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_from_flat_intrinsic:
2035 ; GFX90A: ; %bb.0: ; %main_body
2036 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2037 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2
2038 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
2039 ; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2040 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2041 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2043 ; GFX940-LABEL: local_atomic_fadd_f64_rtn_from_flat_intrinsic:
2044 ; GFX940: ; %bb.0: ; %main_body
2045 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2046 ; GFX940-NEXT: v_mov_b32_e32 v3, v2
2047 ; GFX940-NEXT: v_mov_b32_e32 v2, v1
2048 ; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2049 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2050 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2052 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data)
; Kernel performing atomicrmw fadd of the constant 4.0 on a local
; (addrspace(3)) pointer with seq_cst ordering, using attribute group #1.
; The checks for both targets guard the atomic behind a v_mbcnt == 0 branch,
; multiply 4.0 by the s_bcnt1 count of the saved exec mask, and then expect a
; single ds_add_f64.
2056 define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 {
2057 ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
2058 ; GFX90A: ; %bb.0: ; %main_body
2059 ; GFX90A-NEXT: s_mov_b64 s[0:1], exec
2060 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
2061 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
2062 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2063 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
2064 ; GFX90A-NEXT: s_cbranch_execz .LBB67_2
2065 ; GFX90A-NEXT: ; %bb.1:
2066 ; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24
2067 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
2068 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
2069 ; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2070 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2071 ; GFX90A-NEXT: v_mov_b32_e32 v2, s2
2072 ; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
2073 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2074 ; GFX90A-NEXT: .LBB67_2:
2075 ; GFX90A-NEXT: s_endpgm
2077 ; GFX940-LABEL: local_atomic_fadd_f64_noret_pat:
2078 ; GFX940: ; %bb.0: ; %main_body
2079 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
2080 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
2081 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
2082 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2083 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
2084 ; GFX940-NEXT: s_cbranch_execz .LBB67_2
2085 ; GFX940-NEXT: ; %bb.1:
2086 ; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24
2087 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
2088 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
2089 ; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2090 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2091 ; GFX940-NEXT: v_mov_b32_e32 v2, s2
2092 ; GFX940-NEXT: ds_add_f64 v2, v[0:1]
2093 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2094 ; GFX940-NEXT: .LBB67_2:
2095 ; GFX940-NEXT: s_endpgm
2097 %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
; Same atomicrmw fadd of 4.0 on a local (addrspace(3)) pointer with seq_cst
; ordering as local_atomic_fadd_f64_noret_pat, but using attribute group #0
; instead of #1. The expected instruction sequence is the same for both
; targets: v_mbcnt == 0 branch, 4.0 scaled by the s_bcnt1 count, then a single
; ds_add_f64.
2101 define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) #0 {
2102 ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush:
2103 ; GFX90A: ; %bb.0: ; %main_body
2104 ; GFX90A-NEXT: s_mov_b64 s[0:1], exec
2105 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
2106 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
2107 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2108 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
2109 ; GFX90A-NEXT: s_cbranch_execz .LBB68_2
2110 ; GFX90A-NEXT: ; %bb.1:
2111 ; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24
2112 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
2113 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
2114 ; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2115 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2116 ; GFX90A-NEXT: v_mov_b32_e32 v2, s2
2117 ; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
2118 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2119 ; GFX90A-NEXT: .LBB68_2:
2120 ; GFX90A-NEXT: s_endpgm
2122 ; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush:
2123 ; GFX940: ; %bb.0: ; %main_body
2124 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
2125 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
2126 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
2127 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2128 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
2129 ; GFX940-NEXT: s_cbranch_execz .LBB68_2
2130 ; GFX940-NEXT: ; %bb.1:
2131 ; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24
2132 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
2133 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
2134 ; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2135 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2136 ; GFX940-NEXT: v_mov_b32_e32 v2, s2
2137 ; GFX940-NEXT: ds_add_f64 v2, v[0:1]
2138 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2139 ; GFX940-NEXT: .LBB68_2:
2140 ; GFX940-NEXT: s_endpgm
2142 %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
; Same atomicrmw fadd of 4.0 on a local (addrspace(3)) pointer with seq_cst
; ordering as local_atomic_fadd_f64_noret_pat, but using attribute group #4.
; The expected instruction sequence is the same for both targets: v_mbcnt == 0
; branch, 4.0 scaled by the s_bcnt1 count, then a single ds_add_f64 (no
; compare-and-swap loop).
2146 define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 {
2147 ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
2148 ; GFX90A: ; %bb.0: ; %main_body
2149 ; GFX90A-NEXT: s_mov_b64 s[0:1], exec
2150 ; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
2151 ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
2152 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2153 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
2154 ; GFX90A-NEXT: s_cbranch_execz .LBB69_2
2155 ; GFX90A-NEXT: ; %bb.1:
2156 ; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24
2157 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
2158 ; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
2159 ; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2160 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2161 ; GFX90A-NEXT: v_mov_b32_e32 v2, s2
2162 ; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
2163 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2164 ; GFX90A-NEXT: .LBB69_2:
2165 ; GFX90A-NEXT: s_endpgm
2167 ; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
2168 ; GFX940: ; %bb.0: ; %main_body
2169 ; GFX940-NEXT: s_mov_b64 s[0:1], exec
2170 ; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
2171 ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
2172 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2173 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
2174 ; GFX940-NEXT: s_cbranch_execz .LBB69_2
2175 ; GFX940-NEXT: ; %bb.1:
2176 ; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24
2177 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
2178 ; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
2179 ; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
2180 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2181 ; GFX940-NEXT: v_mov_b32_e32 v2, s2
2182 ; GFX940-NEXT: ds_add_f64 v2, v[0:1]
2183 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2184 ; GFX940-NEXT: .LBB69_2:
2185 ; GFX940-NEXT: s_endpgm
2187 %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
; Returning LDS (addrspace 3) atomicrmw fadd of the constant 4.0, seq_cst,
; under attribute group #1 ("amdgpu-unsafe-fp-atomics"="true"). The %data
; argument is not referenced by the atomicrmw. Both targets are expected to
; select ds_add_rtn_f64; they differ only in how 4.0 is materialized: gfx90a
; uses two v_mov_b32 (0 and 0x40100000), gfx940 a single v_mov_b64.
2191 define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) #1 {
2192 ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat:
2193 ; GFX90A: ; %bb.0: ; %main_body
2194 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2195 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2196 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
2197 ; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2198 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2199 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2201 ; GFX940-LABEL: local_atomic_fadd_f64_rtn_pat:
2202 ; GFX940: ; %bb.0: ; %main_body
2203 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
2205 ; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2206 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2207 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2209 %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
; Returning LDS fadd of %data through the llvm.amdgcn.ds.fadd.f64 intrinsic,
; under attribute group #2 (ieee denormals plus
; "amdgpu-unsafe-fp-atomics"="true"). Both targets are expected to copy v1/v2
; into v[2:3] and use that pair as the data operand of ds_add_rtn_f64.
2213 define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 {
2214 ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
2215 ; GFX90A: ; %bb.0: ; %main_body
2216 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2217 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2
2218 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
2219 ; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2220 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2221 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2223 ; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
2224 ; GFX940: ; %bb.0: ; %main_body
2225 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2226 ; GFX940-NEXT: v_mov_b32_e32 v3, v2
2227 ; GFX940-NEXT: v_mov_b32_e32 v2, v1
2228 ; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2229 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2230 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2232 %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
; Returning LDS fadd of %data through the llvm.amdgcn.ds.fadd.f64 intrinsic,
; under attribute group #3 (ieee denormals, no "amdgpu-unsafe-fp-atomics").
; The expected instruction sequence matches
; local_atomic_fadd_f64_rtn_ieee_unsafe on both targets: ds_add_rtn_f64 is
; selected regardless of the unsafe-atomics attribute.
2236 define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 {
2237 ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
2238 ; GFX90A: ; %bb.0: ; %main_body
2239 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2240 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2
2241 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
2242 ; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2243 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
2244 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2246 ; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
2247 ; GFX940: ; %bb.0: ; %main_body
2248 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2249 ; GFX940-NEXT: v_mov_b32_e32 v3, v2
2250 ; GFX940-NEXT: v_mov_b32_e32 v2, v1
2251 ; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
2252 ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
2253 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2255 %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
; Attribute groups attached to the test functions. They vary two settings:
; the "denormal-fp-math" mode (preserve-sign, ieee, or unspecified) and
; whether "amdgpu-unsafe-fp-atomics" is set to "true".
;   #0 - preserve-sign denormals, unsafe FP atomics enabled
;   #1 - unsafe FP atomics enabled, denormal mode not specified
;   #2 - ieee denormals, unsafe FP atomics enabled
;   #3 - ieee denormals, unsafe FP atomics not set
;   #4 - preserve-sign denormals, unsafe FP atomics not set
2259 attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
2260 attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" }
2261 attributes #2 = { "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" }
2262 attributes #3 = { "denormal-fp-math"="ieee,ieee" }
2263 attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" }