1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
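4 ; Not supported on gfx8 or gfx9, except gfx90a/gfx940. The two run lines below for those targets are deliberately disabled (spelled "xUN" so the test runner skips them).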
5 ; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A %s
6 ; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
; Baseline returning case: the intrinsic result is bound to %ret and the function is declared to return double.
; %rsrc and %soffset are marked inreg and appear as s[16:19] / s20 in the expected output; %vindex and %voffset
; are plain arguments and appear together as v[2:3]. The expected instruction uses idxen + offen and carries glc.
8 define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
9 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
11 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen glc
13 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
14 ; GFX6-NEXT: s_setpc_b64 s[30:31]
16 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
18 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen glc
20 ; GFX7-NEXT: s_waitcnt vmcnt(0)
21 ; GFX7-NEXT: s_setpc_b64 s[30:31]
22 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; Returning case with a constant added to the offset: %voffset.add = %voffset + 256 is passed as the voffset operand.
; The expected output shows no separate add instruction; the constant appears as the immediate "offset:256"
; on the atomic, alongside idxen + offen + glc.
26 define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
27 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
29 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen offset:256 glc
31 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
32 ; GFX6-NEXT: s_setpc_b64 s[30:31]
34 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
36 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen offset:256 glc
38 ; GFX7-NEXT: s_waitcnt vmcnt(0)
39 ; GFX7-NEXT: s_setpc_b64 s[30:31]
40 %voffset.add = add i32 %voffset, 256
41 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
; Returning case with no voffset argument: the call passes the constant 0 as the voffset operand.
; The expected instruction therefore takes only the index register (v2) and uses idxen without offen; glc is still set.
45 define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
46 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
48 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[16:19], s20 idxen glc
50 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
51 ; GFX6-NEXT: s_setpc_b64 s[30:31]
53 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
55 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[16:19], s20 idxen glc
57 ; GFX7-NEXT: s_waitcnt vmcnt(0)
58 ; GFX7-NEXT: s_setpc_b64 s[30:31]
59 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
; Returning case with a non-zero final immediate operand: the call passes i32 2 instead of 0.
; Compared with the baseline returning test, the only difference in the expected instruction is the extra slc modifier.
63 define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
64 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
66 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen glc slc
68 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
69 ; GFX6-NEXT: s_setpc_b64 s[30:31]
71 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
73 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen glc slc
75 ; GFX7-NEXT: s_waitcnt vmcnt(0)
76 ; GFX7-NEXT: s_setpc_b64 s[30:31]
77 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
; Baseline no-return case: same operands as the baseline returning test, but the function is declared void.
; The expected instruction is identical except that glc is absent.
81 define void @struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
82 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
84 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen
86 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
87 ; GFX6-NEXT: s_setpc_b64 s[30:31]
89 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
91 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen
93 ; GFX7-NEXT: s_waitcnt vmcnt(0)
94 ; GFX7-NEXT: s_setpc_b64 s[30:31]
95 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; No-return case with a constant added to the offset: %voffset + 256 is passed as the voffset operand.
; The expected instruction carries the constant as "offset:256" and, being the void form, has no glc.
99 define void @struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
100 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
102 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen offset:256
104 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
105 ; GFX6-NEXT: s_setpc_b64 s[30:31]
107 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
109 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen offset:256
111 ; GFX7-NEXT: s_waitcnt vmcnt(0)
112 ; GFX7-NEXT: s_setpc_b64 s[30:31]
113 %voffset.add = add i32 %voffset, 256
114 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
118 ; Natural mapping, no voffset
; No-return case where the call passes the constant 0 as the voffset operand.
; The expected instruction takes only the index register (v2), uses idxen without offen, and has no glc.
119 define void @struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
120 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
122 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[16:19], s20 idxen
124 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
125 ; GFX6-NEXT: s_setpc_b64 s[30:31]
127 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
129 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[16:19], s20 idxen
131 ; GFX7-NEXT: s_waitcnt vmcnt(0)
132 ; GFX7-NEXT: s_setpc_b64 s[30:31]
133 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
; No-return case with a non-zero final immediate operand: the call passes i32 2 instead of 0.
; The expected instruction gains the slc modifier and, being the void form, has no glc.
137 define void @struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
138 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
140 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen slc
142 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
143 ; GFX6-NEXT: s_setpc_b64 s[30:31]
145 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
147 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen slc
149 ; GFX7-NEXT: s_waitcnt vmcnt(0)
150 ; GFX7-NEXT: s_setpc_b64 s[30:31]
151 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
155 ; Test waterfall loop on resource
; Here %rsrc is passed without inreg (only %soffset keeps it), so the descriptor arrives in v[2:5].
; The expected code saves exec, then loops: v_readfirstlane_b32 copies v2..v5 into s[8:11], two v_cmp_eq_u64
; compares combined with s_and_b64 / s_and_saveexec_b64 select the lanes whose descriptor matches, the atomic
; runs with s[8:11] as its resource, and s_xor_b64 + s_cbranch_execnz repeat while lanes remain.
; exec is restored from s[6:7] after the loop. The +256 on %voffset again shows up as "offset:256".
156 define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
157 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
159 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160 ; GFX6-NEXT: s_mov_b64 s[6:7], exec
161 ; GFX6-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
162 ; GFX6-NEXT: v_readfirstlane_b32 s8, v2
163 ; GFX6-NEXT: v_readfirstlane_b32 s9, v3
164 ; GFX6-NEXT: v_readfirstlane_b32 s10, v4
165 ; GFX6-NEXT: v_readfirstlane_b32 s11, v5
166 ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[2:3]
167 ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[4:5]
168 ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
169 ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
170 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
171 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v[6:7], s[8:11], s16 idxen offen offset:256 glc
172 ; GFX6-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
173 ; GFX6-NEXT: ; implicit-def: $vgpr6_vgpr7
174 ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5]
175 ; GFX6-NEXT: s_cbranch_execnz .LBB8_1
176 ; GFX6-NEXT: ; %bb.2:
177 ; GFX6-NEXT: s_mov_b64 exec, s[6:7]
178 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
179 ; GFX6-NEXT: s_setpc_b64 s[30:31]
181 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
183 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184 ; GFX7-NEXT: s_mov_b64 s[6:7], exec
185 ; GFX7-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
186 ; GFX7-NEXT: v_readfirstlane_b32 s8, v2
187 ; GFX7-NEXT: v_readfirstlane_b32 s9, v3
188 ; GFX7-NEXT: v_readfirstlane_b32 s10, v4
189 ; GFX7-NEXT: v_readfirstlane_b32 s11, v5
190 ; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[2:3]
191 ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[4:5]
192 ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
193 ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
194 ; GFX7-NEXT: s_waitcnt vmcnt(0)
195 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v[6:7], s[8:11], s16 idxen offen offset:256 glc
196 ; GFX7-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
197 ; GFX7-NEXT: ; implicit-def: $vgpr6_vgpr7
198 ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5]
199 ; GFX7-NEXT: s_cbranch_execnz .LBB8_1
200 ; GFX7-NEXT: ; %bb.2:
201 ; GFX7-NEXT: s_mov_b64 exec, s[6:7]
202 ; GFX7-NEXT: s_waitcnt vmcnt(0)
203 ; GFX7-NEXT: s_setpc_b64 s[30:31]
204 %voffset.add = add i32 %voffset, 256
205 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
209 ; Test waterfall loop on soffset (and, as written, on the resource too)
; Neither %rsrc nor %soffset carries inreg in this signature, so the expected loop makes both uniform:
; v_readfirstlane_b32 copies v2..v5 into s[8:11] and v8 into s12, and the lane mask is the AND of the two
; v_cmp_eq_u64 descriptor compares with a v_cmp_eq_u32 compare on s12 vs v8. The atomic then uses
; s[8:11] as resource and s12 as soffset, with the +256 on %voffset carried as "offset:256".
; NOTE(review): the function name says "sgpr_rsrc", but %rsrc is not inreg and the checks waterfall over it;
; confirm whether the resource was meant to be inreg so that only soffset is exercised.
210 define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__vgpr_soffset(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
211 ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__vgpr_soffset:
213 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214 ; GFX6-NEXT: s_mov_b64 s[6:7], exec
215 ; GFX6-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
216 ; GFX6-NEXT: v_readfirstlane_b32 s8, v2
217 ; GFX6-NEXT: v_readfirstlane_b32 s9, v3
218 ; GFX6-NEXT: v_readfirstlane_b32 s10, v4
219 ; GFX6-NEXT: v_readfirstlane_b32 s11, v5
220 ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[2:3]
221 ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[4:5]
222 ; GFX6-NEXT: v_readfirstlane_b32 s12, v8
223 ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
224 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s12, v8
225 ; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], vcc
226 ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
227 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
228 ; GFX6-NEXT: buffer_atomic_fmax_x2 v[0:1], v[6:7], s[8:11], s12 idxen offen offset:256 glc
229 ; GFX6-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
230 ; GFX6-NEXT: ; implicit-def: $vgpr8
231 ; GFX6-NEXT: ; implicit-def: $vgpr6_vgpr7
232 ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5]
233 ; GFX6-NEXT: s_cbranch_execnz .LBB9_1
234 ; GFX6-NEXT: ; %bb.2:
235 ; GFX6-NEXT: s_mov_b64 exec, s[6:7]
236 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
237 ; GFX6-NEXT: s_setpc_b64 s[30:31]
239 ; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__vgpr_soffset:
241 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX7-NEXT: s_mov_b64 s[6:7], exec
243 ; GFX7-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
244 ; GFX7-NEXT: v_readfirstlane_b32 s8, v2
245 ; GFX7-NEXT: v_readfirstlane_b32 s9, v3
246 ; GFX7-NEXT: v_readfirstlane_b32 s10, v4
247 ; GFX7-NEXT: v_readfirstlane_b32 s11, v5
248 ; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[2:3]
249 ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[4:5]
250 ; GFX7-NEXT: v_readfirstlane_b32 s12, v8
251 ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
252 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, s12, v8
253 ; GFX7-NEXT: s_and_b64 s[4:5], s[4:5], vcc
254 ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
255 ; GFX7-NEXT: s_waitcnt vmcnt(0)
256 ; GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v[6:7], s[8:11], s12 idxen offen offset:256 glc
257 ; GFX7-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
258 ; GFX7-NEXT: ; implicit-def: $vgpr8
259 ; GFX7-NEXT: ; implicit-def: $vgpr6_vgpr7
260 ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5]
261 ; GFX7-NEXT: s_cbranch_execnz .LBB9_1
262 ; GFX7-NEXT: ; %bb.2:
263 ; GFX7-NEXT: s_mov_b64 exec, s[6:7]
264 ; GFX7-NEXT: s_waitcnt vmcnt(0)
265 ; GFX7-NEXT: s_setpc_b64 s[30:31]
266 %voffset.add = add i32 %voffset, 256
267 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
; Intrinsic under test. As used by the call sites in this file, the operands are, in order:
; the f64 value, the ptr addrspace(8) resource, vindex, voffset, soffset, and a final immediate
; (the tests pass 0, or 2 in the *_slc cases where the expected instruction carries slc).
271 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)