1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10_W32 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s
7 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s
8 ; REQUIRES: do-not-run-me
; v_div_fmas_f32 - default calling convention, f32 operands.
; Forwards three float arguments and an i1 argument directly to
; llvm.amdgcn.div.fmas.f32. For every check prefix the expected code masks
; the i1 (arriving in v3) with 1 and compares it against zero to produce the
; condition register read by v_div_fmas_f32 (vcc_lo for the *_W32 prefixes,
; vcc for all others), then issues v_div_fmas_f32 v0, v0, v1, v2.
10 define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) {
11 ; GFX7-LABEL: v_div_fmas_f32:
13 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GFX7-NEXT: v_and_b32_e32 v3, 1, v3
15 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
17 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
18 ; GFX7-NEXT: s_setpc_b64 s[30:31]
20 ; GFX8-LABEL: v_div_fmas_f32:
22 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX8-NEXT: v_and_b32_e32 v3, 1, v3
24 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
26 ; GFX8-NEXT: v_div_fmas_f32 v0, v0, v1, v2
27 ; GFX8-NEXT: s_setpc_b64 s[30:31]
29 ; GFX10_W32-LABEL: v_div_fmas_f32:
31 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GFX10_W32-NEXT: v_and_b32_e32 v3, 1, v3
33 ; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
34 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
35 ; GFX10_W32-NEXT: s_setpc_b64 s[30:31]
37 ; GFX10_W64-LABEL: v_div_fmas_f32:
39 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40 ; GFX10_W64-NEXT: v_and_b32_e32 v3, 1, v3
41 ; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
42 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
43 ; GFX10_W64-NEXT: s_setpc_b64 s[30:31]
45 ; GFX11_W32-LABEL: v_div_fmas_f32:
47 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX11_W32-NEXT: v_and_b32_e32 v3, 1, v3
49 ; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
50 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
51 ; GFX11_W32-NEXT: s_setpc_b64 s[30:31]
53 ; GFX11_W64-LABEL: v_div_fmas_f32:
55 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56 ; GFX11_W64-NEXT: v_and_b32_e32 v3, 1, v3
57 ; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
58 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
59 ; GFX11_W64-NEXT: s_setpc_b64 s[30:31]
60 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
; v_div_fmas_f64 - default calling convention, f64 operands.
; Same shape as the f32 variant but calls llvm.amdgcn.div.fmas.f64. The three
; doubles occupy v[0:1], v[2:3] and v[4:5], so the i1 arrives in v6; the
; checks expect it to be masked with 1 and compared against zero into vcc
; (vcc_lo for the *_W32 prefixes) before v_div_fmas_f64 writes v[0:1].
64 define double @v_div_fmas_f64(double %a, double %b, double %c, i1 %d) {
65 ; GFX7-LABEL: v_div_fmas_f64:
67 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX7-NEXT: v_and_b32_e32 v6, 1, v6
69 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
71 ; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
72 ; GFX7-NEXT: s_setpc_b64 s[30:31]
74 ; GFX8-LABEL: v_div_fmas_f64:
76 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77 ; GFX8-NEXT: v_and_b32_e32 v6, 1, v6
78 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
80 ; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
81 ; GFX8-NEXT: s_setpc_b64 s[30:31]
83 ; GFX10_W32-LABEL: v_div_fmas_f64:
85 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX10_W32-NEXT: v_and_b32_e32 v6, 1, v6
87 ; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
88 ; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
89 ; GFX10_W32-NEXT: s_setpc_b64 s[30:31]
91 ; GFX10_W64-LABEL: v_div_fmas_f64:
93 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; GFX10_W64-NEXT: v_and_b32_e32 v6, 1, v6
95 ; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
96 ; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
97 ; GFX10_W64-NEXT: s_setpc_b64 s[30:31]
99 ; GFX11_W32-LABEL: v_div_fmas_f64:
100 ; GFX11_W32: ; %bb.0:
101 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX11_W32-NEXT: v_and_b32_e32 v6, 1, v6
103 ; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
104 ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
105 ; GFX11_W32-NEXT: s_setpc_b64 s[30:31]
107 ; GFX11_W64-LABEL: v_div_fmas_f64:
108 ; GFX11_W64: ; %bb.0:
109 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; GFX11_W64-NEXT: v_and_b32_e32 v6, 1, v6
111 ; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
112 ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
113 ; GFX11_W64-NEXT: s_setpc_b64 s[30:31]
114 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d)
; s_div_fmas_f32 - amdgpu_ps function with all arguments marked inreg.
; The condition is computed in IR as `icmp eq i32 %d, 0` and passed as the i1
; operand of llvm.amdgcn.div.fmas.f32. The checks expect the compare to be
; done on the scalar side (s_cmp_eq_u32 / s_cselect_b32 / s_and_b32 with 1)
; and then moved into vcc (vcc_lo for the *_W32 prefixes) with
; v_cmp_ne_u32_e64. The GFX7 and GFX8 checks copy s0, s1 and s2 into VGPRs
; first; the GFX10 and GFX11 checks copy only s1 and s2 and use s0 directly
; as the first source operand of v_div_fmas_f32.
118 define amdgpu_ps float @s_div_fmas_f32(float inreg %a, float inreg %b, float inreg %c, i32 inreg %d) {
119 ; GFX7-LABEL: s_div_fmas_f32:
121 ; GFX7-NEXT: s_cmp_eq_u32 s3, 0
122 ; GFX7-NEXT: s_cselect_b32 s3, 1, 0
123 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
124 ; GFX7-NEXT: s_and_b32 s0, 1, s3
125 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
126 ; GFX7-NEXT: v_mov_b32_e32 v2, s2
127 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
129 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
130 ; GFX7-NEXT: ; return to shader part epilog
132 ; GFX8-LABEL: s_div_fmas_f32:
134 ; GFX8-NEXT: s_cmp_eq_u32 s3, 0
135 ; GFX8-NEXT: s_cselect_b32 s3, 1, 0
136 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
137 ; GFX8-NEXT: s_and_b32 s0, 1, s3
138 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
139 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
140 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
142 ; GFX8-NEXT: v_div_fmas_f32 v0, v0, v1, v2
143 ; GFX8-NEXT: ; return to shader part epilog
145 ; GFX10_W32-LABEL: s_div_fmas_f32:
146 ; GFX10_W32: ; %bb.0:
147 ; GFX10_W32-NEXT: s_cmp_eq_u32 s3, 0
148 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s1
149 ; GFX10_W32-NEXT: s_cselect_b32 s3, 1, 0
150 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s2
151 ; GFX10_W32-NEXT: s_and_b32 s3, 1, s3
152 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3
153 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1
154 ; GFX10_W32-NEXT: ; return to shader part epilog
156 ; GFX10_W64-LABEL: s_div_fmas_f32:
157 ; GFX10_W64: ; %bb.0:
158 ; GFX10_W64-NEXT: s_cmp_eq_u32 s3, 0
159 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s1
160 ; GFX10_W64-NEXT: s_cselect_b32 s3, 1, 0
161 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s2
162 ; GFX10_W64-NEXT: s_and_b32 s3, 1, s3
163 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3
164 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1
165 ; GFX10_W64-NEXT: ; return to shader part epilog
167 ; GFX11_W32-LABEL: s_div_fmas_f32:
168 ; GFX11_W32: ; %bb.0:
169 ; GFX11_W32-NEXT: s_cmp_eq_u32 s3, 0
170 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
171 ; GFX11_W32-NEXT: s_cselect_b32 s3, 1, 0
172 ; GFX11_W32-NEXT: s_and_b32 s3, 1, s3
173 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3
174 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1
175 ; GFX11_W32-NEXT: ; return to shader part epilog
177 ; GFX11_W64-LABEL: s_div_fmas_f32:
178 ; GFX11_W64: ; %bb.0:
179 ; GFX11_W64-NEXT: s_cmp_eq_u32 s3, 0
180 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s1
181 ; GFX11_W64-NEXT: s_cselect_b32 s3, 1, 0
182 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s2
183 ; GFX11_W64-NEXT: s_and_b32 s3, 1, s3
184 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3
185 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1
186 ; GFX11_W64-NEXT: ; return to shader part epilog
187 %vcc = icmp eq i32 %d, 0
188 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %vcc)
; s_div_fmas_f64 - amdgpu_ps function with all arguments marked inreg, f64.
; The condition is `icmp eq i32 %d, 0`, with %d arriving in s6 after the
; three double arguments (s[0:1], s[2:3], s[4:5]). As in the f32 variant the
; checks expect a scalar compare/select/and sequence followed by
; v_cmp_ne_u32_e64 into vcc (vcc_lo for the *_W32 prefixes). The GFX7 and
; GFX8 checks copy all six source dwords into VGPRs; the GFX10 and GFX11
; checks keep the first operand in s[0:1]. Every prefix expects the result
; in v[0:1] to be read back into s0/s1 with v_readfirstlane_b32.
192 define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double inreg %c, i32 inreg %d) {
193 ; GFX7-LABEL: s_div_fmas_f64:
195 ; GFX7-NEXT: s_cmp_eq_u32 s6, 0
196 ; GFX7-NEXT: s_cselect_b32 s6, 1, 0
197 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
198 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
199 ; GFX7-NEXT: v_mov_b32_e32 v2, s2
200 ; GFX7-NEXT: v_mov_b32_e32 v4, s4
201 ; GFX7-NEXT: s_and_b32 s0, 1, s6
202 ; GFX7-NEXT: v_mov_b32_e32 v3, s3
203 ; GFX7-NEXT: v_mov_b32_e32 v5, s5
204 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
206 ; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
207 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
208 ; GFX7-NEXT: v_readfirstlane_b32 s1, v1
209 ; GFX7-NEXT: ; return to shader part epilog
211 ; GFX8-LABEL: s_div_fmas_f64:
213 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0
214 ; GFX8-NEXT: s_cselect_b32 s6, 1, 0
215 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
216 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
217 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
218 ; GFX8-NEXT: v_mov_b32_e32 v4, s4
219 ; GFX8-NEXT: s_and_b32 s0, 1, s6
220 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
221 ; GFX8-NEXT: v_mov_b32_e32 v5, s5
222 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
224 ; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
225 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
226 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1
227 ; GFX8-NEXT: ; return to shader part epilog
229 ; GFX10_W32-LABEL: s_div_fmas_f64:
230 ; GFX10_W32: ; %bb.0:
231 ; GFX10_W32-NEXT: s_cmp_eq_u32 s6, 0
232 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s2
233 ; GFX10_W32-NEXT: s_cselect_b32 s6, 1, 0
234 ; GFX10_W32-NEXT: v_mov_b32_e32 v2, s4
235 ; GFX10_W32-NEXT: s_and_b32 s6, 1, s6
236 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s3
237 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6
238 ; GFX10_W32-NEXT: v_mov_b32_e32 v3, s5
239 ; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
240 ; GFX10_W32-NEXT: v_readfirstlane_b32 s0, v0
241 ; GFX10_W32-NEXT: v_readfirstlane_b32 s1, v1
242 ; GFX10_W32-NEXT: ; return to shader part epilog
244 ; GFX10_W64-LABEL: s_div_fmas_f64:
245 ; GFX10_W64: ; %bb.0:
246 ; GFX10_W64-NEXT: s_cmp_eq_u32 s6, 0
247 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s2
248 ; GFX10_W64-NEXT: s_cselect_b32 s6, 1, 0
249 ; GFX10_W64-NEXT: v_mov_b32_e32 v2, s4
250 ; GFX10_W64-NEXT: s_and_b32 s6, 1, s6
251 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s3
252 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6
253 ; GFX10_W64-NEXT: v_mov_b32_e32 v3, s5
254 ; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
255 ; GFX10_W64-NEXT: v_readfirstlane_b32 s0, v0
256 ; GFX10_W64-NEXT: v_readfirstlane_b32 s1, v1
257 ; GFX10_W64-NEXT: ; return to shader part epilog
259 ; GFX11_W32-LABEL: s_div_fmas_f64:
260 ; GFX11_W32: ; %bb.0:
261 ; GFX11_W32-NEXT: s_cmp_eq_u32 s6, 0
262 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
263 ; GFX11_W32-NEXT: s_cselect_b32 s6, 1, 0
264 ; GFX11_W32-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
265 ; GFX11_W32-NEXT: s_and_b32 s6, 1, s6
266 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6
267 ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
268 ; GFX11_W32-NEXT: v_readfirstlane_b32 s0, v0
269 ; GFX11_W32-NEXT: v_readfirstlane_b32 s1, v1
270 ; GFX11_W32-NEXT: ; return to shader part epilog
272 ; GFX11_W64-LABEL: s_div_fmas_f64:
273 ; GFX11_W64: ; %bb.0:
274 ; GFX11_W64-NEXT: s_cmp_eq_u32 s6, 0
275 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2
276 ; GFX11_W64-NEXT: s_cselect_b32 s6, 1, 0
277 ; GFX11_W64-NEXT: v_mov_b32_e32 v2, s4
278 ; GFX11_W64-NEXT: s_and_b32 s6, 1, s6
279 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3
280 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6
281 ; GFX11_W64-NEXT: v_mov_b32_e32 v3, s5
282 ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
283 ; GFX11_W64-NEXT: v_readfirstlane_b32 s0, v0
284 ; GFX11_W64-NEXT: v_readfirstlane_b32 s1, v1
285 ; GFX11_W64-NEXT: ; return to shader part epilog
286 %vcc = icmp eq i32 %d, 0
287 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %vcc)
; test_div_fmas_f32 - amdgpu_kernel variant, all operands from arguments.
; Calls llvm.amdgcn.div.fmas.f32 on %a, %b, %c, %d and stores the result to
; %out. The unnamed [8 x i32] parameters sit between the named arguments
; (presumably to space out their load offsets - confirm against the original
; test's intent). The checks expect each argument to be fetched with a
; separate scalar load from s[0:1], the i1 to be masked with 1 and compared
; into vcc (vcc_lo for the *_W32 prefixes), and the result to be written
; with buffer_store_dword (GFX7), flat_store_dword (GFX8), global_store_dword
; (GFX10) or global_store_b32 (GFX11). The GFX11 checks additionally expect
; s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.
291 define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
292 ; GFX7-LABEL: test_div_fmas_f32:
294 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa
295 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13
296 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c
297 ; GFX7-NEXT: s_load_dword s5, s[0:1], 0x25
298 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
299 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
300 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
301 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
302 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
303 ; GFX7-NEXT: s_and_b32 s2, 1, s5
304 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
305 ; GFX7-NEXT: s_mov_b32 s2, -1
306 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
308 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
309 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
310 ; GFX7-NEXT: s_endpgm
312 ; GFX8-LABEL: test_div_fmas_f32:
314 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28
315 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c
316 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70
317 ; GFX8-NEXT: s_load_dword s5, s[0:1], 0x94
318 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
319 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
320 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
321 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
322 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
323 ; GFX8-NEXT: s_and_b32 s2, 1, s5
324 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
326 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
327 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
328 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
329 ; GFX8-NEXT: flat_store_dword v[0:1], v2
330 ; GFX8-NEXT: s_endpgm
332 ; GFX10_W32-LABEL: test_div_fmas_f32:
333 ; GFX10_W32: ; %bb.0:
334 ; GFX10_W32-NEXT: s_clause 0x4
335 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94
336 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x4c
337 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x70
338 ; GFX10_W32-NEXT: s_load_dword s7, s[0:1], 0x28
339 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
340 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
341 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4
342 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
343 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
344 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6
345 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1
346 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
347 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
348 ; GFX10_W32-NEXT: s_endpgm
350 ; GFX10_W64-LABEL: test_div_fmas_f32:
351 ; GFX10_W64: ; %bb.0:
352 ; GFX10_W64-NEXT: s_clause 0x4
353 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94
354 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x4c
355 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x70
356 ; GFX10_W64-NEXT: s_load_dword s7, s[0:1], 0x28
357 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
358 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
359 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4
360 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
361 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
362 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6
363 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1
364 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
365 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
366 ; GFX10_W64-NEXT: s_endpgm
368 ; GFX11_W32-LABEL: test_div_fmas_f32:
369 ; GFX11_W32: ; %bb.0:
370 ; GFX11_W32-NEXT: s_clause 0x4
371 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x94
372 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x4c
373 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x70
374 ; GFX11_W32-NEXT: s_load_b32 s5, s[0:1], 0x28
375 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
376 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
377 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
378 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s4
379 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
380 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s5, v0, v1
381 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
382 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
383 ; GFX11_W32-NEXT: s_nop 0
384 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
385 ; GFX11_W32-NEXT: s_endpgm
387 ; GFX11_W64-LABEL: test_div_fmas_f32:
388 ; GFX11_W64: ; %bb.0:
389 ; GFX11_W64-NEXT: s_clause 0x4
390 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x94
391 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x4c
392 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x70
393 ; GFX11_W64-NEXT: s_load_b32 s5, s[0:1], 0x28
394 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
395 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
396 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
397 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
398 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
399 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s4
400 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s5, v0, v1
401 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
402 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
403 ; GFX11_W64-NEXT: s_nop 0
404 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
405 ; GFX11_W64-NEXT: s_endpgm
406 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
407 store float %result, ptr addrspace(1) %out, align 4
; test_div_fmas_f32_inline_imm_0 - constant 1.0 as the FIRST fmas operand.
; The kernel signature matches test_div_fmas_f32, but the call passes
; `float 1.0` in place of %a (which is therefore unused). The checks expect
; only three scalar dword loads plus the %out pointer load, and expect 1.0 to
; appear as an inline literal in the first source position of
; v_div_fmas_f32. In the GFX10 and GFX11 checks the second operand is used
; straight from an SGPR and only the third is copied to a VGPR.
411 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
412 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0:
414 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13
415 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c
416 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25
417 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
418 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
419 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
420 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
421 ; GFX7-NEXT: s_and_b32 s2, 1, s4
422 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
423 ; GFX7-NEXT: s_mov_b32 s2, -1
424 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
426 ; GFX7-NEXT: v_div_fmas_f32 v0, 1.0, v0, v1
427 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
428 ; GFX7-NEXT: s_endpgm
430 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_0:
432 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c
433 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70
434 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94
435 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
436 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
437 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
438 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
439 ; GFX8-NEXT: s_and_b32 s2, 1, s4
440 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
442 ; GFX8-NEXT: v_div_fmas_f32 v2, 1.0, v0, v1
443 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
444 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
445 ; GFX8-NEXT: flat_store_dword v[0:1], v2
446 ; GFX8-NEXT: s_endpgm
448 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0:
449 ; GFX10_W32: ; %bb.0:
450 ; GFX10_W32-NEXT: s_clause 0x3
451 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94
452 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70
453 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c
454 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
455 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
456 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
457 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4
458 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
459 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
460 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0
461 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
462 ; GFX10_W32-NEXT: s_endpgm
464 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0:
465 ; GFX10_W64: ; %bb.0:
466 ; GFX10_W64-NEXT: s_clause 0x3
467 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94
468 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70
469 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c
470 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
471 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
472 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
473 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4
474 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
475 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
476 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0
477 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
478 ; GFX10_W64-NEXT: s_endpgm
480 ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_0:
481 ; GFX11_W32: ; %bb.0:
482 ; GFX11_W32-NEXT: s_clause 0x3
483 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x94
484 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x70
485 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x4c
486 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
487 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
488 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
489 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
490 ; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
491 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
492 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, 1.0, s4, v0
493 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
494 ; GFX11_W32-NEXT: s_nop 0
495 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
496 ; GFX11_W32-NEXT: s_endpgm
498 ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_0:
499 ; GFX11_W64: ; %bb.0:
500 ; GFX11_W64-NEXT: s_clause 0x3
501 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x94
502 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x70
503 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x4c
504 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
505 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
506 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
507 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
508 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
509 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
510 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, 1.0, s4, v0
511 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
512 ; GFX11_W64-NEXT: s_nop 0
513 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
514 ; GFX11_W64-NEXT: s_endpgm
515 %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d)
516 store float %result, ptr addrspace(1) %out, align 4
; test_div_fmas_f32_inline_imm_1 - constant 1.0 as the SECOND fmas operand.
; Unlike the sibling inline_imm kernels, %a, %b and %c are declared
; back-to-back here; only %d is preceded by an unnamed [8 x i32] parameter.
; The call passes `float 1.0` in place of %b (which is therefore unused).
; The checks expect loads for %a, %c and %d only, and expect 1.0 to appear
; as an inline literal in the second source position of v_div_fmas_f32.
520 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) {
521 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1:
523 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
524 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x4
525 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0xd
526 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
527 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
528 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
529 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
530 ; GFX7-NEXT: s_and_b32 s2, 1, s4
531 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
532 ; GFX7-NEXT: s_mov_b32 s2, -1
533 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
535 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, 1.0, v1
536 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
537 ; GFX7-NEXT: s_endpgm
539 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_1:
541 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x8
542 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x10
543 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x34
544 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
545 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
546 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
547 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
548 ; GFX8-NEXT: s_and_b32 s2, 1, s4
549 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
551 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, 1.0, v1
552 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
553 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
554 ; GFX8-NEXT: flat_store_dword v[0:1], v2
555 ; GFX8-NEXT: s_endpgm
557 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1:
558 ; GFX10_W32: ; %bb.0:
559 ; GFX10_W32-NEXT: s_clause 0x3
560 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x34
561 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x10
562 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x8
563 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
564 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
565 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
566 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4
567 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
568 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
569 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0
570 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
571 ; GFX10_W32-NEXT: s_endpgm
573 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1:
574 ; GFX10_W64: ; %bb.0:
575 ; GFX10_W64-NEXT: s_clause 0x3
576 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x34
577 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x10
578 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x8
579 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
580 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
581 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
582 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4
583 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
584 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
585 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0
586 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
587 ; GFX10_W64-NEXT: s_endpgm
589 ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1:
590 ; GFX11_W32: ; %bb.0:
591 ; GFX11_W32-NEXT: s_clause 0x3
592 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x34
593 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x10
594 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x8
595 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
596 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
597 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
598 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
599 ; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
600 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
601 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, 1.0, v0
602 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
603 ; GFX11_W32-NEXT: s_nop 0
604 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
605 ; GFX11_W32-NEXT: s_endpgm
607 ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1:
608 ; GFX11_W64: ; %bb.0:
609 ; GFX11_W64-NEXT: s_clause 0x3
610 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x34
611 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x10
612 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x8
613 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
614 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
615 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
616 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
617 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
618 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
619 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, 1.0, v0
620 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
621 ; GFX11_W64-NEXT: s_nop 0
622 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
623 ; GFX11_W64-NEXT: s_endpgm
624 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d)
625 store float %result, ptr addrspace(1) %out, align 4
; test_div_fmas_f32_inline_imm_2 - constant 1.0 as the THIRD fmas operand.
; The kernel signature matches test_div_fmas_f32, but the call passes
; `float 1.0` in place of %c (which is therefore unused). The checks expect
; loads for %a, %b and %d only, and expect 1.0 to appear as an inline
; literal in the third source position of v_div_fmas_f32. In the GFX10 and
; GFX11 checks the first operand is used straight from an SGPR and only the
; second is copied to a VGPR.
629 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
630 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2:
632 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa
633 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13
634 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25
635 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
636 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
637 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
638 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
639 ; GFX7-NEXT: s_and_b32 s2, 1, s4
640 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
641 ; GFX7-NEXT: s_mov_b32 s2, -1
642 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
644 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, 1.0
645 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
646 ; GFX7-NEXT: s_endpgm
648 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_2:
650 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28
651 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c
652 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94
653 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
654 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
655 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
656 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
657 ; GFX8-NEXT: s_and_b32 s2, 1, s4
658 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
660 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, 1.0
661 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
662 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
663 ; GFX8-NEXT: flat_store_dword v[0:1], v2
664 ; GFX8-NEXT: s_endpgm
666 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2:
667 ; GFX10_W32: ; %bb.0:
668 ; GFX10_W32-NEXT: s_clause 0x3
669 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94
670 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x4c
671 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28
672 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
673 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
674 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
675 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4
676 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
677 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
678 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0
679 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
680 ; GFX10_W32-NEXT: s_endpgm
682 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2:
683 ; GFX10_W64: ; %bb.0:
684 ; GFX10_W64-NEXT: s_clause 0x3
685 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94
686 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x4c
687 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28
688 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
689 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
690 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
691 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4
692 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
693 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
694 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0
695 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
696 ; GFX10_W64-NEXT: s_endpgm
698 ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_2:
699 ; GFX11_W32: ; %bb.0:
700 ; GFX11_W32-NEXT: s_clause 0x3
701 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x94
702 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x4c
703 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x28
704 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
705 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
706 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
707 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
708 ; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
709 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
710 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, 1.0
711 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
712 ; GFX11_W32-NEXT: s_nop 0
713 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
714 ; GFX11_W32-NEXT: s_endpgm
716 ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_2:
717 ; GFX11_W64: ; %bb.0:
718 ; GFX11_W64-NEXT: s_clause 0x3
719 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x94
720 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x4c
721 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x28
722 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
723 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
724 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
725 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
726 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
727 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
728 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, 1.0
729 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
730 ; GFX11_W64-NEXT: s_nop 0
731 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
732 ; GFX11_W64-NEXT: s_endpgm
733 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d)
734 store float %result, ptr addrspace(1) %out, align 4
738 define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) {
739 ; GFX7-LABEL: test_div_fmas_f64:
741 ; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
742 ; GFX7-NEXT: s_load_dword s0, s[0:1], 0x8
743 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
744 ; GFX7-NEXT: v_mov_b32_e32 v0, s6
745 ; GFX7-NEXT: v_mov_b32_e32 v2, s8
746 ; GFX7-NEXT: v_mov_b32_e32 v4, s10
747 ; GFX7-NEXT: s_and_b32 s0, 1, s0
748 ; GFX7-NEXT: v_mov_b32_e32 v1, s7
749 ; GFX7-NEXT: v_mov_b32_e32 v3, s9
750 ; GFX7-NEXT: v_mov_b32_e32 v5, s11
751 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
752 ; GFX7-NEXT: s_mov_b32 s6, -1
753 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
755 ; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
756 ; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
757 ; GFX7-NEXT: s_endpgm
759 ; GFX8-LABEL: test_div_fmas_f64:
761 ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
762 ; GFX8-NEXT: s_load_dword s0, s[0:1], 0x20
763 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
764 ; GFX8-NEXT: v_mov_b32_e32 v0, s6
765 ; GFX8-NEXT: v_mov_b32_e32 v2, s8
766 ; GFX8-NEXT: v_mov_b32_e32 v4, s10
767 ; GFX8-NEXT: s_and_b32 s0, 1, s0
768 ; GFX8-NEXT: v_mov_b32_e32 v1, s7
769 ; GFX8-NEXT: v_mov_b32_e32 v3, s9
770 ; GFX8-NEXT: v_mov_b32_e32 v5, s11
771 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
773 ; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
774 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
775 ; GFX8-NEXT: v_mov_b32_e32 v3, s5
776 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
777 ; GFX8-NEXT: s_endpgm
779 ; GFX10_W32-LABEL: test_div_fmas_f64:
780 ; GFX10_W32: ; %bb.0:
781 ; GFX10_W32-NEXT: s_clause 0x1
782 ; GFX10_W32-NEXT: s_load_dword s2, s[0:1], 0x20
783 ; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
784 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
785 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s2
786 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s8
787 ; GFX10_W32-NEXT: v_mov_b32_e32 v2, s10
788 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
789 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s9
790 ; GFX10_W32-NEXT: v_mov_b32_e32 v3, s11
791 ; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3]
792 ; GFX10_W32-NEXT: v_mov_b32_e32 v2, 0
793 ; GFX10_W32-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
794 ; GFX10_W32-NEXT: s_endpgm
796 ; GFX10_W64-LABEL: test_div_fmas_f64:
797 ; GFX10_W64: ; %bb.0:
798 ; GFX10_W64-NEXT: s_clause 0x1
799 ; GFX10_W64-NEXT: s_load_dword s2, s[0:1], 0x20
800 ; GFX10_W64-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
801 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
802 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s2
803 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s8
804 ; GFX10_W64-NEXT: v_mov_b32_e32 v2, s10
805 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
806 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s9
807 ; GFX10_W64-NEXT: v_mov_b32_e32 v3, s11
808 ; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3]
809 ; GFX10_W64-NEXT: v_mov_b32_e32 v2, 0
810 ; GFX10_W64-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
811 ; GFX10_W64-NEXT: s_endpgm
813 ; GFX11_W32-LABEL: test_div_fmas_f64:
814 ; GFX11_W32: ; %bb.0:
815 ; GFX11_W32-NEXT: s_clause 0x1
816 ; GFX11_W32-NEXT: s_load_b32 s8, s[0:1], 0x20
817 ; GFX11_W32-NEXT: s_load_b256 s[0:7], s[0:1], 0x0
818 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
819 ; GFX11_W32-NEXT: s_and_b32 s8, 1, s8
820 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
821 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s8
822 ; GFX11_W32-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
823 ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3]
824 ; GFX11_W32-NEXT: v_mov_b32_e32 v2, 0
825 ; GFX11_W32-NEXT: global_store_b64 v2, v[0:1], s[0:1]
826 ; GFX11_W32-NEXT: s_nop 0
827 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
828 ; GFX11_W32-NEXT: s_endpgm
830 ; GFX11_W64-LABEL: test_div_fmas_f64:
831 ; GFX11_W64: ; %bb.0:
832 ; GFX11_W64-NEXT: s_clause 0x1
833 ; GFX11_W64-NEXT: s_load_b32 s8, s[0:1], 0x20
834 ; GFX11_W64-NEXT: s_load_b256 s[0:7], s[0:1], 0x0
835 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
836 ; GFX11_W64-NEXT: s_and_b32 s8, 1, s8
837 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4
838 ; GFX11_W64-NEXT: v_mov_b32_e32 v2, s6
839 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s8
840 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5
841 ; GFX11_W64-NEXT: v_mov_b32_e32 v3, s7
842 ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3]
843 ; GFX11_W64-NEXT: v_mov_b32_e32 v2, 0
844 ; GFX11_W64-NEXT: global_store_b64 v2, v[0:1], s[0:1]
845 ; GFX11_W64-NEXT: s_nop 0
846 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
847 ; GFX11_W64-NEXT: s_endpgm
848 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d)
849 store double %result, ptr addrspace(1) %out, align 8
; Kernel test: the i1 operand of llvm.amdgcn.div.fmas.f32 is produced by an
; integer compare of the kernel argument %i against zero.
; The checks for every target expect the compare to be done on the scalar unit
; (s_cmp_eq_u32 + s_cselect_b32), masked to bit 0 with s_and_b32, and then
; copied into vcc (vcc_lo under the W32 prefixes) by v_cmp_ne_u32_e64 right
; before the v_div_fmas_f32 instruction.
853 define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) {
854 ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc:
856 ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2
857 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
858 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
859 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
860 ; GFX7-NEXT: s_cmp_eq_u32 s7, 0
861 ; GFX7-NEXT: s_cselect_b32 s2, 1, 0
862 ; GFX7-NEXT: s_and_b32 s2, 1, s2
863 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
864 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
865 ; GFX7-NEXT: v_mov_b32_e32 v2, s6
866 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
867 ; GFX7-NEXT: s_mov_b32 s2, -1
869 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
870 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
871 ; GFX7-NEXT: s_endpgm
873 ; GFX8-LABEL: test_div_fmas_f32_cond_to_vcc:
875 ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
876 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
877 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
878 ; GFX8-NEXT: s_cmp_eq_u32 s7, 0
879 ; GFX8-NEXT: s_cselect_b32 s2, 1, 0
880 ; GFX8-NEXT: s_and_b32 s2, 1, s2
881 ; GFX8-NEXT: v_mov_b32_e32 v0, s4
882 ; GFX8-NEXT: v_mov_b32_e32 v1, s5
883 ; GFX8-NEXT: v_mov_b32_e32 v2, s6
884 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
886 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
887 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
888 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
889 ; GFX8-NEXT: flat_store_dword v[0:1], v2
890 ; GFX8-NEXT: s_endpgm
892 ; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc:
893 ; GFX10_W32: ; %bb.0:
894 ; GFX10_W32-NEXT: s_clause 0x1
895 ; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
896 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
897 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
898 ; GFX10_W32-NEXT: s_cmp_eq_u32 s7, 0
899 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
900 ; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0
901 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6
902 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s0
903 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
904 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
905 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
906 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
907 ; GFX10_W32-NEXT: s_endpgm
909 ; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc:
910 ; GFX10_W64: ; %bb.0:
911 ; GFX10_W64-NEXT: s_clause 0x1
912 ; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
913 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
914 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
915 ; GFX10_W64-NEXT: s_cmp_eq_u32 s7, 0
916 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
917 ; GFX10_W64-NEXT: s_cselect_b32 s0, 1, 0
918 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6
919 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s0
920 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
921 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
922 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
923 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
924 ; GFX10_W64-NEXT: s_endpgm
926 ; GFX11_W32-LABEL: test_div_fmas_f32_cond_to_vcc:
927 ; GFX11_W32: ; %bb.0:
928 ; GFX11_W32-NEXT: s_clause 0x1
929 ; GFX11_W32-NEXT: s_load_b128 s[4:7], s[0:1], 0x8
930 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
931 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
932 ; GFX11_W32-NEXT: s_cmp_eq_u32 s7, 0
933 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
934 ; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0
935 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
936 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
937 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
938 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
939 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
940 ; GFX11_W32-NEXT: s_nop 0
941 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
942 ; GFX11_W32-NEXT: s_endpgm
944 ; GFX11_W64-LABEL: test_div_fmas_f32_cond_to_vcc:
945 ; GFX11_W64: ; %bb.0:
946 ; GFX11_W64-NEXT: s_clause 0x1
947 ; GFX11_W64-NEXT: s_load_b128 s[4:7], s[0:1], 0x8
948 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
949 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
950 ; GFX11_W64-NEXT: s_cmp_eq_u32 s7, 0
951 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5
952 ; GFX11_W64-NEXT: s_cselect_b32 s2, 1, 0
953 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6
954 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
955 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
956 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
957 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
958 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
959 ; GFX11_W64-NEXT: s_nop 0
960 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
961 ; GFX11_W64-NEXT: s_endpgm
; Condition under test: (%i == 0), passed as the fourth (i1) intrinsic operand.
962 %cmp = icmp eq i32 %i, 0
963 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp)
; The result is written to %out so the call has an observable use.
964 store float %result, ptr addrspace(1) %out, align 4
; Kernel test: the i1 operand of llvm.amdgcn.div.fmas.f32 is the constant
; `false`.
; The checks expect no compare at all: vcc is cleared directly with
; `s_mov_b64 vcc, 0` (or `s_mov_b32 vcc_lo, 0` under the W32 prefixes) ahead of
; v_div_fmas_f32.
; Unnamed [8 x i32] arguments separate %out, %a, %b and %c; the checks load the
; three floats individually (byte offsets 0x28 / 0x4c / 0x70, dword offsets
; 0xa / 0x13 / 0x1c on GFX7).
; NOTE(review): the padding presumably exists to stop the float arguments from
; being fetched by one wide scalar load - confirm against the test's history.
968 define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) {
969 ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
971 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa
972 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13
973 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c
974 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
975 ; GFX7-NEXT: s_mov_b64 vcc, 0
976 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
977 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
978 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
979 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
980 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
981 ; GFX7-NEXT: s_mov_b32 s2, -1
982 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
983 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
984 ; GFX7-NEXT: s_endpgm
986 ; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
988 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28
989 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c
990 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70
991 ; GFX8-NEXT: s_mov_b64 vcc, 0
992 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
993 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
994 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
995 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
996 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
997 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
998 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
999 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1000 ; GFX8-NEXT: flat_store_dword v[0:1], v2
1001 ; GFX8-NEXT: s_endpgm
1003 ; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1004 ; GFX10_W32: ; %bb.0:
1005 ; GFX10_W32-NEXT: s_clause 0x3
1006 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c
1007 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70
1008 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28
1009 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1010 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0
1011 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1012 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4
1013 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5
1014 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1
1015 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
1016 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
1017 ; GFX10_W32-NEXT: s_endpgm
1019 ; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1020 ; GFX10_W64: ; %bb.0:
1021 ; GFX10_W64-NEXT: s_clause 0x3
1022 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c
1023 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70
1024 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28
1025 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1026 ; GFX10_W64-NEXT: s_mov_b64 vcc, 0
1027 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1028 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4
1029 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5
1030 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1
1031 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
1032 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
1033 ; GFX10_W64-NEXT: s_endpgm
1035 ; GFX11_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1036 ; GFX11_W32: ; %bb.0:
1037 ; GFX11_W32-NEXT: s_clause 0x3
1038 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x4c
1039 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x70
1040 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x28
1041 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1042 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0
1043 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1044 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
1045 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
1046 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
1047 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
1048 ; GFX11_W32-NEXT: s_nop 0
1049 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1050 ; GFX11_W32-NEXT: s_endpgm
1052 ; GFX11_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1053 ; GFX11_W64: ; %bb.0:
1054 ; GFX11_W64-NEXT: s_clause 0x3
1055 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x4c
1056 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x70
1057 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x28
1058 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1059 ; GFX11_W64-NEXT: s_mov_b64 vcc, 0
1060 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1061 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2
1062 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3
1063 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
1064 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
1065 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
1066 ; GFX11_W64-NEXT: s_nop 0
1067 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1068 ; GFX11_W64-NEXT: s_endpgm
; Condition under test: the literal `i1 false`.
1069 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false)
1070 store float %result, ptr addrspace(1) %out, align 4
; Kernel test: the i1 operand of llvm.amdgcn.div.fmas.f32 is the constant
; `true`.
; The checks expect vcc to be set to all ones directly with
; `s_mov_b64 vcc, -1` (or `s_mov_b32 vcc_lo, -1` under the W32 prefixes), with
; no compare instruction, ahead of v_div_fmas_f32.
; The signature uses the same unnamed [8 x i32] padding arguments between the
; float operands as the `imm_false` variant, and the checks load %a, %b and %c
; from the same offsets.
1074 define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) {
1075 ; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1077 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa
1078 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13
1079 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c
1080 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1081 ; GFX7-NEXT: s_mov_b64 vcc, -1
1082 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1083 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
1084 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
1085 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
1086 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
1087 ; GFX7-NEXT: s_mov_b32 s2, -1
1088 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1089 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
1090 ; GFX7-NEXT: s_endpgm
1092 ; GFX8-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1094 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28
1095 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c
1096 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70
1097 ; GFX8-NEXT: s_mov_b64 vcc, -1
1098 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1099 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1100 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
1101 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
1102 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
1103 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
1104 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1105 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1106 ; GFX8-NEXT: flat_store_dword v[0:1], v2
1107 ; GFX8-NEXT: s_endpgm
1109 ; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1110 ; GFX10_W32: ; %bb.0:
1111 ; GFX10_W32-NEXT: s_clause 0x3
1112 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c
1113 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70
1114 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28
1115 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1116 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, -1
1117 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1118 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4
1119 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5
1120 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1
1121 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
1122 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
1123 ; GFX10_W32-NEXT: s_endpgm
1125 ; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1126 ; GFX10_W64: ; %bb.0:
1127 ; GFX10_W64-NEXT: s_clause 0x3
1128 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c
1129 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70
1130 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28
1131 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1132 ; GFX10_W64-NEXT: s_mov_b64 vcc, -1
1133 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1134 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4
1135 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5
1136 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1
1137 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
1138 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
1139 ; GFX10_W64-NEXT: s_endpgm
1141 ; GFX11_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1142 ; GFX11_W32: ; %bb.0:
1143 ; GFX11_W32-NEXT: s_clause 0x3
1144 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x4c
1145 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x70
1146 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x28
1147 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1148 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, -1
1149 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1150 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
1151 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
1152 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
1153 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
1154 ; GFX11_W32-NEXT: s_nop 0
1155 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1156 ; GFX11_W32-NEXT: s_endpgm
1158 ; GFX11_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1159 ; GFX11_W64: ; %bb.0:
1160 ; GFX11_W64-NEXT: s_clause 0x3
1161 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x4c
1162 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x70
1163 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x28
1164 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1165 ; GFX11_W64-NEXT: s_mov_b64 vcc, -1
1166 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1167 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2
1168 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3
1169 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
1170 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
1171 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
1172 ; GFX11_W64-NEXT: s_nop 0
1173 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1174 ; GFX11_W64-NEXT: s_endpgm
; Condition under test: the literal `i1 true`.
1175 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true)
1176 store float %result, ptr addrspace(1) %out, align 4
; Kernel test: the i1 operand of llvm.amdgcn.div.fmas.f32 is the logical AND of
; two compares: one on the work-item id and one on the kernel argument %d.
; The checks expect the work-item-id compare to be a VALU compare of v0 that
; writes vcc (vcc_lo under the W32 prefixes), the %d compare to be done with
; s_cmp_lg_u32 / s_cselect_b32 / s_and_b32 and then turned into a lane mask by
; v_cmp_ne_u32_e64, and the two masks to be merged into vcc with s_and_b64
; (s_and_b32 under the W32 prefixes) before v_div_fmas_f32.
; The three float operands are read with volatile loads; the checks show three
; separate `glc` loads, each followed by its own `s_waitcnt vmcnt(0)`.
; The result is stored at element index 2 of %out (checked as `offset:8` where
; the store instruction takes an immediate offset).
1180 define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %d) {
1181 ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1183 ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
1184 ; GFX7-NEXT: s_load_dword s8, s[0:1], 0xc
1185 ; GFX7-NEXT: s_mov_b32 s2, 0
1186 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1187 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
1188 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1189 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1190 ; GFX7-NEXT: s_mov_b64 s[0:1], s[6:7]
1191 ; GFX7-NEXT: buffer_load_dword v3, v[1:2], s[0:3], 0 addr64 glc
1192 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1193 ; GFX7-NEXT: buffer_load_dword v4, v[1:2], s[0:3], 0 addr64 offset:4 glc
1194 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1195 ; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[0:3], 0 addr64 offset:8 glc
1196 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1197 ; GFX7-NEXT: s_cmp_lg_u32 s8, 0
1198 ; GFX7-NEXT: s_cselect_b32 s0, 1, 0
1199 ; GFX7-NEXT: s_and_b32 s0, 1, s0
1200 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1201 ; GFX7-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
1202 ; GFX7-NEXT: s_mov_b32 s2, -1
1203 ; GFX7-NEXT: s_and_b64 vcc, vcc, s[0:1]
1204 ; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3]
1205 ; GFX7-NEXT: v_div_fmas_f32 v0, v3, v4, v1
1206 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
1207 ; GFX7-NEXT: s_endpgm
1209 ; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1211 ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
1212 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x30
1213 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
1214 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1215 ; GFX8-NEXT: v_mov_b32_e32 v1, s6
1216 ; GFX8-NEXT: v_mov_b32_e32 v2, s7
1217 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1218 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
1219 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v1
1220 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v2, vcc
1221 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 8, v1
1222 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v2, vcc
1223 ; GFX8-NEXT: flat_load_dword v1, v[1:2] glc
1224 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1225 ; GFX8-NEXT: flat_load_dword v2, v[3:4] glc
1226 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1227 ; GFX8-NEXT: flat_load_dword v3, v[5:6] glc
1228 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1229 ; GFX8-NEXT: s_add_u32 s0, s4, 8
1230 ; GFX8-NEXT: s_addc_u32 s1, s5, 0
1231 ; GFX8-NEXT: s_cmp_lg_u32 s2, 0
1232 ; GFX8-NEXT: s_cselect_b32 s2, 1, 0
1233 ; GFX8-NEXT: s_and_b32 s2, 1, s2
1234 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1235 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2
1236 ; GFX8-NEXT: s_and_b64 vcc, vcc, s[2:3]
1237 ; GFX8-NEXT: s_nop 1
1238 ; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
1239 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1240 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1241 ; GFX8-NEXT: flat_store_dword v[0:1], v2
1242 ; GFX8-NEXT: s_endpgm
1244 ; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1245 ; GFX10_W32: ; %bb.0:
1246 ; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
1247 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1248 ; GFX10_W32-NEXT: s_load_dword s0, s[0:1], 0x30
1249 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1250 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1251 ; GFX10_W32-NEXT: global_load_dword v2, v1, s[6:7] glc dlc
1252 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1253 ; GFX10_W32-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc
1254 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1255 ; GFX10_W32-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc
1256 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1257 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
1258 ; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0
1259 ; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0
1260 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s0
1261 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
1262 ; GFX10_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
1263 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v4
1264 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[4:5] offset:8
1265 ; GFX10_W32-NEXT: s_endpgm
1267 ; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1268 ; GFX10_W64: ; %bb.0:
1269 ; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
1270 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1271 ; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x30
1272 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1273 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1274 ; GFX10_W64-NEXT: global_load_dword v2, v1, s[6:7] glc dlc
1275 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1276 ; GFX10_W64-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc
1277 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1278 ; GFX10_W64-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc
1279 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1280 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
1281 ; GFX10_W64-NEXT: s_cmp_lg_u32 s0, 0
1282 ; GFX10_W64-NEXT: s_cselect_b32 s0, 1, 0
1283 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s0
1284 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
1285 ; GFX10_W64-NEXT: s_and_b64 vcc, vcc, s[0:1]
1286 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v4
1287 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[4:5] offset:8
1288 ; GFX10_W64-NEXT: s_endpgm
1290 ; GFX11_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1291 ; GFX11_W32: ; %bb.0:
1292 ; GFX11_W32-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
1293 ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1294 ; GFX11_W32-NEXT: s_load_b32 s0, s[0:1], 0x30
1295 ; GFX11_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1296 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1297 ; GFX11_W32-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc
1298 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
1299 ; GFX11_W32-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc
1300 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
1301 ; GFX11_W32-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc
1302 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
1303 ; GFX11_W32-NEXT: s_cmp_lg_u32 s0, 0
1304 ; GFX11_W32-NEXT: s_cselect_b32 s0, 1, 0
1305 ; GFX11_W32-NEXT: s_and_b32 s0, 1, s0
1306 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
1307 ; GFX11_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
1308 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v1
1309 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
1310 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[4:5] offset:8
1311 ; GFX11_W32-NEXT: s_nop 0
1312 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1313 ; GFX11_W32-NEXT: s_endpgm
1315 ; GFX11_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1316 ; GFX11_W64: ; %bb.0:
1317 ; GFX11_W64-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
1318 ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1319 ; GFX11_W64-NEXT: s_load_b32 s0, s[0:1], 0x30
1320 ; GFX11_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1321 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1322 ; GFX11_W64-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc
1323 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
1324 ; GFX11_W64-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc
1325 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
1326 ; GFX11_W64-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc
1327 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
1328 ; GFX11_W64-NEXT: s_cmp_lg_u32 s0, 0
1329 ; GFX11_W64-NEXT: s_cselect_b32 s0, 1, 0
1330 ; GFX11_W64-NEXT: s_and_b32 s0, 1, s0
1331 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
1332 ; GFX11_W64-NEXT: s_and_b64 vcc, vcc, s[0:1]
1333 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v1
1334 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
1335 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[4:5] offset:8
1336 ; GFX11_W64-NEXT: s_nop 0
1337 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1338 ; GFX11_W64-NEXT: s_endpgm
; Addresses: three consecutive floats starting at %in[%tid], and %out[2].
1339 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1340 %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid
1341 %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1
1342 %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2
1343 %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2
; Volatile loads of the three intrinsic operands.
1345 %a = load volatile float, ptr addrspace(1) %gep.a
1346 %b = load volatile float, ptr addrspace(1) %gep.b
1347 %c = load volatile float, ptr addrspace(1) %gep.c
; Condition under test: (%tid == 0) AND (%d != 0).
1349 %cmp0 = icmp eq i32 %tid, 0
1350 %cmp1 = icmp ne i32 %d, 0
1351 %and = and i1 %cmp0, %cmp1
1353 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and)
1354 store float %result, ptr addrspace(1) %gep.out, align 4
; Kernel test: the i1 operand of llvm.amdgcn.div.fmas.f32 arrives through a
; phi. The phi yields `false` on the edge from %entry and the result of an
; `icmp ne` on a value loaded through %dummy on the edge from %bb; %bb is
; entered only when the work-item id is 0.
; The checks expect the phi to live in an SGPR: it is set to 0 before the
; branch, overwritten by s_cselect_b32 inside the exec-masked region, and after
; the join it is masked with s_and_b32 and converted to vcc (vcc_lo under the
; W32 prefixes) by v_cmp_ne_u32_e64 before v_div_fmas_f32.
; The loads of the float operands are not volatile here; the checks show them
; fetched with a single dwordx3 / b96 load.
1358 define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) {
1359 ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc:
1360 ; GFX7: ; %bb.0: ; %entry
1361 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xa
1362 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1363 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
1364 ; GFX7-NEXT: s_mov_b32 s6, 0
1365 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1366 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1367 ; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64
1368 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1369 ; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc
1370 ; GFX7-NEXT: s_cbranch_execz .LBB13_2
1371 ; GFX7-NEXT: ; %bb.1: ; %bb
1372 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x14
1373 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1374 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0
1375 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1376 ; GFX7-NEXT: s_cmp_lg_u32 s4, 0
1377 ; GFX7-NEXT: s_cselect_b32 s6, 1, 0
1378 ; GFX7-NEXT: .LBB13_2: ; %exit
1379 ; GFX7-NEXT: s_or_b64 exec, exec, s[2:3]
1380 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1381 ; GFX7-NEXT: s_and_b32 s0, 1, s6
1382 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
1383 ; GFX7-NEXT: s_mov_b32 s6, -1
1384 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1385 ; GFX7-NEXT: s_nop 1
1386 ; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1387 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1388 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
1389 ; GFX7-NEXT: s_endpgm
1391 ; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc:
1392 ; GFX8: ; %bb.0: ; %entry
1393 ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28
1394 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
1395 ; GFX8-NEXT: s_mov_b32 s4, 0
1396 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1397 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
1398 ; GFX8-NEXT: v_mov_b32_e32 v2, s3
1399 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1400 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
1401 ; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2]
1402 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1403 ; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
1404 ; GFX8-NEXT: s_cbranch_execz .LBB13_2
1405 ; GFX8-NEXT: ; %bb.1: ; %bb
1406 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x50
1407 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1408 ; GFX8-NEXT: s_load_dword s4, s[4:5], 0x0
1409 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1410 ; GFX8-NEXT: s_cmp_lg_u32 s4, 0
1411 ; GFX8-NEXT: s_cselect_b32 s4, 1, 0
1412 ; GFX8-NEXT: .LBB13_2: ; %exit
1413 ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
1414 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1415 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1416 ; GFX8-NEXT: s_add_u32 s0, s0, 8
1417 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
1418 ; GFX8-NEXT: s_and_b32 s2, 1, s4
1419 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
1420 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1421 ; GFX8-NEXT: s_nop 2
1422 ; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
1423 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1424 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1425 ; GFX8-NEXT: flat_store_dword v[0:1], v2
1426 ; GFX8-NEXT: s_endpgm
1428 ; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
1429 ; GFX10_W32: ; %bb.0: ; %entry
1430 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28
1431 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1432 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1433 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1434 ; GFX10_W32-NEXT: global_load_dwordx3 v[1:3], v1, s[2:3]
1435 ; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3
1436 ; GFX10_W32-NEXT: s_mov_b32 s2, 0
1437 ; GFX10_W32-NEXT: s_and_saveexec_b32 s3, vcc_lo
1438 ; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2
1439 ; GFX10_W32-NEXT: ; %bb.1: ; %bb
1440 ; GFX10_W32-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x50
1441 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1442 ; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x0
1443 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1444 ; GFX10_W32-NEXT: s_cmp_lg_u32 s2, 0
1445 ; GFX10_W32-NEXT: s_cselect_b32 s2, 1, 0
1446 ; GFX10_W32-NEXT: .LBB13_2: ; %exit
1447 ; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s3
1448 ; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1449 ; GFX10_W32-NEXT: s_and_b32 s2, 1, s2
1450 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
1451 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1452 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1453 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
1454 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1455 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] offset:8
1456 ; GFX10_W32-NEXT: s_endpgm
1458 ; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
1459 ; GFX10_W64: ; %bb.0: ; %entry
1460 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28
1461 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1462 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1463 ; GFX10_W64-NEXT: s_mov_b32 s4, 0
1464 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1465 ; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[2:3]
1466 ; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3
1467 ; GFX10_W64-NEXT: s_and_saveexec_b64 s[2:3], vcc
1468 ; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2
1469 ; GFX10_W64-NEXT: ; %bb.1: ; %bb
1470 ; GFX10_W64-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x50
1471 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1472 ; GFX10_W64-NEXT: s_load_dword s4, s[4:5], 0x0
1473 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1474 ; GFX10_W64-NEXT: s_cmp_lg_u32 s4, 0
1475 ; GFX10_W64-NEXT: s_cselect_b32 s4, 1, 0
1476 ; GFX10_W64-NEXT: .LBB13_2: ; %exit
1477 ; GFX10_W64-NEXT: s_or_b64 exec, exec, s[2:3]
1478 ; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1479 ; GFX10_W64-NEXT: s_and_b32 s2, 1, s4
1480 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
1481 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1482 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1483 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
1484 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1485 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] offset:8
1486 ; GFX10_W64-NEXT: s_endpgm
1488 ; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
1489 ; GFX11_W32: ; %bb.0: ; %entry
1490 ; GFX11_W32-NEXT: s_load_b64 s[2:3], s[0:1], 0x28
1491 ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1492 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1493 ; GFX11_W32-NEXT: global_load_b96 v[1:3], v1, s[2:3]
1494 ; GFX11_W32-NEXT: s_mov_b32 s2, 0
1495 ; GFX11_W32-NEXT: s_mov_b32 s3, exec_lo
1496 ; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v0
1497 ; GFX11_W32-NEXT: s_cbranch_execz .LBB13_2
1498 ; GFX11_W32-NEXT: ; %bb.1: ; %bb
1499 ; GFX11_W32-NEXT: s_load_b64 s[4:5], s[0:1], 0x50
1500 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1501 ; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x0
1502 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1503 ; GFX11_W32-NEXT: s_cmp_lg_u32 s2, 0
1504 ; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0
1505 ; GFX11_W32-NEXT: .LBB13_2: ; %exit
1506 ; GFX11_W32-NEXT: s_or_b32 exec_lo, exec_lo, s3
1507 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1508 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
1509 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
1510 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
1511 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1512 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
1513 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1514 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8
1515 ; GFX11_W32-NEXT: s_nop 0
1516 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1517 ; GFX11_W32-NEXT: s_endpgm
1519 ; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
1520 ; GFX11_W64: ; %bb.0: ; %entry
1521 ; GFX11_W64-NEXT: s_load_b64 s[2:3], s[0:1], 0x28
1522 ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1523 ; GFX11_W64-NEXT: s_mov_b32 s4, 0
1524 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1525 ; GFX11_W64-NEXT: global_load_b96 v[1:3], v1, s[2:3]
1526 ; GFX11_W64-NEXT: s_mov_b64 s[2:3], exec
1527 ; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v0
1528 ; GFX11_W64-NEXT: s_cbranch_execz .LBB13_2
1529 ; GFX11_W64-NEXT: ; %bb.1: ; %bb
1530 ; GFX11_W64-NEXT: s_load_b64 s[4:5], s[0:1], 0x50
1531 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1532 ; GFX11_W64-NEXT: s_load_b32 s4, s[4:5], 0x0
1533 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1534 ; GFX11_W64-NEXT: s_cmp_lg_u32 s4, 0
1535 ; GFX11_W64-NEXT: s_cselect_b32 s4, 1, 0
1536 ; GFX11_W64-NEXT: .LBB13_2: ; %exit
1537 ; GFX11_W64-NEXT: s_or_b64 exec, exec, s[2:3]
1538 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1539 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s4
1540 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
1541 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
1542 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1543 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
1544 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1545 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8
1546 ; GFX11_W64-NEXT: s_nop 0
1547 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1548 ; GFX11_W64-NEXT: s_endpgm
; Addresses of three consecutive floats starting at %in[%tid].
1550 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1551 %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid
1552 %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1
1553 %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2
; Ordinary (non-volatile) loads of the three intrinsic operands.
1555 %a = load float, ptr addrspace(1) %gep.a
1556 %b = load float, ptr addrspace(1) %gep.b
1557 %c = load float, ptr addrspace(1) %gep.c
; Only work-item 0 takes the %bb path; all others go straight to %exit.
1559 %cmp0 = icmp eq i32 %tid, 0
1560 br i1 %cmp0, label %bb, label %exit
; Value feeding the phi on the %bb edge: whether the i32 read via %dummy is non-zero.
1563 %val = load i32, ptr addrspace(1) %dummy
1564 %cmp1 = icmp ne i32 %val, 0
; Condition under test: i1 phi merging the constant false with %cmp1.
1568 %cond = phi i1 [false, %entry], [%cmp1, %bb]
1569 %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2
1570 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond)
1571 store float %result, ptr addrspace(1) %gep.out, align 4
; Intrinsics called by the tests in this file. All three declarations share
; attribute group #0.
; workitem.id.x supplies the per-lane id used to build addresses and conditions.
1575 declare i32 @llvm.amdgcn.workitem.id.x() #0
; div.fmas takes three floating-point operands plus an i1 condition; the checks
; in this file always show that condition placed in vcc / vcc_lo before the
; v_div_fmas instruction.
1576 declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #0
1577 declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) #0
; Attribute group applied to every intrinsic declaration above.
1579 attributes #0 = { nounwind readnone speculatable }