1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10_W32 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s
7 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s
; Callable function that passes its i1 argument straight to
; llvm.amdgcn.div.fmas.f32. In every configuration the checks expect the i1
; (arriving in v3) to be masked with 1 via v_and_b32, compared against 0 into
; vcc (vcc_lo in the wave32 configurations), and then v_div_fmas_f32 to be
; issued on v0, v1, v2.
9 define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) {
10 ; GFX7-LABEL: v_div_fmas_f32:
12 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13 ; GFX7-NEXT: v_and_b32_e32 v3, 1, v3
14 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
16 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
17 ; GFX7-NEXT: s_setpc_b64 s[30:31]
19 ; GFX8-LABEL: v_div_fmas_f32:
21 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX8-NEXT: v_and_b32_e32 v3, 1, v3
23 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
25 ; GFX8-NEXT: v_div_fmas_f32 v0, v0, v1, v2
26 ; GFX8-NEXT: s_setpc_b64 s[30:31]
28 ; GFX10_W32-LABEL: v_div_fmas_f32:
30 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX10_W32-NEXT: v_and_b32_e32 v3, 1, v3
32 ; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
33 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
34 ; GFX10_W32-NEXT: s_setpc_b64 s[30:31]
36 ; GFX10_W64-LABEL: v_div_fmas_f32:
38 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX10_W64-NEXT: v_and_b32_e32 v3, 1, v3
40 ; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
41 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
42 ; GFX10_W64-NEXT: s_setpc_b64 s[30:31]
44 ; GFX11_W32-LABEL: v_div_fmas_f32:
46 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47 ; GFX11_W32-NEXT: v_and_b32_e32 v3, 1, v3
48 ; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
49 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
50 ; GFX11_W32-NEXT: s_setpc_b64 s[30:31]
52 ; GFX11_W64-LABEL: v_div_fmas_f32:
54 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX11_W64-NEXT: v_and_b32_e32 v3, 1, v3
56 ; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
57 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
58 ; GFX11_W64-NEXT: s_setpc_b64 s[30:31]
59 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
; Double-precision counterpart of the callable-function test: the i1 argument
; goes directly to llvm.amdgcn.div.fmas.f64. The checks expect the condition
; (arriving in v6) to be masked with 1, compared against 0 into vcc (vcc_lo in
; the wave32 configurations), followed by v_div_fmas_f64 on the register pairs
; v[0:1], v[2:3], v[4:5].
63 define double @v_div_fmas_f64(double %a, double %b, double %c, i1 %d) {
64 ; GFX7-LABEL: v_div_fmas_f64:
66 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX7-NEXT: v_and_b32_e32 v6, 1, v6
68 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
70 ; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
71 ; GFX7-NEXT: s_setpc_b64 s[30:31]
73 ; GFX8-LABEL: v_div_fmas_f64:
75 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX8-NEXT: v_and_b32_e32 v6, 1, v6
77 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
79 ; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
80 ; GFX8-NEXT: s_setpc_b64 s[30:31]
82 ; GFX10_W32-LABEL: v_div_fmas_f64:
84 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX10_W32-NEXT: v_and_b32_e32 v6, 1, v6
86 ; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
87 ; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
88 ; GFX10_W32-NEXT: s_setpc_b64 s[30:31]
90 ; GFX10_W64-LABEL: v_div_fmas_f64:
92 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX10_W64-NEXT: v_and_b32_e32 v6, 1, v6
94 ; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
95 ; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
96 ; GFX10_W64-NEXT: s_setpc_b64 s[30:31]
98 ; GFX11_W32-LABEL: v_div_fmas_f64:
100 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101 ; GFX11_W32-NEXT: v_and_b32_e32 v6, 1, v6
102 ; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
103 ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
104 ; GFX11_W32-NEXT: s_setpc_b64 s[30:31]
106 ; GFX11_W64-LABEL: v_div_fmas_f64:
107 ; GFX11_W64: ; %bb.0:
108 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX11_W64-NEXT: v_and_b32_e32 v6, 1, v6
110 ; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
111 ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
112 ; GFX11_W64-NEXT: s_setpc_b64 s[30:31]
113 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d)
; amdgpu_ps variant with every operand passed inreg; the condition is computed
; in IR as icmp eq i32 %d, 0. The checks expect the compare on the scalar side
; (s_cmp_eq_u32 + s_cselect_b32), a mask with 1, and v_cmp_ne_u32_e64 to place
; the result in vcc (vcc_lo for wave32) ahead of v_div_fmas_f32. The hawaii and
; fiji checks copy all three float inputs into VGPRs, while the gfx1010 and
; gfx1100 checks keep the first operand in s0.
117 define amdgpu_ps float @s_div_fmas_f32(float inreg %a, float inreg %b, float inreg %c, i32 inreg %d) {
118 ; GFX7-LABEL: s_div_fmas_f32:
120 ; GFX7-NEXT: s_cmp_eq_u32 s3, 0
121 ; GFX7-NEXT: s_cselect_b32 s3, 1, 0
122 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
123 ; GFX7-NEXT: s_and_b32 s0, 1, s3
124 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
125 ; GFX7-NEXT: v_mov_b32_e32 v2, s2
126 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
128 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
129 ; GFX7-NEXT: ; return to shader part epilog
131 ; GFX8-LABEL: s_div_fmas_f32:
133 ; GFX8-NEXT: s_cmp_eq_u32 s3, 0
134 ; GFX8-NEXT: s_cselect_b32 s3, 1, 0
135 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
136 ; GFX8-NEXT: s_and_b32 s0, 1, s3
137 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
138 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
139 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
141 ; GFX8-NEXT: v_div_fmas_f32 v0, v0, v1, v2
142 ; GFX8-NEXT: ; return to shader part epilog
144 ; GFX10_W32-LABEL: s_div_fmas_f32:
145 ; GFX10_W32: ; %bb.0:
146 ; GFX10_W32-NEXT: s_cmp_eq_u32 s3, 0
147 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s1
148 ; GFX10_W32-NEXT: s_cselect_b32 s3, 1, 0
149 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s2
150 ; GFX10_W32-NEXT: s_and_b32 s3, 1, s3
151 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3
152 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1
153 ; GFX10_W32-NEXT: ; return to shader part epilog
155 ; GFX10_W64-LABEL: s_div_fmas_f32:
156 ; GFX10_W64: ; %bb.0:
157 ; GFX10_W64-NEXT: s_cmp_eq_u32 s3, 0
158 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s1
159 ; GFX10_W64-NEXT: s_cselect_b32 s3, 1, 0
160 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s2
161 ; GFX10_W64-NEXT: s_and_b32 s3, 1, s3
162 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3
163 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1
164 ; GFX10_W64-NEXT: ; return to shader part epilog
166 ; GFX11_W32-LABEL: s_div_fmas_f32:
167 ; GFX11_W32: ; %bb.0:
168 ; GFX11_W32-NEXT: s_cmp_eq_u32 s3, 0
169 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
170 ; GFX11_W32-NEXT: s_cselect_b32 s3, 1, 0
171 ; GFX11_W32-NEXT: s_and_b32 s3, 1, s3
172 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3
173 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1
174 ; GFX11_W32-NEXT: ; return to shader part epilog
176 ; GFX11_W64-LABEL: s_div_fmas_f32:
177 ; GFX11_W64: ; %bb.0:
178 ; GFX11_W64-NEXT: s_cmp_eq_u32 s3, 0
179 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s1
180 ; GFX11_W64-NEXT: s_cselect_b32 s3, 1, 0
181 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s2
182 ; GFX11_W64-NEXT: s_and_b32 s3, 1, s3
183 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3
184 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1
185 ; GFX11_W64-NEXT: ; return to shader part epilog
186 %vcc = icmp eq i32 %d, 0
187 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %vcc)
; Double-precision amdgpu_ps variant with inreg operands; the condition is
; icmp eq i32 %d, 0 (with %d arriving in s6). The checks expect the scalar
; compare/select/mask sequence, v_cmp_ne_u32_e64 into vcc (vcc_lo for wave32),
; then v_div_fmas_f64, and finally the 64-bit result copied back to s0/s1 with
; two v_readfirstlane_b32 instructions. As in the f32 case, the gfx1010 and
; gfx1100 checks leave the first operand in s[0:1].
191 define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double inreg %c, i32 inreg %d) {
192 ; GFX7-LABEL: s_div_fmas_f64:
194 ; GFX7-NEXT: s_cmp_eq_u32 s6, 0
195 ; GFX7-NEXT: s_cselect_b32 s6, 1, 0
196 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
197 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
198 ; GFX7-NEXT: v_mov_b32_e32 v2, s2
199 ; GFX7-NEXT: v_mov_b32_e32 v4, s4
200 ; GFX7-NEXT: s_and_b32 s0, 1, s6
201 ; GFX7-NEXT: v_mov_b32_e32 v3, s3
202 ; GFX7-NEXT: v_mov_b32_e32 v5, s5
203 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
205 ; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
206 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
207 ; GFX7-NEXT: v_readfirstlane_b32 s1, v1
208 ; GFX7-NEXT: ; return to shader part epilog
210 ; GFX8-LABEL: s_div_fmas_f64:
212 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0
213 ; GFX8-NEXT: s_cselect_b32 s6, 1, 0
214 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
215 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
216 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
217 ; GFX8-NEXT: v_mov_b32_e32 v4, s4
218 ; GFX8-NEXT: s_and_b32 s0, 1, s6
219 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
220 ; GFX8-NEXT: v_mov_b32_e32 v5, s5
221 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
223 ; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
224 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
225 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1
226 ; GFX8-NEXT: ; return to shader part epilog
228 ; GFX10_W32-LABEL: s_div_fmas_f64:
229 ; GFX10_W32: ; %bb.0:
230 ; GFX10_W32-NEXT: s_cmp_eq_u32 s6, 0
231 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s2
232 ; GFX10_W32-NEXT: s_cselect_b32 s6, 1, 0
233 ; GFX10_W32-NEXT: v_mov_b32_e32 v2, s4
234 ; GFX10_W32-NEXT: s_and_b32 s6, 1, s6
235 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s3
236 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6
237 ; GFX10_W32-NEXT: v_mov_b32_e32 v3, s5
238 ; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
239 ; GFX10_W32-NEXT: v_readfirstlane_b32 s0, v0
240 ; GFX10_W32-NEXT: v_readfirstlane_b32 s1, v1
241 ; GFX10_W32-NEXT: ; return to shader part epilog
243 ; GFX10_W64-LABEL: s_div_fmas_f64:
244 ; GFX10_W64: ; %bb.0:
245 ; GFX10_W64-NEXT: s_cmp_eq_u32 s6, 0
246 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s2
247 ; GFX10_W64-NEXT: s_cselect_b32 s6, 1, 0
248 ; GFX10_W64-NEXT: v_mov_b32_e32 v2, s4
249 ; GFX10_W64-NEXT: s_and_b32 s6, 1, s6
250 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s3
251 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6
252 ; GFX10_W64-NEXT: v_mov_b32_e32 v3, s5
253 ; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
254 ; GFX10_W64-NEXT: v_readfirstlane_b32 s0, v0
255 ; GFX10_W64-NEXT: v_readfirstlane_b32 s1, v1
256 ; GFX10_W64-NEXT: ; return to shader part epilog
258 ; GFX11_W32-LABEL: s_div_fmas_f64:
259 ; GFX11_W32: ; %bb.0:
260 ; GFX11_W32-NEXT: s_cmp_eq_u32 s6, 0
261 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
262 ; GFX11_W32-NEXT: s_cselect_b32 s6, 1, 0
263 ; GFX11_W32-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
264 ; GFX11_W32-NEXT: s_and_b32 s6, 1, s6
265 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6
266 ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
267 ; GFX11_W32-NEXT: v_readfirstlane_b32 s0, v0
268 ; GFX11_W32-NEXT: v_readfirstlane_b32 s1, v1
269 ; GFX11_W32-NEXT: ; return to shader part epilog
271 ; GFX11_W64-LABEL: s_div_fmas_f64:
272 ; GFX11_W64: ; %bb.0:
273 ; GFX11_W64-NEXT: s_cmp_eq_u32 s6, 0
274 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2
275 ; GFX11_W64-NEXT: s_cselect_b32 s6, 1, 0
276 ; GFX11_W64-NEXT: v_mov_b32_e32 v2, s4
277 ; GFX11_W64-NEXT: s_and_b32 s6, 1, s6
278 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3
279 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6
280 ; GFX11_W64-NEXT: v_mov_b32_e32 v3, s5
281 ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
282 ; GFX11_W64-NEXT: v_readfirstlane_b32 s0, v0
283 ; GFX11_W64-NEXT: v_readfirstlane_b32 s1, v1
284 ; GFX11_W64-NEXT: ; return to shader part epilog
285 %vcc = icmp eq i32 %d, 0
286 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %vcc)
; Kernel variant: %a, %b, %c and the i1 %d are kernel arguments separated by
; [8 x i32] padding, and the result is stored to %out. The checks expect each
; argument to be fetched with its own scalar load, the i1 to be masked with 1
; and moved into vcc (vcc_lo for wave32) via v_cmp_ne_u32_e64, then
; v_div_fmas_f32 and a store. The store instruction differs per target:
; buffer_store_dword on hawaii, flat_store_dword on fiji, and
; global_store_dword / global_store_b32 on gfx1010 / gfx1100.
290 define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
291 ; GFX7-LABEL: test_div_fmas_f32:
293 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa
294 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13
295 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c
296 ; GFX7-NEXT: s_load_dword s5, s[0:1], 0x25
297 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
298 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
299 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
300 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
301 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
302 ; GFX7-NEXT: s_and_b32 s2, 1, s5
303 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
304 ; GFX7-NEXT: s_mov_b32 s2, -1
305 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
307 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
308 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
309 ; GFX7-NEXT: s_endpgm
311 ; GFX8-LABEL: test_div_fmas_f32:
313 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28
314 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c
315 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70
316 ; GFX8-NEXT: s_load_dword s5, s[0:1], 0x94
317 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
318 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
319 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
320 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
321 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
322 ; GFX8-NEXT: s_and_b32 s2, 1, s5
323 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
325 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
326 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
327 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
328 ; GFX8-NEXT: flat_store_dword v[0:1], v2
329 ; GFX8-NEXT: s_endpgm
331 ; GFX10_W32-LABEL: test_div_fmas_f32:
332 ; GFX10_W32: ; %bb.0:
333 ; GFX10_W32-NEXT: s_clause 0x4
334 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94
335 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x4c
336 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x70
337 ; GFX10_W32-NEXT: s_load_dword s7, s[0:1], 0x28
338 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
339 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
340 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4
341 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
342 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
343 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6
344 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1
345 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
346 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
347 ; GFX10_W32-NEXT: s_endpgm
349 ; GFX10_W64-LABEL: test_div_fmas_f32:
350 ; GFX10_W64: ; %bb.0:
351 ; GFX10_W64-NEXT: s_clause 0x4
352 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94
353 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x4c
354 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x70
355 ; GFX10_W64-NEXT: s_load_dword s7, s[0:1], 0x28
356 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
357 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
358 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4
359 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
360 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
361 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6
362 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1
363 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
364 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
365 ; GFX10_W64-NEXT: s_endpgm
367 ; GFX11_W32-LABEL: test_div_fmas_f32:
368 ; GFX11_W32: ; %bb.0:
369 ; GFX11_W32-NEXT: s_clause 0x4
370 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x94
371 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x4c
372 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x70
373 ; GFX11_W32-NEXT: s_load_b32 s5, s[0:1], 0x28
374 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
375 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
376 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
377 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s4
378 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
379 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s5, v0, v1
380 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
381 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
382 ; GFX11_W32-NEXT: s_nop 0
383 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
384 ; GFX11_W32-NEXT: s_endpgm
386 ; GFX11_W64-LABEL: test_div_fmas_f32:
387 ; GFX11_W64: ; %bb.0:
388 ; GFX11_W64-NEXT: s_clause 0x4
389 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x94
390 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x4c
391 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x70
392 ; GFX11_W64-NEXT: s_load_b32 s5, s[0:1], 0x28
393 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
394 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
395 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
396 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
397 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
398 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s4
399 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s5, v0, v1
400 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
401 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
402 ; GFX11_W64-NEXT: s_nop 0
403 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
404 ; GFX11_W64-NEXT: s_endpgm
405 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
406 store float %result, ptr addrspace(1) %out, align 4
; Kernel variant where the first multiplicand is the constant 1.0 instead of
; %a. The checks expect 1.0 to appear directly as the first source operand of
; v_div_fmas_f32 in every configuration, with three scalar dword loads plus
; the load of the %out pointer; the gfx1010 / gfx1100 checks additionally pass
; the second operand as an SGPR.
410 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
411 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0:
413 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13
414 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c
415 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25
416 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
417 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
418 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
419 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
420 ; GFX7-NEXT: s_and_b32 s2, 1, s4
421 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
422 ; GFX7-NEXT: s_mov_b32 s2, -1
423 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
425 ; GFX7-NEXT: v_div_fmas_f32 v0, 1.0, v0, v1
426 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
427 ; GFX7-NEXT: s_endpgm
429 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_0:
431 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c
432 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70
433 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94
434 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
435 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
436 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
437 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
438 ; GFX8-NEXT: s_and_b32 s2, 1, s4
439 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
441 ; GFX8-NEXT: v_div_fmas_f32 v2, 1.0, v0, v1
442 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
443 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
444 ; GFX8-NEXT: flat_store_dword v[0:1], v2
445 ; GFX8-NEXT: s_endpgm
447 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0:
448 ; GFX10_W32: ; %bb.0:
449 ; GFX10_W32-NEXT: s_clause 0x3
450 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94
451 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70
452 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c
453 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
454 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
455 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
456 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4
457 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
458 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
459 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0
460 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
461 ; GFX10_W32-NEXT: s_endpgm
463 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0:
464 ; GFX10_W64: ; %bb.0:
465 ; GFX10_W64-NEXT: s_clause 0x3
466 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94
467 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70
468 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c
469 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
470 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
471 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
472 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4
473 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
474 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
475 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0
476 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
477 ; GFX10_W64-NEXT: s_endpgm
479 ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_0:
480 ; GFX11_W32: ; %bb.0:
481 ; GFX11_W32-NEXT: s_clause 0x3
482 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x94
483 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x70
484 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x4c
485 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
486 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
487 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
488 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
489 ; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
490 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
491 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, 1.0, s4, v0
492 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
493 ; GFX11_W32-NEXT: s_nop 0
494 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
495 ; GFX11_W32-NEXT: s_endpgm
497 ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_0:
498 ; GFX11_W64: ; %bb.0:
499 ; GFX11_W64-NEXT: s_clause 0x3
500 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x94
501 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x70
502 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x4c
503 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
504 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
505 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
506 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
507 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
508 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
509 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, 1.0, s4, v0
510 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
511 ; GFX11_W64-NEXT: s_nop 0
512 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
513 ; GFX11_W64-NEXT: s_endpgm
514 %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d)
515 store float %result, ptr addrspace(1) %out, align 4
; Kernel variant where the second multiplicand is the constant 1.0 instead of
; %b. Unlike the sibling inline-immediate kernels, the three float arguments
; here are declared back to back, with [8 x i32] padding only before the i1.
; The checks expect 1.0 to appear directly as the second source operand of
; v_div_fmas_f32 in every configuration.
519 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) {
520 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1:
522 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
523 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x4
524 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0xd
525 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
526 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
527 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
528 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
529 ; GFX7-NEXT: s_and_b32 s2, 1, s4
530 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
531 ; GFX7-NEXT: s_mov_b32 s2, -1
532 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
534 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, 1.0, v1
535 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
536 ; GFX7-NEXT: s_endpgm
538 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_1:
540 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x8
541 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x10
542 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x34
543 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
544 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
545 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
546 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
547 ; GFX8-NEXT: s_and_b32 s2, 1, s4
548 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
550 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, 1.0, v1
551 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
552 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
553 ; GFX8-NEXT: flat_store_dword v[0:1], v2
554 ; GFX8-NEXT: s_endpgm
556 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1:
557 ; GFX10_W32: ; %bb.0:
558 ; GFX10_W32-NEXT: s_clause 0x3
559 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x34
560 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x10
561 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x8
562 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
563 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
564 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
565 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4
566 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
567 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
568 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0
569 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
570 ; GFX10_W32-NEXT: s_endpgm
572 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1:
573 ; GFX10_W64: ; %bb.0:
574 ; GFX10_W64-NEXT: s_clause 0x3
575 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x34
576 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x10
577 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x8
578 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
579 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
580 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
581 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4
582 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
583 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
584 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0
585 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
586 ; GFX10_W64-NEXT: s_endpgm
588 ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1:
589 ; GFX11_W32: ; %bb.0:
590 ; GFX11_W32-NEXT: s_clause 0x3
591 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x34
592 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x10
593 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x8
594 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
595 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
596 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
597 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
598 ; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
599 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
600 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, 1.0, v0
601 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
602 ; GFX11_W32-NEXT: s_nop 0
603 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
604 ; GFX11_W32-NEXT: s_endpgm
606 ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1:
607 ; GFX11_W64: ; %bb.0:
608 ; GFX11_W64-NEXT: s_clause 0x3
609 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x34
610 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x10
611 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x8
612 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
613 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
614 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
615 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
616 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
617 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
618 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, 1.0, v0
619 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
620 ; GFX11_W64-NEXT: s_nop 0
621 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
622 ; GFX11_W64-NEXT: s_endpgm
623 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d)
624 store float %result, ptr addrspace(1) %out, align 4
; Kernel variant where the addend is the constant 1.0 instead of %c. The
; checks expect 1.0 to appear directly as the third source operand of
; v_div_fmas_f32 in every configuration, with three scalar dword loads plus
; the load of the %out pointer.
628 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
629 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2:
631 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa
632 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13
633 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25
634 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
635 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
636 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
637 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
638 ; GFX7-NEXT: s_and_b32 s2, 1, s4
639 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
640 ; GFX7-NEXT: s_mov_b32 s2, -1
641 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
643 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, 1.0
644 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
645 ; GFX7-NEXT: s_endpgm
647 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_2:
649 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28
650 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c
651 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94
652 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
653 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
654 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
655 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
656 ; GFX8-NEXT: s_and_b32 s2, 1, s4
657 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
659 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, 1.0
660 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
661 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
662 ; GFX8-NEXT: flat_store_dword v[0:1], v2
663 ; GFX8-NEXT: s_endpgm
665 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2:
666 ; GFX10_W32: ; %bb.0:
667 ; GFX10_W32-NEXT: s_clause 0x3
668 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94
669 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x4c
670 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28
671 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
672 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
673 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
674 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4
675 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
676 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
677 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0
678 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
679 ; GFX10_W32-NEXT: s_endpgm
681 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2:
682 ; GFX10_W64: ; %bb.0:
683 ; GFX10_W64-NEXT: s_clause 0x3
684 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94
685 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x4c
686 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28
687 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
688 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
689 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
690 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4
691 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
692 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
693 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0
694 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
695 ; GFX10_W64-NEXT: s_endpgm
697 ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_2:
698 ; GFX11_W32: ; %bb.0:
699 ; GFX11_W32-NEXT: s_clause 0x3
700 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x94
701 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x4c
702 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x28
703 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
704 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
705 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
706 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
707 ; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
708 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
709 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, 1.0
710 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
711 ; GFX11_W32-NEXT: s_nop 0
712 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
713 ; GFX11_W32-NEXT: s_endpgm
715 ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_2:
716 ; GFX11_W64: ; %bb.0:
717 ; GFX11_W64-NEXT: s_clause 0x3
718 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x94
719 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x4c
720 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x28
721 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
722 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
723 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
724 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
725 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
726 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
727 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, 1.0
728 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
729 ; GFX11_W64-NEXT: s_nop 0
730 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
731 ; GFX11_W64-NEXT: s_endpgm
732 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d)
733 store float %result, ptr addrspace(1) %out, align 4
737 define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) {
738 ; GFX7-LABEL: test_div_fmas_f64:
740 ; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
741 ; GFX7-NEXT: s_load_dword s0, s[0:1], 0x8
742 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
743 ; GFX7-NEXT: v_mov_b32_e32 v0, s6
744 ; GFX7-NEXT: v_mov_b32_e32 v2, s8
745 ; GFX7-NEXT: v_mov_b32_e32 v4, s10
746 ; GFX7-NEXT: s_and_b32 s0, 1, s0
747 ; GFX7-NEXT: v_mov_b32_e32 v1, s7
748 ; GFX7-NEXT: v_mov_b32_e32 v3, s9
749 ; GFX7-NEXT: v_mov_b32_e32 v5, s11
750 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
751 ; GFX7-NEXT: s_mov_b32 s6, -1
752 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
754 ; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
755 ; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
756 ; GFX7-NEXT: s_endpgm
758 ; GFX8-LABEL: test_div_fmas_f64:
760 ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
761 ; GFX8-NEXT: s_load_dword s0, s[0:1], 0x20
762 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
763 ; GFX8-NEXT: v_mov_b32_e32 v0, s6
764 ; GFX8-NEXT: v_mov_b32_e32 v2, s8
765 ; GFX8-NEXT: v_mov_b32_e32 v4, s10
766 ; GFX8-NEXT: s_and_b32 s0, 1, s0
767 ; GFX8-NEXT: v_mov_b32_e32 v1, s7
768 ; GFX8-NEXT: v_mov_b32_e32 v3, s9
769 ; GFX8-NEXT: v_mov_b32_e32 v5, s11
770 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
772 ; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
773 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
774 ; GFX8-NEXT: v_mov_b32_e32 v3, s5
775 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
776 ; GFX8-NEXT: s_endpgm
778 ; GFX10_W32-LABEL: test_div_fmas_f64:
779 ; GFX10_W32: ; %bb.0:
780 ; GFX10_W32-NEXT: s_clause 0x1
781 ; GFX10_W32-NEXT: s_load_dword s2, s[0:1], 0x20
782 ; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
783 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
784 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s2
785 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s8
786 ; GFX10_W32-NEXT: v_mov_b32_e32 v2, s10
787 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
788 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s9
789 ; GFX10_W32-NEXT: v_mov_b32_e32 v3, s11
790 ; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3]
791 ; GFX10_W32-NEXT: v_mov_b32_e32 v2, 0
792 ; GFX10_W32-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
793 ; GFX10_W32-NEXT: s_endpgm
795 ; GFX10_W64-LABEL: test_div_fmas_f64:
796 ; GFX10_W64: ; %bb.0:
797 ; GFX10_W64-NEXT: s_clause 0x1
798 ; GFX10_W64-NEXT: s_load_dword s2, s[0:1], 0x20
799 ; GFX10_W64-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
800 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
801 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s2
802 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s8
803 ; GFX10_W64-NEXT: v_mov_b32_e32 v2, s10
804 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
805 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s9
806 ; GFX10_W64-NEXT: v_mov_b32_e32 v3, s11
807 ; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3]
808 ; GFX10_W64-NEXT: v_mov_b32_e32 v2, 0
809 ; GFX10_W64-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
810 ; GFX10_W64-NEXT: s_endpgm
812 ; GFX11_W32-LABEL: test_div_fmas_f64:
813 ; GFX11_W32: ; %bb.0:
814 ; GFX11_W32-NEXT: s_clause 0x1
815 ; GFX11_W32-NEXT: s_load_b32 s8, s[0:1], 0x20
816 ; GFX11_W32-NEXT: s_load_b256 s[0:7], s[0:1], 0x0
817 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
818 ; GFX11_W32-NEXT: s_and_b32 s8, 1, s8
819 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
820 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s8
821 ; GFX11_W32-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
822 ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3]
823 ; GFX11_W32-NEXT: v_mov_b32_e32 v2, 0
824 ; GFX11_W32-NEXT: global_store_b64 v2, v[0:1], s[0:1]
825 ; GFX11_W32-NEXT: s_nop 0
826 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
827 ; GFX11_W32-NEXT: s_endpgm
829 ; GFX11_W64-LABEL: test_div_fmas_f64:
830 ; GFX11_W64: ; %bb.0:
831 ; GFX11_W64-NEXT: s_clause 0x1
832 ; GFX11_W64-NEXT: s_load_b32 s8, s[0:1], 0x20
833 ; GFX11_W64-NEXT: s_load_b256 s[0:7], s[0:1], 0x0
834 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
835 ; GFX11_W64-NEXT: s_and_b32 s8, 1, s8
836 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4
837 ; GFX11_W64-NEXT: v_mov_b32_e32 v2, s6
838 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s8
839 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5
840 ; GFX11_W64-NEXT: v_mov_b32_e32 v3, s7
841 ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3]
842 ; GFX11_W64-NEXT: v_mov_b32_e32 v2, 0
843 ; GFX11_W64-NEXT: global_store_b64 v2, v[0:1], s[0:1]
844 ; GFX11_W64-NEXT: s_nop 0
845 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
846 ; GFX11_W64-NEXT: s_endpgm
847 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d)
848 store double %result, ptr addrspace(1) %out, align 8
; Kernel test: the i1 operand of llvm.amdgcn.div.fmas.f32 is produced by an
; integer compare of the kernel argument %i against zero. The expected code
; below computes the compare with s_cmp_eq_u32 + s_cselect_b32, masks it with
; s_and_b32, and moves it into vcc (vcc_lo in the wave32 runs) with
; v_cmp_ne_u32 ahead of v_div_fmas_f32.
852 define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) {
853 ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc:
855 ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2
856 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
857 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
858 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
859 ; GFX7-NEXT: s_cmp_eq_u32 s7, 0
860 ; GFX7-NEXT: s_cselect_b32 s2, 1, 0
861 ; GFX7-NEXT: s_and_b32 s2, 1, s2
862 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
863 ; GFX7-NEXT: v_mov_b32_e32 v1, s5
864 ; GFX7-NEXT: v_mov_b32_e32 v2, s6
865 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
866 ; GFX7-NEXT: s_mov_b32 s2, -1
868 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
869 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
870 ; GFX7-NEXT: s_endpgm
872 ; GFX8-LABEL: test_div_fmas_f32_cond_to_vcc:
874 ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
875 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
876 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
877 ; GFX8-NEXT: s_cmp_eq_u32 s7, 0
878 ; GFX8-NEXT: s_cselect_b32 s2, 1, 0
879 ; GFX8-NEXT: s_and_b32 s2, 1, s2
880 ; GFX8-NEXT: v_mov_b32_e32 v0, s4
881 ; GFX8-NEXT: v_mov_b32_e32 v1, s5
882 ; GFX8-NEXT: v_mov_b32_e32 v2, s6
883 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
885 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
886 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
887 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
888 ; GFX8-NEXT: flat_store_dword v[0:1], v2
889 ; GFX8-NEXT: s_endpgm
891 ; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc:
892 ; GFX10_W32: ; %bb.0:
893 ; GFX10_W32-NEXT: s_clause 0x1
894 ; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
895 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
896 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
897 ; GFX10_W32-NEXT: s_cmp_eq_u32 s7, 0
898 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
899 ; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0
900 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6
901 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s0
902 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
903 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
904 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
905 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
906 ; GFX10_W32-NEXT: s_endpgm
908 ; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc:
909 ; GFX10_W64: ; %bb.0:
910 ; GFX10_W64-NEXT: s_clause 0x1
911 ; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
912 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
913 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
914 ; GFX10_W64-NEXT: s_cmp_eq_u32 s7, 0
915 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
916 ; GFX10_W64-NEXT: s_cselect_b32 s0, 1, 0
917 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6
918 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s0
919 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
920 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
921 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
922 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
923 ; GFX10_W64-NEXT: s_endpgm
925 ; GFX11_W32-LABEL: test_div_fmas_f32_cond_to_vcc:
926 ; GFX11_W32: ; %bb.0:
927 ; GFX11_W32-NEXT: s_clause 0x1
928 ; GFX11_W32-NEXT: s_load_b128 s[4:7], s[0:1], 0x8
929 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
930 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
931 ; GFX11_W32-NEXT: s_cmp_eq_u32 s7, 0
932 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
933 ; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0
934 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
935 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
936 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
937 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
938 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
939 ; GFX11_W32-NEXT: s_nop 0
940 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
941 ; GFX11_W32-NEXT: s_endpgm
943 ; GFX11_W64-LABEL: test_div_fmas_f32_cond_to_vcc:
944 ; GFX11_W64: ; %bb.0:
945 ; GFX11_W64-NEXT: s_clause 0x1
946 ; GFX11_W64-NEXT: s_load_b128 s[4:7], s[0:1], 0x8
947 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
948 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
949 ; GFX11_W64-NEXT: s_cmp_eq_u32 s7, 0
950 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5
951 ; GFX11_W64-NEXT: s_cselect_b32 s2, 1, 0
952 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6
953 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
954 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
955 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
956 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
957 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
958 ; GFX11_W64-NEXT: s_nop 0
959 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
960 ; GFX11_W64-NEXT: s_endpgm
; The condition is derived from the kernel argument %i.
961 %cmp = icmp eq i32 %i, 0
962 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp)
963 store float %result, ptr addrspace(1) %out, align 4
; Kernel test: div.fmas.f32 is called with the constant condition `i1 false`.
; The checks expect vcc to be written with an immediate 0 (s_mov_b64 vcc, 0, or
; s_mov_b32 vcc_lo, 0 in the wave32 runs) instead of being computed by a compare.
; Unnamed [8 x i32] parameters sit between the float arguments, and the checks
; fetch each float with its own scalar load.
967 define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) {
968 ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
970 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa
971 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13
972 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c
973 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
974 ; GFX7-NEXT: s_mov_b64 vcc, 0
975 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
976 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
977 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
978 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
979 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
980 ; GFX7-NEXT: s_mov_b32 s2, -1
981 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
982 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
983 ; GFX7-NEXT: s_endpgm
985 ; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
987 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28
988 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c
989 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70
990 ; GFX8-NEXT: s_mov_b64 vcc, 0
991 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
992 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
993 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
994 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
995 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
996 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
997 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
998 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
999 ; GFX8-NEXT: flat_store_dword v[0:1], v2
1000 ; GFX8-NEXT: s_endpgm
1002 ; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1003 ; GFX10_W32: ; %bb.0:
1004 ; GFX10_W32-NEXT: s_clause 0x3
1005 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c
1006 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70
1007 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28
1008 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1009 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0
1010 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1011 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4
1012 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5
1013 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1
1014 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
1015 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
1016 ; GFX10_W32-NEXT: s_endpgm
1018 ; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1019 ; GFX10_W64: ; %bb.0:
1020 ; GFX10_W64-NEXT: s_clause 0x3
1021 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c
1022 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70
1023 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28
1024 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1025 ; GFX10_W64-NEXT: s_mov_b64 vcc, 0
1026 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1027 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4
1028 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5
1029 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1
1030 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
1031 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
1032 ; GFX10_W64-NEXT: s_endpgm
1034 ; GFX11_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1035 ; GFX11_W32: ; %bb.0:
1036 ; GFX11_W32-NEXT: s_clause 0x3
1037 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x4c
1038 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x70
1039 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x28
1040 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1041 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0
1042 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1043 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
1044 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
1045 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
1046 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
1047 ; GFX11_W32-NEXT: s_nop 0
1048 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1049 ; GFX11_W32-NEXT: s_endpgm
1051 ; GFX11_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1052 ; GFX11_W64: ; %bb.0:
1053 ; GFX11_W64-NEXT: s_clause 0x3
1054 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x4c
1055 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x70
1056 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x28
1057 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1058 ; GFX11_W64-NEXT: s_mov_b64 vcc, 0
1059 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1060 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2
1061 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3
1062 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
1063 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
1064 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
1065 ; GFX11_W64-NEXT: s_nop 0
1066 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1067 ; GFX11_W64-NEXT: s_endpgm
; The condition operand is the literal `false`.
1068 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false)
1069 store float %result, ptr addrspace(1) %out, align 4
; Kernel test: div.fmas.f32 is called with the constant condition `i1 true`.
; The checks expect vcc to be written with an immediate -1 (s_mov_b64 vcc, -1, or
; s_mov_b32 vcc_lo, -1 in the wave32 runs) instead of being computed by a compare.
; Unnamed [8 x i32] parameters sit between the float arguments, and the checks
; fetch each float with its own scalar load.
1073 define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) {
1074 ; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1076 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa
1077 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13
1078 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c
1079 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1080 ; GFX7-NEXT: s_mov_b64 vcc, -1
1081 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1082 ; GFX7-NEXT: v_mov_b32_e32 v0, s2
1083 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
1084 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
1085 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
1086 ; GFX7-NEXT: s_mov_b32 s2, -1
1087 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1088 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
1089 ; GFX7-NEXT: s_endpgm
1091 ; GFX8-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1093 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28
1094 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c
1095 ; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70
1096 ; GFX8-NEXT: s_mov_b64 vcc, -1
1097 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1098 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1099 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
1100 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
1101 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
1102 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
1103 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1104 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1105 ; GFX8-NEXT: flat_store_dword v[0:1], v2
1106 ; GFX8-NEXT: s_endpgm
1108 ; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1109 ; GFX10_W32: ; %bb.0:
1110 ; GFX10_W32-NEXT: s_clause 0x3
1111 ; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c
1112 ; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70
1113 ; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28
1114 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1115 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, -1
1116 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1117 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4
1118 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5
1119 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1
1120 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
1121 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[2:3]
1122 ; GFX10_W32-NEXT: s_endpgm
1124 ; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1125 ; GFX10_W64: ; %bb.0:
1126 ; GFX10_W64-NEXT: s_clause 0x3
1127 ; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c
1128 ; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70
1129 ; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28
1130 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1131 ; GFX10_W64-NEXT: s_mov_b64 vcc, -1
1132 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1133 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4
1134 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5
1135 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1
1136 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
1137 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[2:3]
1138 ; GFX10_W64-NEXT: s_endpgm
1140 ; GFX11_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1141 ; GFX11_W32: ; %bb.0:
1142 ; GFX11_W32-NEXT: s_clause 0x3
1143 ; GFX11_W32-NEXT: s_load_b32 s2, s[0:1], 0x4c
1144 ; GFX11_W32-NEXT: s_load_b32 s3, s[0:1], 0x70
1145 ; GFX11_W32-NEXT: s_load_b32 s4, s[0:1], 0x28
1146 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1147 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, -1
1148 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1149 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
1150 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
1151 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
1152 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
1153 ; GFX11_W32-NEXT: s_nop 0
1154 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1155 ; GFX11_W32-NEXT: s_endpgm
1157 ; GFX11_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1158 ; GFX11_W64: ; %bb.0:
1159 ; GFX11_W64-NEXT: s_clause 0x3
1160 ; GFX11_W64-NEXT: s_load_b32 s2, s[0:1], 0x4c
1161 ; GFX11_W64-NEXT: s_load_b32 s3, s[0:1], 0x70
1162 ; GFX11_W64-NEXT: s_load_b32 s4, s[0:1], 0x28
1163 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1164 ; GFX11_W64-NEXT: s_mov_b64 vcc, -1
1165 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1166 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2
1167 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3
1168 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
1169 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
1170 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
1171 ; GFX11_W64-NEXT: s_nop 0
1172 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1173 ; GFX11_W64-NEXT: s_endpgm
; The condition operand is the literal `true`.
1174 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true)
1175 store float %result, ptr addrspace(1) %out, align 4
; Kernel test: the condition is the logical AND of two compares,
; (workitem id x == 0) and (%d != 0). In the checks the id compare is a
; v_cmp_eq_u32 on v0 that writes vcc, the %d compare goes through
; s_cmp_lg_u32 / s_cselect_b32 / s_and_b32 and a v_cmp_ne_u32 into SGPRs, and the
; two are merged into vcc with s_and_b64 (s_and_b32 on vcc_lo in the wave32 runs).
; %a, %b and %c are volatile loads of three consecutive floats starting at
; %in[tid]; the result is stored to %out[2].
1179 define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %d) {
1180 ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1182 ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
1183 ; GFX7-NEXT: s_load_dword s8, s[0:1], 0xc
1184 ; GFX7-NEXT: s_mov_b32 s2, 0
1185 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1186 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
1187 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1188 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1189 ; GFX7-NEXT: s_mov_b64 s[0:1], s[6:7]
1190 ; GFX7-NEXT: buffer_load_dword v3, v[1:2], s[0:3], 0 addr64 glc
1191 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1192 ; GFX7-NEXT: buffer_load_dword v4, v[1:2], s[0:3], 0 addr64 offset:4 glc
1193 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1194 ; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[0:3], 0 addr64 offset:8 glc
1195 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1196 ; GFX7-NEXT: s_cmp_lg_u32 s8, 0
1197 ; GFX7-NEXT: s_cselect_b32 s0, 1, 0
1198 ; GFX7-NEXT: s_and_b32 s0, 1, s0
1199 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1200 ; GFX7-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
1201 ; GFX7-NEXT: s_mov_b32 s2, -1
1202 ; GFX7-NEXT: s_and_b64 vcc, vcc, s[0:1]
1203 ; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3]
1204 ; GFX7-NEXT: v_div_fmas_f32 v0, v3, v4, v1
1205 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
1206 ; GFX7-NEXT: s_endpgm
1208 ; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1210 ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
1211 ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x30
1212 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
1213 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1214 ; GFX8-NEXT: v_mov_b32_e32 v1, s6
1215 ; GFX8-NEXT: v_mov_b32_e32 v2, s7
1216 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1217 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
1218 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v1
1219 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v2, vcc
1220 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 8, v1
1221 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v2, vcc
1222 ; GFX8-NEXT: flat_load_dword v1, v[1:2] glc
1223 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1224 ; GFX8-NEXT: flat_load_dword v2, v[3:4] glc
1225 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1226 ; GFX8-NEXT: flat_load_dword v3, v[5:6] glc
1227 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1228 ; GFX8-NEXT: s_add_u32 s0, s4, 8
1229 ; GFX8-NEXT: s_addc_u32 s1, s5, 0
1230 ; GFX8-NEXT: s_cmp_lg_u32 s2, 0
1231 ; GFX8-NEXT: s_cselect_b32 s2, 1, 0
1232 ; GFX8-NEXT: s_and_b32 s2, 1, s2
1233 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1234 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2
1235 ; GFX8-NEXT: s_and_b64 vcc, vcc, s[2:3]
1236 ; GFX8-NEXT: s_nop 1
1237 ; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
1238 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1239 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1240 ; GFX8-NEXT: flat_store_dword v[0:1], v2
1241 ; GFX8-NEXT: s_endpgm
1243 ; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1244 ; GFX10_W32: ; %bb.0:
1245 ; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
1246 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1247 ; GFX10_W32-NEXT: s_load_dword s0, s[0:1], 0x30
1248 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1249 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1250 ; GFX10_W32-NEXT: global_load_dword v2, v1, s[6:7] glc dlc
1251 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1252 ; GFX10_W32-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc
1253 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1254 ; GFX10_W32-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc
1255 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1256 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
1257 ; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0
1258 ; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0
1259 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s0
1260 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
1261 ; GFX10_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
1262 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v4
1263 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[4:5] offset:8
1264 ; GFX10_W32-NEXT: s_endpgm
1266 ; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1267 ; GFX10_W64: ; %bb.0:
1268 ; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
1269 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1270 ; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x30
1271 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1272 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1273 ; GFX10_W64-NEXT: global_load_dword v2, v1, s[6:7] glc dlc
1274 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1275 ; GFX10_W64-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc
1276 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1277 ; GFX10_W64-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc
1278 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1279 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
1280 ; GFX10_W64-NEXT: s_cmp_lg_u32 s0, 0
1281 ; GFX10_W64-NEXT: s_cselect_b32 s0, 1, 0
1282 ; GFX10_W64-NEXT: s_and_b32 s0, 1, s0
1283 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
1284 ; GFX10_W64-NEXT: s_and_b64 vcc, vcc, s[0:1]
1285 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v4
1286 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[4:5] offset:8
1287 ; GFX10_W64-NEXT: s_endpgm
1289 ; GFX11_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1290 ; GFX11_W32: ; %bb.0:
1291 ; GFX11_W32-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
1292 ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1293 ; GFX11_W32-NEXT: s_load_b32 s0, s[0:1], 0x30
1294 ; GFX11_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1295 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1296 ; GFX11_W32-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc
1297 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
1298 ; GFX11_W32-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc
1299 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
1300 ; GFX11_W32-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc
1301 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
1302 ; GFX11_W32-NEXT: s_cmp_lg_u32 s0, 0
1303 ; GFX11_W32-NEXT: s_cselect_b32 s0, 1, 0
1304 ; GFX11_W32-NEXT: s_and_b32 s0, 1, s0
1305 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
1306 ; GFX11_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
1307 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v1
1308 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
1309 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[4:5] offset:8
1310 ; GFX11_W32-NEXT: s_nop 0
1311 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1312 ; GFX11_W32-NEXT: s_endpgm
1314 ; GFX11_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1315 ; GFX11_W64: ; %bb.0:
1316 ; GFX11_W64-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
1317 ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1318 ; GFX11_W64-NEXT: s_load_b32 s0, s[0:1], 0x30
1319 ; GFX11_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1320 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1321 ; GFX11_W64-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc
1322 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
1323 ; GFX11_W64-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc
1324 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
1325 ; GFX11_W64-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc
1326 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
1327 ; GFX11_W64-NEXT: s_cmp_lg_u32 s0, 0
1328 ; GFX11_W64-NEXT: s_cselect_b32 s0, 1, 0
1329 ; GFX11_W64-NEXT: s_and_b32 s0, 1, s0
1330 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
1331 ; GFX11_W64-NEXT: s_and_b64 vcc, vcc, s[0:1]
1332 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v1
1333 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
1334 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[4:5] offset:8
1335 ; GFX11_W64-NEXT: s_nop 0
1336 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1337 ; GFX11_W64-NEXT: s_endpgm
; Addresses: %in[tid], %in[tid + 1], %in[tid + 2] for the operands, %out[2] for the result.
1338 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1339 %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid
1340 %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1
1341 %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2
1342 %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2
; The three operands are loaded with volatile loads.
1344 %a = load volatile float, ptr addrspace(1) %gep.a
1345 %b = load volatile float, ptr addrspace(1) %gep.b
1346 %c = load volatile float, ptr addrspace(1) %gep.c
; %cmp0 depends on the workitem id, %cmp1 on the kernel argument %d; their AND is the condition.
1348 %cmp0 = icmp eq i32 %tid, 0
1349 %cmp1 = icmp ne i32 %d, 0
1350 %and = and i1 %cmp0, %cmp1
1352 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and)
1353 store float %result, ptr addrspace(1) %gep.out, align 4
; Kernel test: the condition reaches div.fmas.f32 through an i1 phi, with
; `false` incoming from %entry and %cmp1 (loaded value != 0) incoming from %bb.
; %bb is entered only where the workitem id x equals 0. The checks expect an
; SGPR to be set to 0 before the branch, overwritten by s_cselect_b32 inside
; %bb, and converted to vcc after the %exit label with s_and_b32 + v_cmp_ne_u32
; (vcc_lo in the wave32 runs). The result is stored to %out[2].
1357 define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) {
1358 ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc:
1359 ; GFX7: ; %bb.0: ; %entry
1360 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xa
1361 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1362 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
1363 ; GFX7-NEXT: s_mov_b32 s6, 0
1364 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
1365 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1366 ; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64
1367 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1368 ; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc
1369 ; GFX7-NEXT: s_cbranch_execz .LBB13_2
1370 ; GFX7-NEXT: ; %bb.1: ; %bb
1371 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x14
1372 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1373 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0
1374 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1375 ; GFX7-NEXT: s_cmp_lg_u32 s4, 0
1376 ; GFX7-NEXT: s_cselect_b32 s6, 1, 0
1377 ; GFX7-NEXT: .LBB13_2: ; %exit
1378 ; GFX7-NEXT: s_or_b64 exec, exec, s[2:3]
1379 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1380 ; GFX7-NEXT: s_and_b32 s0, 1, s6
1381 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
1382 ; GFX7-NEXT: s_mov_b32 s6, -1
1383 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1384 ; GFX7-NEXT: s_nop 1
1385 ; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1386 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1387 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
1388 ; GFX7-NEXT: s_endpgm
1390 ; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc:
1391 ; GFX8: ; %bb.0: ; %entry
1392 ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28
1393 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
1394 ; GFX8-NEXT: s_mov_b32 s4, 0
1395 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1396 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
1397 ; GFX8-NEXT: v_mov_b32_e32 v2, s3
1398 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1399 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
1400 ; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2]
1401 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1402 ; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
1403 ; GFX8-NEXT: s_cbranch_execz .LBB13_2
1404 ; GFX8-NEXT: ; %bb.1: ; %bb
1405 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x50
1406 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1407 ; GFX8-NEXT: s_load_dword s4, s[4:5], 0x0
1408 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1409 ; GFX8-NEXT: s_cmp_lg_u32 s4, 0
1410 ; GFX8-NEXT: s_cselect_b32 s4, 1, 0
1411 ; GFX8-NEXT: .LBB13_2: ; %exit
1412 ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
1413 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1414 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1415 ; GFX8-NEXT: s_add_u32 s0, s0, 8
1416 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
1417 ; GFX8-NEXT: s_and_b32 s2, 1, s4
1418 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
1419 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1420 ; GFX8-NEXT: s_nop 2
1421 ; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
1422 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1423 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1424 ; GFX8-NEXT: flat_store_dword v[0:1], v2
1425 ; GFX8-NEXT: s_endpgm
1427 ; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
1428 ; GFX10_W32: ; %bb.0: ; %entry
1429 ; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28
1430 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1431 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1432 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1433 ; GFX10_W32-NEXT: global_load_dwordx3 v[1:3], v1, s[2:3]
1434 ; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3
1435 ; GFX10_W32-NEXT: s_mov_b32 s2, 0
1436 ; GFX10_W32-NEXT: s_and_saveexec_b32 s3, vcc_lo
1437 ; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2
1438 ; GFX10_W32-NEXT: ; %bb.1: ; %bb
1439 ; GFX10_W32-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x50
1440 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1441 ; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x0
1442 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1443 ; GFX10_W32-NEXT: s_cmp_lg_u32 s2, 0
1444 ; GFX10_W32-NEXT: s_cselect_b32 s2, 1, 0
1445 ; GFX10_W32-NEXT: .LBB13_2: ; %exit
1446 ; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s3
1447 ; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1448 ; GFX10_W32-NEXT: s_and_b32 s2, 1, s2
1449 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
1450 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1451 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1452 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
1453 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1454 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] offset:8
1455 ; GFX10_W32-NEXT: s_endpgm
1457 ; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
1458 ; GFX10_W64: ; %bb.0: ; %entry
1459 ; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28
1460 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1461 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1462 ; GFX10_W64-NEXT: s_mov_b32 s4, 0
1463 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1464 ; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[2:3]
1465 ; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3
1466 ; GFX10_W64-NEXT: s_and_saveexec_b64 s[2:3], vcc
1467 ; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2
1468 ; GFX10_W64-NEXT: ; %bb.1: ; %bb
1469 ; GFX10_W64-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x50
1470 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1471 ; GFX10_W64-NEXT: s_load_dword s4, s[4:5], 0x0
1472 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1473 ; GFX10_W64-NEXT: s_cmp_lg_u32 s4, 0
1474 ; GFX10_W64-NEXT: s_cselect_b32 s4, 1, 0
1475 ; GFX10_W64-NEXT: .LBB13_2: ; %exit
1476 ; GFX10_W64-NEXT: s_or_b64 exec, exec, s[2:3]
1477 ; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1478 ; GFX10_W64-NEXT: s_and_b32 s2, 1, s4
1479 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
1480 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1481 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1482 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
1483 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1484 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] offset:8
1485 ; GFX10_W64-NEXT: s_endpgm
1487 ; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
1488 ; GFX11_W32: ; %bb.0: ; %entry
1489 ; GFX11_W32-NEXT: s_load_b64 s[2:3], s[0:1], 0x28
1490 ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1491 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1492 ; GFX11_W32-NEXT: global_load_b96 v[1:3], v1, s[2:3]
1493 ; GFX11_W32-NEXT: s_mov_b32 s2, 0
1494 ; GFX11_W32-NEXT: s_mov_b32 s3, exec_lo
1495 ; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v0
1496 ; GFX11_W32-NEXT: s_cbranch_execz .LBB13_2
1497 ; GFX11_W32-NEXT: ; %bb.1: ; %bb
1498 ; GFX11_W32-NEXT: s_load_b64 s[4:5], s[0:1], 0x50
1499 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1500 ; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x0
1501 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1502 ; GFX11_W32-NEXT: s_cmp_lg_u32 s2, 0
1503 ; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0
1504 ; GFX11_W32-NEXT: .LBB13_2: ; %exit
1505 ; GFX11_W32-NEXT: s_or_b32 exec_lo, exec_lo, s3
1506 ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1507 ; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
1508 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
1509 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
1510 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1511 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
1512 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1513 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8
1514 ; GFX11_W32-NEXT: s_nop 0
1515 ; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1516 ; GFX11_W32-NEXT: s_endpgm
1518 ; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
1519 ; GFX11_W64: ; %bb.0: ; %entry
1520 ; GFX11_W64-NEXT: s_load_b64 s[2:3], s[0:1], 0x28
1521 ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1522 ; GFX11_W64-NEXT: s_mov_b32 s4, 0
1523 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1524 ; GFX11_W64-NEXT: global_load_b96 v[1:3], v1, s[2:3]
1525 ; GFX11_W64-NEXT: s_mov_b64 s[2:3], exec
1526 ; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v0
1527 ; GFX11_W64-NEXT: s_cbranch_execz .LBB13_2
1528 ; GFX11_W64-NEXT: ; %bb.1: ; %bb
1529 ; GFX11_W64-NEXT: s_load_b64 s[4:5], s[0:1], 0x50
1530 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1531 ; GFX11_W64-NEXT: s_load_b32 s4, s[4:5], 0x0
1532 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1533 ; GFX11_W64-NEXT: s_cmp_lg_u32 s4, 0
1534 ; GFX11_W64-NEXT: s_cselect_b32 s4, 1, 0
1535 ; GFX11_W64-NEXT: .LBB13_2: ; %exit
1536 ; GFX11_W64-NEXT: s_or_b64 exec, exec, s[2:3]
1537 ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1538 ; GFX11_W64-NEXT: s_and_b32 s2, 1, s4
1539 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
1540 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
1541 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1542 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
1543 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1544 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8
1545 ; GFX11_W64-NEXT: s_nop 0
1546 ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1547 ; GFX11_W64-NEXT: s_endpgm
; Operand addresses: %in[tid], %in[tid + 1], %in[tid + 2].
1549 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1550 %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid
1551 %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1
1552 %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2
; Unlike the logical-and test, these loads are not volatile.
1554 %a = load float, ptr addrspace(1) %gep.a
1555 %b = load float, ptr addrspace(1) %gep.b
1556 %c = load float, ptr addrspace(1) %gep.c
; Branch to %bb only for workitem id 0; everything else goes straight to %exit.
1558 %cmp0 = icmp eq i32 %tid, 0
1559 br i1 %cmp0, label %bb, label %exit
; In %bb the condition is computed from a value loaded through %dummy.
1562 %val = load i32, ptr addrspace(1) %dummy
1563 %cmp1 = icmp ne i32 %val, 0
; Merge point: `false` when arriving from %entry, %cmp1 when arriving from %bb.
1567 %cond = phi i1 [false, %entry], [%cmp1, %bb]
1568 %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2
1569 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond)
1570 store float %result, ptr addrspace(1) %gep.out, align 4
; Declarations of the intrinsics called by the tests above: the workitem id query
; and the f32 / f64 variants of div.fmas (three floating-point operands plus an
; i1 condition). All three reference attribute group #0, defined on the last line.
1574 declare i32 @llvm.amdgcn.workitem.id.x() #0
1575 declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #0
1576 declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) #0
1578 attributes #0 = { nounwind readnone speculatable }