1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
5 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
7 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
9 ; fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10 ; fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; Scalar f32 case with the product on the left of the subtraction:
; (%x * %y) - %z.
; What the generated checks below expect:
;  - default runs (no extra flags): separate v_mul_f32 and v_sub_f32.
;  - -fp-contract=fast runs: a single v_fma_f32 with the third operand negated.
;  - --denormal-fp-math=preserve-sign runs: a single v_mad_f32 with the third
;    operand negated.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
12 define float @test_f32_sub_mul(float %x, float %y, float %z) {
13 ; GFX9-LABEL: test_f32_sub_mul:
14 ; GFX9: ; %bb.0: ; %.entry
15 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
17 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
18 ; GFX9-NEXT: s_setpc_b64 s[30:31]
20 ; GFX9-CONTRACT-LABEL: test_f32_sub_mul:
21 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
22 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
24 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
26 ; GFX9-DENORM-LABEL: test_f32_sub_mul:
27 ; GFX9-DENORM: ; %bb.0: ; %.entry
28 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29 ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
30 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
32 ; GFX10-LABEL: test_f32_sub_mul:
33 ; GFX10: ; %bb.0: ; %.entry
34 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
36 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
37 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
38 ; GFX10-NEXT: s_setpc_b64 s[30:31]
40 ; GFX10-CONTRACT-LABEL: test_f32_sub_mul:
41 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
42 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
44 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
45 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
47 ; GFX10-DENORM-LABEL: test_f32_sub_mul:
48 ; GFX10-DENORM: ; %bb.0: ; %.entry
49 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
51 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
52 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
54 %a = fmul float %x, %y
55 %b = fsub float %a, %z
; Scalar f32 case with the product on the right of the subtraction:
; %z - (%x * %y).
; What the generated checks below expect:
;  - default runs (no extra flags): v_mul_f32, then v_sub_f32 with the product
;    as the second source operand.
;  - -fp-contract=fast runs: a single v_fma_f32 with the first multiplicand
;    negated.
;  - --denormal-fp-math=preserve-sign runs: a single v_mad_f32 with the first
;    multiplicand negated.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
59 define float @test_f32_sub_mul_rhs(float %x, float %y, float %z) {
60 ; GFX9-LABEL: test_f32_sub_mul_rhs:
61 ; GFX9: ; %bb.0: ; %.entry
62 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
64 ; GFX9-NEXT: v_sub_f32_e32 v0, v2, v0
65 ; GFX9-NEXT: s_setpc_b64 s[30:31]
67 ; GFX9-CONTRACT-LABEL: test_f32_sub_mul_rhs:
68 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
69 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
71 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
73 ; GFX9-DENORM-LABEL: test_f32_sub_mul_rhs:
74 ; GFX9-DENORM: ; %bb.0: ; %.entry
75 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX9-DENORM-NEXT: v_mad_f32 v0, -v0, v1, v2
77 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
79 ; GFX10-LABEL: test_f32_sub_mul_rhs:
80 ; GFX10: ; %bb.0: ; %.entry
81 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
83 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
84 ; GFX10-NEXT: v_sub_f32_e32 v0, v2, v0
85 ; GFX10-NEXT: s_setpc_b64 s[30:31]
87 ; GFX10-CONTRACT-LABEL: test_f32_sub_mul_rhs:
88 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
89 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
91 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
92 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
94 ; GFX10-DENORM-LABEL: test_f32_sub_mul_rhs:
95 ; GFX10-DENORM: ; %bb.0: ; %.entry
96 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
98 ; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v1, v2
99 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
101 %a = fmul float %x, %y
102 %b = fsub float %z, %a
; Scalar f16 case with the product on the left of the subtraction:
; (%x * %y) - %z.
; What the generated checks below expect:
;  - default runs (no extra flags): v_mul_f16, then v_add_f16 with the second
;    source negated.
;  - -fp-contract=fast runs: v_fma_f16, preceded by a v_xor_b32 with 0x8000 on
;    the addend register (the negation is a separate instruction here, not a
;    source modifier on the fma).
;  - --denormal-fp-math=preserve-sign on gfx900: a single v_mad_legacy_f16 with
;    the third operand negated.
;  - --denormal-fp-math=preserve-sign on gfx1010: the same mul + add pair as the
;    default run (no combined instruction is expected).
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
106 define half @test_half_sub_mul(half %x, half %y, half %z) {
107 ; GFX9-LABEL: test_half_sub_mul:
108 ; GFX9: ; %bb.0: ; %.entry
109 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
111 ; GFX9-NEXT: v_add_f16_e64 v0, v0, -v2
112 ; GFX9-NEXT: s_setpc_b64 s[30:31]
114 ; GFX9-CONTRACT-LABEL: test_half_sub_mul:
115 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
116 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX9-CONTRACT-NEXT: v_xor_b32_e32 v2, 0x8000, v2
118 ; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
119 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
121 ; GFX9-DENORM-LABEL: test_half_sub_mul:
122 ; GFX9-DENORM: ; %bb.0: ; %.entry
123 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, -v2
125 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
127 ; GFX10-LABEL: test_half_sub_mul:
128 ; GFX10: ; %bb.0: ; %.entry
129 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
131 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
132 ; GFX10-NEXT: v_add_f16_e64 v0, v0, -v2
133 ; GFX10-NEXT: s_setpc_b64 s[30:31]
135 ; GFX10-CONTRACT-LABEL: test_half_sub_mul:
136 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
137 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
139 ; GFX10-CONTRACT-NEXT: v_xor_b32_e32 v2, 0x8000, v2
140 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
141 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
143 ; GFX10-DENORM-LABEL: test_half_sub_mul:
144 ; GFX10-DENORM: ; %bb.0: ; %.entry
145 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
147 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
148 ; GFX10-DENORM-NEXT: v_add_f16_e64 v0, v0, -v2
149 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
151 %a = fmul half %x, %y
152 %b = fsub half %a, %z
; Scalar f16 case with the product on the right of the subtraction:
; %z - (%x * %y).
; What the generated checks below expect:
;  - default runs (no extra flags): v_mul_f16 with its second source negated,
;    then a plain v_add_f16 with %z.
;  - -fp-contract=fast runs: v_fma_f16, preceded by a v_xor_b32 with 0x8000 on
;    the first multiplicand register (the negation is a separate instruction
;    here, not a source modifier on the fma).
;  - --denormal-fp-math=preserve-sign on gfx900: a single v_mad_legacy_f16 with
;    the second multiplicand negated.
;  - --denormal-fp-math=preserve-sign on gfx1010: the same mul + add pair as the
;    default run (no combined instruction is expected).
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
156 define half @test_half_sub_mul_rhs(half %x, half %y, half %z) {
157 ; GFX9-LABEL: test_half_sub_mul_rhs:
158 ; GFX9: ; %bb.0: ; %.entry
159 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160 ; GFX9-NEXT: v_mul_f16_e64 v0, v0, -v1
161 ; GFX9-NEXT: v_add_f16_e32 v0, v2, v0
162 ; GFX9-NEXT: s_setpc_b64 s[30:31]
164 ; GFX9-CONTRACT-LABEL: test_half_sub_mul_rhs:
165 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
166 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167 ; GFX9-CONTRACT-NEXT: v_xor_b32_e32 v0, 0x8000, v0
168 ; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
169 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
171 ; GFX9-DENORM-LABEL: test_half_sub_mul_rhs:
172 ; GFX9-DENORM: ; %bb.0: ; %.entry
173 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, -v1, v2
175 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
177 ; GFX10-LABEL: test_half_sub_mul_rhs:
178 ; GFX10: ; %bb.0: ; %.entry
179 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
181 ; GFX10-NEXT: v_mul_f16_e64 v0, v0, -v1
182 ; GFX10-NEXT: v_add_f16_e32 v0, v2, v0
183 ; GFX10-NEXT: s_setpc_b64 s[30:31]
185 ; GFX10-CONTRACT-LABEL: test_half_sub_mul_rhs:
186 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
187 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
189 ; GFX10-CONTRACT-NEXT: v_xor_b32_e32 v0, 0x8000, v0
190 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
191 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
193 ; GFX10-DENORM-LABEL: test_half_sub_mul_rhs:
194 ; GFX10-DENORM: ; %bb.0: ; %.entry
195 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
197 ; GFX10-DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
198 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0
199 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
201 %a = fmul half %x, %y
202 %b = fsub half %z, %a
; Scalar f64 case with the product on the left of the subtraction:
; (%x * %y) - %z.
; What the generated checks below expect:
;  - default runs and --denormal-fp-math=preserve-sign runs: v_mul_f64, then
;    v_add_f64 with the second source negated (no combined instruction).
;  - -fp-contract=fast runs: a single v_fma_f64 with the third operand negated.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
206 define double @test_double_sub_mul(double %x, double %y, double %z) {
207 ; GFX9-LABEL: test_double_sub_mul:
208 ; GFX9: ; %bb.0: ; %.entry
209 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
211 ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
212 ; GFX9-NEXT: s_setpc_b64 s[30:31]
214 ; GFX9-CONTRACT-LABEL: test_double_sub_mul:
215 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
216 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
218 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
220 ; GFX9-DENORM-LABEL: test_double_sub_mul:
221 ; GFX9-DENORM: ; %bb.0: ; %.entry
222 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
224 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
225 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
227 ; GFX10-LABEL: test_double_sub_mul:
228 ; GFX10: ; %bb.0: ; %.entry
229 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
231 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
232 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
233 ; GFX10-NEXT: s_setpc_b64 s[30:31]
235 ; GFX10-CONTRACT-LABEL: test_double_sub_mul:
236 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
237 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
239 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
240 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
242 ; GFX10-DENORM-LABEL: test_double_sub_mul:
243 ; GFX10-DENORM: ; %bb.0: ; %.entry
244 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
246 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
247 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
248 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
250 %a = fmul double %x, %y
251 %b = fsub double %a, %z
; Scalar f64 case with the product on the right of the subtraction:
; %z - (%x * %y).
; What the generated checks below expect:
;  - default runs and --denormal-fp-math=preserve-sign runs: v_mul_f64, then
;    v_add_f64 of %z with the negated product (no combined instruction).
;  - -fp-contract=fast runs: a single v_fma_f64 with the first multiplicand
;    negated.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
255 define double @test_double_sub_mul_rhs(double %x, double %y, double %z) {
256 ; GFX9-LABEL: test_double_sub_mul_rhs:
257 ; GFX9: ; %bb.0: ; %.entry
258 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
260 ; GFX9-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
261 ; GFX9-NEXT: s_setpc_b64 s[30:31]
263 ; GFX9-CONTRACT-LABEL: test_double_sub_mul_rhs:
264 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
265 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
267 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
269 ; GFX9-DENORM-LABEL: test_double_sub_mul_rhs:
270 ; GFX9-DENORM: ; %bb.0: ; %.entry
271 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
273 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
274 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
276 ; GFX10-LABEL: test_double_sub_mul_rhs:
277 ; GFX10: ; %bb.0: ; %.entry
278 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
280 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
281 ; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
282 ; GFX10-NEXT: s_setpc_b64 s[30:31]
284 ; GFX10-CONTRACT-LABEL: test_double_sub_mul_rhs:
285 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
286 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
287 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
288 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
289 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
291 ; GFX10-DENORM-LABEL: test_double_sub_mul_rhs:
292 ; GFX10-DENORM: ; %bb.0: ; %.entry
293 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
295 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
296 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
297 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
299 %a = fmul double %x, %y
300 %b = fsub double %z, %a
; <4 x float> case with the product on the left of the subtraction:
; (%x * %y) - %z, handled one 32-bit lane per instruction in the checks.
; What the generated checks below expect:
;  - default runs (no extra flags): four v_mul_f32 followed by four v_sub_f32.
;  - -fp-contract=fast runs: four v_fma_f32, each with the third operand
;    negated.
;  - --denormal-fp-math=preserve-sign runs: four v_mad_f32, each with the third
;    operand negated.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
304 define <4 x float> @test_v4f32_sub_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
305 ; GFX9-LABEL: test_v4f32_sub_mul:
306 ; GFX9: ; %bb.0: ; %.entry
307 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4
309 ; GFX9-NEXT: v_mul_f32_e32 v1, v1, v5
310 ; GFX9-NEXT: v_mul_f32_e32 v2, v2, v6
311 ; GFX9-NEXT: v_mul_f32_e32 v3, v3, v7
312 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v8
313 ; GFX9-NEXT: v_sub_f32_e32 v1, v1, v9
314 ; GFX9-NEXT: v_sub_f32_e32 v2, v2, v10
315 ; GFX9-NEXT: v_sub_f32_e32 v3, v3, v11
316 ; GFX9-NEXT: s_setpc_b64 s[30:31]
318 ; GFX9-CONTRACT-LABEL: test_v4f32_sub_mul:
319 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
320 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
321 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
322 ; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
323 ; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
324 ; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, -v11
325 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
327 ; GFX9-DENORM-LABEL: test_v4f32_sub_mul:
328 ; GFX9-DENORM: ; %bb.0: ; %.entry
329 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v4, -v8
331 ; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v5, -v9
332 ; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v6, -v10
333 ; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, -v11
334 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
336 ; GFX10-LABEL: test_v4f32_sub_mul:
337 ; GFX10: ; %bb.0: ; %.entry
338 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
340 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
341 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
342 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
343 ; GFX10-NEXT: v_mul_f32_e32 v3, v3, v7
344 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v8
345 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v9
346 ; GFX10-NEXT: v_sub_f32_e32 v2, v2, v10
347 ; GFX10-NEXT: v_sub_f32_e32 v3, v3, v11
348 ; GFX10-NEXT: s_setpc_b64 s[30:31]
350 ; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul:
351 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
352 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
354 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
355 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
356 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
357 ; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, -v11
358 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
360 ; GFX10-DENORM-LABEL: test_v4f32_sub_mul:
361 ; GFX10-DENORM: ; %bb.0: ; %.entry
362 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
364 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v4, -v8
365 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v5, -v9
366 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, -v10
367 ; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, v7, -v11
368 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
370 %a = fmul <4 x float> %x, %y
371 %b = fsub <4 x float> %a, %z
; <4 x float> case with the product on the right of the subtraction:
; %z - (%x * %y), handled one 32-bit lane per instruction in the checks.
; What the generated checks below expect:
;  - default runs (no extra flags): four v_mul_f32 followed by four v_sub_f32
;    with the product as the second source operand.
;  - -fp-contract=fast runs: four v_fma_f32, each with the first multiplicand
;    negated.
;  - --denormal-fp-math=preserve-sign runs: four v_mad_f32, each with the first
;    multiplicand negated.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
375 define <4 x float> @test_v4f32_sub_mul_rhs(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
376 ; GFX9-LABEL: test_v4f32_sub_mul_rhs:
377 ; GFX9: ; %bb.0: ; %.entry
378 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4
380 ; GFX9-NEXT: v_mul_f32_e32 v1, v1, v5
381 ; GFX9-NEXT: v_mul_f32_e32 v2, v2, v6
382 ; GFX9-NEXT: v_mul_f32_e32 v3, v3, v7
383 ; GFX9-NEXT: v_sub_f32_e32 v0, v8, v0
384 ; GFX9-NEXT: v_sub_f32_e32 v1, v9, v1
385 ; GFX9-NEXT: v_sub_f32_e32 v2, v10, v2
386 ; GFX9-NEXT: v_sub_f32_e32 v3, v11, v3
387 ; GFX9-NEXT: s_setpc_b64 s[30:31]
389 ; GFX9-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
390 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
391 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
393 ; GFX9-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
394 ; GFX9-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
395 ; GFX9-CONTRACT-NEXT: v_fma_f32 v3, -v3, v7, v11
396 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
398 ; GFX9-DENORM-LABEL: test_v4f32_sub_mul_rhs:
399 ; GFX9-DENORM: ; %bb.0: ; %.entry
400 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401 ; GFX9-DENORM-NEXT: v_mad_f32 v0, -v0, v4, v8
402 ; GFX9-DENORM-NEXT: v_mad_f32 v1, -v1, v5, v9
403 ; GFX9-DENORM-NEXT: v_mad_f32 v2, -v2, v6, v10
404 ; GFX9-DENORM-NEXT: v_mad_f32 v3, -v3, v7, v11
405 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
407 ; GFX10-LABEL: test_v4f32_sub_mul_rhs:
408 ; GFX10: ; %bb.0: ; %.entry
409 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
411 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
412 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
413 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
414 ; GFX10-NEXT: v_mul_f32_e32 v3, v3, v7
415 ; GFX10-NEXT: v_sub_f32_e32 v0, v8, v0
416 ; GFX10-NEXT: v_sub_f32_e32 v1, v9, v1
417 ; GFX10-NEXT: v_sub_f32_e32 v2, v10, v2
418 ; GFX10-NEXT: v_sub_f32_e32 v3, v11, v3
419 ; GFX10-NEXT: s_setpc_b64 s[30:31]
421 ; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
422 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
423 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
425 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
426 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
427 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
428 ; GFX10-CONTRACT-NEXT: v_fma_f32 v3, -v3, v7, v11
429 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
431 ; GFX10-DENORM-LABEL: test_v4f32_sub_mul_rhs:
432 ; GFX10-DENORM: ; %bb.0: ; %.entry
433 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
435 ; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v4, v8
436 ; GFX10-DENORM-NEXT: v_mad_f32 v1, -v1, v5, v9
437 ; GFX10-DENORM-NEXT: v_mad_f32 v2, -v2, v6, v10
438 ; GFX10-DENORM-NEXT: v_mad_f32 v3, -v3, v7, v11
439 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
441 %a = fmul <4 x float> %x, %y
442 %b = fsub <4 x float> %z, %a
; <4 x half> case with the product on the left of the subtraction:
; (%x * %y) - %z, with two halves packed per 32-bit register in the checks.
; What the generated checks below expect:
;  - default runs and --denormal-fp-math=preserve-sign runs: two v_pk_mul_f16,
;    then the subtraction done per half (v_add_f16 for the low word and an
;    SDWA v_add_f16 on WORD_1 for the high word, both with a negated source),
;    and the halves re-merged with v_and_or_b32 against a 0xffff mask.
;  - -fp-contract=fast runs: two v_pk_fma_f16 with neg_lo/neg_hi set on the
;    third operand.
; The gfx1010 sequences differ from gfx900 only in where the v_mov_b32 of the
; 0xffff mask sits relative to the second SDWA add.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
446 define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
447 ; GFX9-LABEL: test_v4f16_sub_mul:
448 ; GFX9: ; %bb.0: ; %.entry
449 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
451 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
452 ; GFX9-NEXT: v_add_f16_e64 v2, v0, -v4
453 ; GFX9-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
454 ; GFX9-NEXT: v_add_f16_e64 v3, v1, -v5
455 ; GFX9-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
456 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
457 ; GFX9-NEXT: v_and_or_b32 v0, v2, v4, v0
458 ; GFX9-NEXT: v_and_or_b32 v1, v3, v4, v1
459 ; GFX9-NEXT: s_setpc_b64 s[30:31]
461 ; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul:
462 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
463 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
465 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
466 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
468 ; GFX9-DENORM-LABEL: test_v4f16_sub_mul:
469 ; GFX9-DENORM: ; %bb.0: ; %.entry
470 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
472 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
473 ; GFX9-DENORM-NEXT: v_add_f16_e64 v2, v0, -v4
474 ; GFX9-DENORM-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
475 ; GFX9-DENORM-NEXT: v_add_f16_e64 v3, v1, -v5
476 ; GFX9-DENORM-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
477 ; GFX9-DENORM-NEXT: v_mov_b32_e32 v4, 0xffff
478 ; GFX9-DENORM-NEXT: v_and_or_b32 v0, v2, v4, v0
479 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v3, v4, v1
480 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
482 ; GFX10-LABEL: test_v4f16_sub_mul:
483 ; GFX10: ; %bb.0: ; %.entry
484 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
486 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
487 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
488 ; GFX10-NEXT: v_add_f16_e64 v2, v0, -v4
489 ; GFX10-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
490 ; GFX10-NEXT: v_add_f16_e64 v3, v1, -v5
491 ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
492 ; GFX10-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
493 ; GFX10-NEXT: v_and_or_b32 v0, v2, v4, v0
494 ; GFX10-NEXT: v_and_or_b32 v1, v3, v4, v1
495 ; GFX10-NEXT: s_setpc_b64 s[30:31]
497 ; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul:
498 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
499 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
501 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
502 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
503 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
505 ; GFX10-DENORM-LABEL: test_v4f16_sub_mul:
506 ; GFX10-DENORM: ; %bb.0: ; %.entry
507 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
509 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
510 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
511 ; GFX10-DENORM-NEXT: v_add_f16_e64 v2, v0, -v4
512 ; GFX10-DENORM-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
513 ; GFX10-DENORM-NEXT: v_add_f16_e64 v3, v1, -v5
514 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v4, 0xffff
515 ; GFX10-DENORM-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
516 ; GFX10-DENORM-NEXT: v_and_or_b32 v0, v2, v4, v0
517 ; GFX10-DENORM-NEXT: v_and_or_b32 v1, v3, v4, v1
518 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
520 %a = fmul <4 x half> %x, %y
521 %b = fsub <4 x half> %a, %z
; <4 x half> case with the product on the right of the subtraction:
; %z - (%x * %y), with two halves packed per 32-bit register in the checks.
; What the generated checks below expect:
;  - default runs and --denormal-fp-math=preserve-sign runs: two v_pk_mul_f16,
;    then the subtraction done per half (v_add_f16 for the low word and an
;    SDWA v_add_f16 on WORD_1 for the high word, both negating the product),
;    and the halves re-merged with v_and_or_b32 against a 0xffff mask.
;  - -fp-contract=fast runs: two v_pk_fma_f16 with neg_lo/neg_hi set on the
;    first operand.
; The gfx1010 sequences differ from gfx900 only in where the v_mov_b32 of the
; 0xffff mask sits relative to the second SDWA add.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
525 define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
526 ; GFX9-LABEL: test_v4f16_sub_mul_rhs:
527 ; GFX9: ; %bb.0: ; %.entry
528 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
530 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
531 ; GFX9-NEXT: v_add_f16_e64 v2, v4, -v0
532 ; GFX9-NEXT: v_add_f16_sdwa v0, v4, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
533 ; GFX9-NEXT: v_add_f16_e64 v3, v5, -v1
534 ; GFX9-NEXT: v_add_f16_sdwa v1, v5, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
535 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
536 ; GFX9-NEXT: v_and_or_b32 v0, v2, v4, v0
537 ; GFX9-NEXT: v_and_or_b32 v1, v3, v4, v1
538 ; GFX9-NEXT: s_setpc_b64 s[30:31]
540 ; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
541 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
542 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
544 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
545 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
547 ; GFX9-DENORM-LABEL: test_v4f16_sub_mul_rhs:
548 ; GFX9-DENORM: ; %bb.0: ; %.entry
549 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
551 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
552 ; GFX9-DENORM-NEXT: v_add_f16_e64 v2, v4, -v0
553 ; GFX9-DENORM-NEXT: v_add_f16_sdwa v0, v4, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
554 ; GFX9-DENORM-NEXT: v_add_f16_e64 v3, v5, -v1
555 ; GFX9-DENORM-NEXT: v_add_f16_sdwa v1, v5, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
556 ; GFX9-DENORM-NEXT: v_mov_b32_e32 v4, 0xffff
557 ; GFX9-DENORM-NEXT: v_and_or_b32 v0, v2, v4, v0
558 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v3, v4, v1
559 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
561 ; GFX10-LABEL: test_v4f16_sub_mul_rhs:
562 ; GFX10: ; %bb.0: ; %.entry
563 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
565 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
566 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
567 ; GFX10-NEXT: v_add_f16_e64 v2, v4, -v0
568 ; GFX10-NEXT: v_add_f16_sdwa v0, v4, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
569 ; GFX10-NEXT: v_add_f16_e64 v3, v5, -v1
570 ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
571 ; GFX10-NEXT: v_add_f16_sdwa v1, v5, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
572 ; GFX10-NEXT: v_and_or_b32 v0, v2, v4, v0
573 ; GFX10-NEXT: v_and_or_b32 v1, v3, v4, v1
574 ; GFX10-NEXT: s_setpc_b64 s[30:31]
576 ; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
577 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
578 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
580 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
581 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
582 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
584 ; GFX10-DENORM-LABEL: test_v4f16_sub_mul_rhs:
585 ; GFX10-DENORM: ; %bb.0: ; %.entry
586 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
587 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
588 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
589 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
590 ; GFX10-DENORM-NEXT: v_add_f16_e64 v2, v4, -v0
591 ; GFX10-DENORM-NEXT: v_add_f16_sdwa v0, v4, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
592 ; GFX10-DENORM-NEXT: v_add_f16_e64 v3, v5, -v1
593 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v4, 0xffff
594 ; GFX10-DENORM-NEXT: v_add_f16_sdwa v1, v5, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
595 ; GFX10-DENORM-NEXT: v_and_or_b32 v0, v2, v4, v0
596 ; GFX10-DENORM-NEXT: v_and_or_b32 v1, v3, v4, v1
597 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
599 %a = fmul <4 x half> %x, %y
600 %b = fsub <4 x half> %z, %a
; <4 x double> case with the product on the left of the subtraction:
; (%x * %y) - %z, handled one 64-bit register pair per instruction in the
; checks.
; What the generated checks below expect:
;  - default runs and --denormal-fp-math=preserve-sign runs: four v_mul_f64
;    followed by four v_add_f64 with the second source negated (no combined
;    instruction).
;  - -fp-contract=fast runs: four v_fma_f64, each with the third operand
;    negated.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
604 define <4 x double> @test_v4f64_sub_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
605 ; GFX9-LABEL: test_v4f64_sub_mul:
606 ; GFX9: ; %bb.0: ; %.entry
607 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
609 ; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
610 ; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
611 ; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
612 ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
613 ; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
614 ; GFX9-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
615 ; GFX9-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
616 ; GFX9-NEXT: s_setpc_b64 s[30:31]
618 ; GFX9-CONTRACT-LABEL: test_v4f64_sub_mul:
619 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
620 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
622 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
623 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
624 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
625 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
627 ; GFX9-DENORM-LABEL: test_v4f64_sub_mul:
628 ; GFX9-DENORM: ; %bb.0: ; %.entry
629 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
631 ; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
632 ; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
633 ; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
634 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
635 ; GFX9-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
636 ; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
637 ; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
638 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
640 ; GFX10-LABEL: test_v4f64_sub_mul:
641 ; GFX10: ; %bb.0: ; %.entry
642 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
644 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
645 ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
646 ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
647 ; GFX10-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
648 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
649 ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
650 ; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
651 ; GFX10-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
652 ; GFX10-NEXT: s_setpc_b64 s[30:31]
654 ; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul:
655 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
656 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
658 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
659 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
660 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
661 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
662 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
664 ; GFX10-DENORM-LABEL: test_v4f64_sub_mul:
665 ; GFX10-DENORM: ; %bb.0: ; %.entry
666 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
667 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
668 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
669 ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
670 ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
671 ; GFX10-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
672 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
673 ; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
674 ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
675 ; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
676 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
678 %a = fmul <4 x double> %x, %y
679 %b = fsub <4 x double> %a, %z
; <4 x double> case with the product on the right of the subtraction:
; %z - (%x * %y), handled one 64-bit register pair per instruction in the
; checks.
; What the generated checks below expect:
;  - default runs and --denormal-fp-math=preserve-sign runs: four v_mul_f64
;    followed by four v_add_f64 of %z with the negated product (no combined
;    instruction).
;  - -fp-contract=fast runs: four v_fma_f64, each with the first multiplicand
;    negated.
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
683 define <4 x double> @test_v4f64_sub_mul_rhs(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
684 ; GFX9-LABEL: test_v4f64_sub_mul_rhs:
685 ; GFX9: ; %bb.0: ; %.entry
686 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
687 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
688 ; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
689 ; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
690 ; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
691 ; GFX9-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
692 ; GFX9-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
693 ; GFX9-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
694 ; GFX9-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
695 ; GFX9-NEXT: s_setpc_b64 s[30:31]
697 ; GFX9-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
698 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
699 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
701 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
702 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
703 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23]
704 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
706 ; GFX9-DENORM-LABEL: test_v4f64_sub_mul_rhs:
707 ; GFX9-DENORM: ; %bb.0: ; %.entry
708 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
709 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
710 ; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
711 ; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
712 ; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
713 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
714 ; GFX9-DENORM-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
715 ; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
716 ; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
717 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
719 ; GFX10-LABEL: test_v4f64_sub_mul_rhs:
720 ; GFX10: ; %bb.0: ; %.entry
721 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
723 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
724 ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
725 ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
726 ; GFX10-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
727 ; GFX10-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
728 ; GFX10-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
729 ; GFX10-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
730 ; GFX10-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
731 ; GFX10-NEXT: s_setpc_b64 s[30:31]
733 ; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
734 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
735 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736 ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
737 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
738 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
739 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
740 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23]
741 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
743 ; GFX10-DENORM-LABEL: test_v4f64_sub_mul_rhs:
744 ; GFX10-DENORM: ; %bb.0: ; %.entry
745 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
746 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
747 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
748 ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
749 ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
750 ; GFX10-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
751 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
752 ; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
753 ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
754 ; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
755 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
757 %a = fmul <4 x double> %x, %y
758 %b = fsub <4 x double> %z, %a