1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
4 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
5 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
7 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
8 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -fp-contract=fast < %s | FileCheck -check-prefix=GFX11-CONTRACT %s
9 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX11-DENORM %s
11 ; fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12 ; fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; Scalar f32 with the product as the minuend: (x * y) - z.
; Per the checks below, -fp-contract=fast selects a single v_fma_f32 with a
; negated addend on every target, and --denormal-fp-math=preserve-sign selects
; v_mad_f32 on gfx900 and gfx1010. The default runs and the gfx1100
; preserve-sign run keep a separate multiply and subtract.
14 define float @test_f32_sub_mul(float %x, float %y, float %z) {
15 ; GFX9-LABEL: test_f32_sub_mul:
16 ; GFX9: ; %bb.0: ; %.entry
17 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
19 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
20 ; GFX9-NEXT: s_setpc_b64 s[30:31]
22 ; GFX9-CONTRACT-LABEL: test_f32_sub_mul:
23 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
24 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
26 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
28 ; GFX9-DENORM-LABEL: test_f32_sub_mul:
29 ; GFX9-DENORM: ; %bb.0: ; %.entry
30 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
32 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
34 ; GFX10-LABEL: test_f32_sub_mul:
35 ; GFX10: ; %bb.0: ; %.entry
36 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
38 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
39 ; GFX10-NEXT: s_setpc_b64 s[30:31]
41 ; GFX10-CONTRACT-LABEL: test_f32_sub_mul:
42 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
43 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
45 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
47 ; GFX10-DENORM-LABEL: test_f32_sub_mul:
48 ; GFX10-DENORM: ; %bb.0: ; %.entry
49 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
51 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
53 ; GFX11-CONTRACT-LABEL: test_f32_sub_mul:
54 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
55 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56 ; GFX11-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
57 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
59 ; GFX11-DENORM-LABEL: test_f32_sub_mul:
60 ; GFX11-DENORM: ; %bb.0: ; %.entry
61 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62 ; GFX11-DENORM-NEXT: v_mul_f32_e32 v0, v0, v1
63 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
64 ; GFX11-DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
65 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
67 %a = fmul float %x, %y
68 %b = fsub float %a, %z
; Scalar f32 with the product as the subtrahend: z - (x * y).
; Per the checks below, -fp-contract=fast selects a single v_fma_f32 whose
; first multiplicand is negated on every target, and
; --denormal-fp-math=preserve-sign selects v_mad_f32 in the same form on
; gfx900 and gfx1010. The default runs and the gfx1100 preserve-sign run keep
; a separate multiply and subtract.
72 define float @test_f32_sub_mul_rhs(float %x, float %y, float %z) {
73 ; GFX9-LABEL: test_f32_sub_mul_rhs:
74 ; GFX9: ; %bb.0: ; %.entry
75 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
77 ; GFX9-NEXT: v_sub_f32_e32 v0, v2, v0
78 ; GFX9-NEXT: s_setpc_b64 s[30:31]
80 ; GFX9-CONTRACT-LABEL: test_f32_sub_mul_rhs:
81 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
82 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
84 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
86 ; GFX9-DENORM-LABEL: test_f32_sub_mul_rhs:
87 ; GFX9-DENORM: ; %bb.0: ; %.entry
88 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; GFX9-DENORM-NEXT: v_mad_f32 v0, -v0, v1, v2
90 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
92 ; GFX10-LABEL: test_f32_sub_mul_rhs:
93 ; GFX10: ; %bb.0: ; %.entry
94 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
96 ; GFX10-NEXT: v_sub_f32_e32 v0, v2, v0
97 ; GFX10-NEXT: s_setpc_b64 s[30:31]
99 ; GFX10-CONTRACT-LABEL: test_f32_sub_mul_rhs:
100 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
101 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
103 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
105 ; GFX10-DENORM-LABEL: test_f32_sub_mul_rhs:
106 ; GFX10-DENORM: ; %bb.0: ; %.entry
107 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v1, v2
109 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
111 ; GFX11-CONTRACT-LABEL: test_f32_sub_mul_rhs:
112 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
113 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX11-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
115 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
117 ; GFX11-DENORM-LABEL: test_f32_sub_mul_rhs:
118 ; GFX11-DENORM: ; %bb.0: ; %.entry
119 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX11-DENORM-NEXT: v_mul_f32_e32 v0, v0, v1
121 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
122 ; GFX11-DENORM-NEXT: v_sub_f32_e32 v0, v2, v0
123 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
125 %a = fmul float %x, %y
126 %b = fsub float %z, %a
; Scalar f16 with the product as the minuend: (x * y) - z.
; Per the checks below, -fp-contract=fast selects a single v_fma_f16 with a
; negated addend on every target. With --denormal-fp-math=preserve-sign only
; gfx900 fuses (to v_mad_legacy_f16); gfx1010 and gfx1100 keep a separate
; multiply and subtract, as do the default runs.
130 define half @test_half_sub_mul(half %x, half %y, half %z) {
131 ; GFX9-LABEL: test_half_sub_mul:
132 ; GFX9: ; %bb.0: ; %.entry
133 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
135 ; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
136 ; GFX9-NEXT: s_setpc_b64 s[30:31]
138 ; GFX9-CONTRACT-LABEL: test_half_sub_mul:
139 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
140 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
142 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
144 ; GFX9-DENORM-LABEL: test_half_sub_mul:
145 ; GFX9-DENORM: ; %bb.0: ; %.entry
146 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, -v2
148 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
150 ; GFX10-LABEL: test_half_sub_mul:
151 ; GFX10: ; %bb.0: ; %.entry
152 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
154 ; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2
155 ; GFX10-NEXT: s_setpc_b64 s[30:31]
157 ; GFX10-CONTRACT-LABEL: test_half_sub_mul:
158 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
159 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
161 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
163 ; GFX10-DENORM-LABEL: test_half_sub_mul:
164 ; GFX10-DENORM: ; %bb.0: ; %.entry
165 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
167 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
168 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
170 ; GFX11-CONTRACT-LABEL: test_half_sub_mul:
171 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
172 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173 ; GFX11-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
174 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
176 ; GFX11-DENORM-LABEL: test_half_sub_mul:
177 ; GFX11-DENORM: ; %bb.0: ; %.entry
178 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
180 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
181 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
182 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
184 %a = fmul half %x, %y
185 %b = fsub half %a, %z
; Scalar f16 with the product as the subtrahend: z - (x * y).
; Per the checks below, -fp-contract=fast selects a single v_fma_f16 whose
; first multiplicand is negated on every target. With
; --denormal-fp-math=preserve-sign only gfx900 fuses (to v_mad_legacy_f16);
; gfx1010 and gfx1100 keep a separate multiply and subtract, as do the default
; runs.
189 define half @test_half_sub_mul_rhs(half %x, half %y, half %z) {
190 ; GFX9-LABEL: test_half_sub_mul_rhs:
191 ; GFX9: ; %bb.0: ; %.entry
192 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
194 ; GFX9-NEXT: v_sub_f16_e32 v0, v2, v0
195 ; GFX9-NEXT: s_setpc_b64 s[30:31]
197 ; GFX9-CONTRACT-LABEL: test_half_sub_mul_rhs:
198 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
199 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200 ; GFX9-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
201 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
203 ; GFX9-DENORM-LABEL: test_half_sub_mul_rhs:
204 ; GFX9-DENORM: ; %bb.0: ; %.entry
205 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, -v0, v1, v2
207 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
209 ; GFX10-LABEL: test_half_sub_mul_rhs:
210 ; GFX10: ; %bb.0: ; %.entry
211 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
213 ; GFX10-NEXT: v_sub_f16_e32 v0, v2, v0
214 ; GFX10-NEXT: s_setpc_b64 s[30:31]
216 ; GFX10-CONTRACT-LABEL: test_half_sub_mul_rhs:
217 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
218 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
220 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
222 ; GFX10-DENORM-LABEL: test_half_sub_mul_rhs:
223 ; GFX10-DENORM: ; %bb.0: ; %.entry
224 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
226 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0
227 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
229 ; GFX11-CONTRACT-LABEL: test_half_sub_mul_rhs:
230 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
231 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; GFX11-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
233 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
235 ; GFX11-DENORM-LABEL: test_half_sub_mul_rhs:
236 ; GFX11-DENORM: ; %bb.0: ; %.entry
237 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
239 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
240 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0
241 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
243 %a = fmul half %x, %y
244 %b = fsub half %z, %a
; Scalar f64 with the product as the minuend: (x * y) - z.
; Per the checks below, only -fp-contract=fast fuses, giving a single
; v_fma_f64 with a negated addend on every target. The default and
; --denormal-fp-math=preserve-sign runs all emit v_mul_f64 followed by
; v_add_f64 with a negated second operand.
248 define double @test_double_sub_mul(double %x, double %y, double %z) {
249 ; GFX9-LABEL: test_double_sub_mul:
250 ; GFX9: ; %bb.0: ; %.entry
251 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
253 ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
254 ; GFX9-NEXT: s_setpc_b64 s[30:31]
256 ; GFX9-CONTRACT-LABEL: test_double_sub_mul:
257 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
258 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
260 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
262 ; GFX9-DENORM-LABEL: test_double_sub_mul:
263 ; GFX9-DENORM: ; %bb.0: ; %.entry
264 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
266 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
267 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
269 ; GFX10-LABEL: test_double_sub_mul:
270 ; GFX10: ; %bb.0: ; %.entry
271 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
273 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
274 ; GFX10-NEXT: s_setpc_b64 s[30:31]
276 ; GFX10-CONTRACT-LABEL: test_double_sub_mul:
277 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
278 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
280 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
282 ; GFX10-DENORM-LABEL: test_double_sub_mul:
283 ; GFX10-DENORM: ; %bb.0: ; %.entry
284 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
286 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
287 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
289 ; GFX11-CONTRACT-LABEL: test_double_sub_mul:
290 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
291 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
293 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
295 ; GFX11-DENORM-LABEL: test_double_sub_mul:
296 ; GFX11-DENORM: ; %bb.0: ; %.entry
297 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298 ; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
299 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
300 ; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
301 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
303 %a = fmul double %x, %y
304 %b = fsub double %a, %z
; Scalar f64 with the product as the subtrahend: z - (x * y).
; Per the checks below, only -fp-contract=fast fuses, giving a single
; v_fma_f64 whose first multiplicand is negated on every target. The default
; and --denormal-fp-math=preserve-sign runs all emit v_mul_f64 followed by
; v_add_f64 of z and the negated product.
308 define double @test_double_sub_mul_rhs(double %x, double %y, double %z) {
309 ; GFX9-LABEL: test_double_sub_mul_rhs:
310 ; GFX9: ; %bb.0: ; %.entry
311 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
313 ; GFX9-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
314 ; GFX9-NEXT: s_setpc_b64 s[30:31]
316 ; GFX9-CONTRACT-LABEL: test_double_sub_mul_rhs:
317 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
318 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
320 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
322 ; GFX9-DENORM-LABEL: test_double_sub_mul_rhs:
323 ; GFX9-DENORM: ; %bb.0: ; %.entry
324 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
325 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
326 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
327 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
329 ; GFX10-LABEL: test_double_sub_mul_rhs:
330 ; GFX10: ; %bb.0: ; %.entry
331 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
333 ; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
334 ; GFX10-NEXT: s_setpc_b64 s[30:31]
336 ; GFX10-CONTRACT-LABEL: test_double_sub_mul_rhs:
337 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
338 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
340 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
342 ; GFX10-DENORM-LABEL: test_double_sub_mul_rhs:
343 ; GFX10-DENORM: ; %bb.0: ; %.entry
344 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
346 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
347 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
349 ; GFX11-CONTRACT-LABEL: test_double_sub_mul_rhs:
350 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
351 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
353 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
355 ; GFX11-DENORM-LABEL: test_double_sub_mul_rhs:
356 ; GFX11-DENORM: ; %bb.0: ; %.entry
357 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358 ; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
359 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
360 ; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
361 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
363 %a = fmul double %x, %y
364 %b = fsub double %z, %a
; <4 x float> with the product as the minuend: (x * y) - z, element-wise.
; Per the checks below, -fp-contract=fast yields four v_fma_f32 with a negated
; addend on every target, and --denormal-fp-math=preserve-sign yields four
; v_mad_f32 on gfx900 and gfx1010. The default runs emit four multiplies then
; four subtracts; the gfx1100 preserve-sign run does the same work with paired
; v_dual_mul_f32 / v_dual_sub_f32 instructions.
368 define <4 x float> @test_v4f32_sub_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
369 ; GFX9-LABEL: test_v4f32_sub_mul:
370 ; GFX9: ; %bb.0: ; %.entry
371 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4
373 ; GFX9-NEXT: v_mul_f32_e32 v1, v1, v5
374 ; GFX9-NEXT: v_mul_f32_e32 v2, v2, v6
375 ; GFX9-NEXT: v_mul_f32_e32 v3, v3, v7
376 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v8
377 ; GFX9-NEXT: v_sub_f32_e32 v1, v1, v9
378 ; GFX9-NEXT: v_sub_f32_e32 v2, v2, v10
379 ; GFX9-NEXT: v_sub_f32_e32 v3, v3, v11
380 ; GFX9-NEXT: s_setpc_b64 s[30:31]
382 ; GFX9-CONTRACT-LABEL: test_v4f32_sub_mul:
383 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
384 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
386 ; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
387 ; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
388 ; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, -v11
389 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
391 ; GFX9-DENORM-LABEL: test_v4f32_sub_mul:
392 ; GFX9-DENORM: ; %bb.0: ; %.entry
393 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394 ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v4, -v8
395 ; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v5, -v9
396 ; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v6, -v10
397 ; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, -v11
398 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
400 ; GFX10-LABEL: test_v4f32_sub_mul:
401 ; GFX10: ; %bb.0: ; %.entry
402 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
404 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
405 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
406 ; GFX10-NEXT: v_mul_f32_e32 v3, v3, v7
407 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v8
408 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v9
409 ; GFX10-NEXT: v_sub_f32_e32 v2, v2, v10
410 ; GFX10-NEXT: v_sub_f32_e32 v3, v3, v11
411 ; GFX10-NEXT: s_setpc_b64 s[30:31]
413 ; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul:
414 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
415 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
417 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
418 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
419 ; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, -v11
420 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
422 ; GFX10-DENORM-LABEL: test_v4f32_sub_mul:
423 ; GFX10-DENORM: ; %bb.0: ; %.entry
424 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v4, -v8
426 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v5, -v9
427 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, -v10
428 ; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, v7, -v11
429 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
431 ; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul:
432 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
433 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434 ; GFX11-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
435 ; GFX11-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
436 ; GFX11-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
437 ; GFX11-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, -v11
438 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
440 ; GFX11-DENORM-LABEL: test_v4f32_sub_mul:
441 ; GFX11-DENORM: ; %bb.0: ; %.entry
442 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443 ; GFX11-DENORM-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
444 ; GFX11-DENORM-NEXT: v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7
445 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
446 ; GFX11-DENORM-NEXT: v_dual_sub_f32 v0, v0, v8 :: v_dual_sub_f32 v1, v1, v9
447 ; GFX11-DENORM-NEXT: v_dual_sub_f32 v2, v2, v10 :: v_dual_sub_f32 v3, v3, v11
448 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
450 %a = fmul <4 x float> %x, %y
451 %b = fsub <4 x float> %a, %z
; <4 x float> with the product as the subtrahend: z - (x * y), element-wise.
; Per the checks below, -fp-contract=fast yields four v_fma_f32 whose first
; multiplicand is negated on every target, and
; --denormal-fp-math=preserve-sign yields four v_mad_f32 in the same form on
; gfx900 and gfx1010. The default runs emit four multiplies then four
; subtracts; the gfx1100 preserve-sign run does the same work with paired
; v_dual_mul_f32 / v_dual_sub_f32 instructions.
455 define <4 x float> @test_v4f32_sub_mul_rhs(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
456 ; GFX9-LABEL: test_v4f32_sub_mul_rhs:
457 ; GFX9: ; %bb.0: ; %.entry
458 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459 ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4
460 ; GFX9-NEXT: v_mul_f32_e32 v1, v1, v5
461 ; GFX9-NEXT: v_mul_f32_e32 v2, v2, v6
462 ; GFX9-NEXT: v_mul_f32_e32 v3, v3, v7
463 ; GFX9-NEXT: v_sub_f32_e32 v0, v8, v0
464 ; GFX9-NEXT: v_sub_f32_e32 v1, v9, v1
465 ; GFX9-NEXT: v_sub_f32_e32 v2, v10, v2
466 ; GFX9-NEXT: v_sub_f32_e32 v3, v11, v3
467 ; GFX9-NEXT: s_setpc_b64 s[30:31]
469 ; GFX9-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
470 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
471 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
473 ; GFX9-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
474 ; GFX9-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
475 ; GFX9-CONTRACT-NEXT: v_fma_f32 v3, -v3, v7, v11
476 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
478 ; GFX9-DENORM-LABEL: test_v4f32_sub_mul_rhs:
479 ; GFX9-DENORM: ; %bb.0: ; %.entry
480 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX9-DENORM-NEXT: v_mad_f32 v0, -v0, v4, v8
482 ; GFX9-DENORM-NEXT: v_mad_f32 v1, -v1, v5, v9
483 ; GFX9-DENORM-NEXT: v_mad_f32 v2, -v2, v6, v10
484 ; GFX9-DENORM-NEXT: v_mad_f32 v3, -v3, v7, v11
485 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
487 ; GFX10-LABEL: test_v4f32_sub_mul_rhs:
488 ; GFX10: ; %bb.0: ; %.entry
489 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
491 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
492 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
493 ; GFX10-NEXT: v_mul_f32_e32 v3, v3, v7
494 ; GFX10-NEXT: v_sub_f32_e32 v0, v8, v0
495 ; GFX10-NEXT: v_sub_f32_e32 v1, v9, v1
496 ; GFX10-NEXT: v_sub_f32_e32 v2, v10, v2
497 ; GFX10-NEXT: v_sub_f32_e32 v3, v11, v3
498 ; GFX10-NEXT: s_setpc_b64 s[30:31]
500 ; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
501 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
502 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
504 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
505 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
506 ; GFX10-CONTRACT-NEXT: v_fma_f32 v3, -v3, v7, v11
507 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
509 ; GFX10-DENORM-LABEL: test_v4f32_sub_mul_rhs:
510 ; GFX10-DENORM: ; %bb.0: ; %.entry
511 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v4, v8
513 ; GFX10-DENORM-NEXT: v_mad_f32 v1, -v1, v5, v9
514 ; GFX10-DENORM-NEXT: v_mad_f32 v2, -v2, v6, v10
515 ; GFX10-DENORM-NEXT: v_mad_f32 v3, -v3, v7, v11
516 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
518 ; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
519 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
520 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX11-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
522 ; GFX11-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
523 ; GFX11-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
524 ; GFX11-CONTRACT-NEXT: v_fma_f32 v3, -v3, v7, v11
525 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
527 ; GFX11-DENORM-LABEL: test_v4f32_sub_mul_rhs:
528 ; GFX11-DENORM: ; %bb.0: ; %.entry
529 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530 ; GFX11-DENORM-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
531 ; GFX11-DENORM-NEXT: v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7
532 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
533 ; GFX11-DENORM-NEXT: v_dual_sub_f32 v0, v8, v0 :: v_dual_sub_f32 v1, v9, v1
534 ; GFX11-DENORM-NEXT: v_dual_sub_f32 v2, v10, v2 :: v_dual_sub_f32 v3, v11, v3
535 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
537 %a = fmul <4 x float> %x, %y
538 %b = fsub <4 x float> %z, %a
; <4 x half> with the product as the minuend: (x * y) - z, element-wise.
; Per the checks below, only -fp-contract=fast fuses, giving two v_pk_fma_f16
; with neg_lo/neg_hi set on the third operand on every target. All other runs
; emit two v_pk_mul_f16, then subtract each 16-bit half separately and repack
; with v_pack_b32_f16. gfx900 and gfx1010 reach the high halves through SDWA
; WORD_1 selects, while gfx1100 extracts them with v_lshrrev_b32 by 16.
542 define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
543 ; GFX9-LABEL: test_v4f16_sub_mul:
544 ; GFX9: ; %bb.0: ; %.entry
545 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
547 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
548 ; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
549 ; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
550 ; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
551 ; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
552 ; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
553 ; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
554 ; GFX9-NEXT: s_setpc_b64 s[30:31]
556 ; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul:
557 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
558 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
560 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
561 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
563 ; GFX9-DENORM-LABEL: test_v4f16_sub_mul:
564 ; GFX9-DENORM: ; %bb.0: ; %.entry
565 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
567 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
568 ; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
569 ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
570 ; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
571 ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
572 ; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
573 ; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
574 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
576 ; GFX10-LABEL: test_v4f16_sub_mul:
577 ; GFX10: ; %bb.0: ; %.entry
578 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
580 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
581 ; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
582 ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
583 ; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
584 ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
585 ; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
586 ; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
587 ; GFX10-NEXT: s_setpc_b64 s[30:31]
589 ; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul:
590 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
591 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
593 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
594 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
596 ; GFX10-DENORM-LABEL: test_v4f16_sub_mul:
597 ; GFX10-DENORM: ; %bb.0: ; %.entry
598 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
600 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
601 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
602 ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
603 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
604 ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
605 ; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
606 ; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
607 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
609 ; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul:
610 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
611 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
613 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
614 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
616 ; GFX11-DENORM-LABEL: test_v4f16_sub_mul:
617 ; GFX11-DENORM: ; %bb.0: ; %.entry
618 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
620 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
621 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
622 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v5
623 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
624 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
625 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v1
626 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v0, v4
627 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v1, v1, v5
628 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
629 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v2, v6, v2
630 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v3, v7, v3
631 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
632 ; GFX11-DENORM-NEXT: v_pack_b32_f16 v0, v0, v2
633 ; GFX11-DENORM-NEXT: v_pack_b32_f16 v1, v1, v3
634 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
636 %a = fmul <4 x half> %x, %y
637 %b = fsub <4 x half> %a, %z
; <4 x half> with the product as the subtrahend: z - (x * y), element-wise.
; Per the checks below, only -fp-contract=fast fuses, giving two v_pk_fma_f16
; with neg_lo/neg_hi set on the first operand on every target. All other runs
; emit two v_pk_mul_f16, then subtract each 16-bit half separately and repack
; with v_pack_b32_f16. gfx900 and gfx1010 reach the high halves through SDWA
; WORD_1 selects, while gfx1100 extracts them with v_lshrrev_b32 by 16.
641 define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
642 ; GFX9-LABEL: test_v4f16_sub_mul_rhs:
643 ; GFX9: ; %bb.0: ; %.entry
644 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
646 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
647 ; GFX9-NEXT: v_sub_f16_e32 v2, v4, v0
648 ; GFX9-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
649 ; GFX9-NEXT: v_sub_f16_e32 v3, v5, v1
650 ; GFX9-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
651 ; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
652 ; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
653 ; GFX9-NEXT: s_setpc_b64 s[30:31]
655 ; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
656 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
657 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
659 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
660 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
662 ; GFX9-DENORM-LABEL: test_v4f16_sub_mul_rhs:
663 ; GFX9-DENORM: ; %bb.0: ; %.entry
664 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
666 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
667 ; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
668 ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
669 ; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
670 ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
671 ; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
672 ; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
673 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
675 ; GFX10-LABEL: test_v4f16_sub_mul_rhs:
676 ; GFX10: ; %bb.0: ; %.entry
677 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
679 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
680 ; GFX10-NEXT: v_sub_f16_e32 v2, v4, v0
681 ; GFX10-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
682 ; GFX10-NEXT: v_sub_f16_e32 v3, v5, v1
683 ; GFX10-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
684 ; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
685 ; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
686 ; GFX10-NEXT: s_setpc_b64 s[30:31]
688 ; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
689 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
690 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
692 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
693 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
695 ; GFX10-DENORM-LABEL: test_v4f16_sub_mul_rhs:
696 ; GFX10-DENORM: ; %bb.0: ; %.entry
697 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
699 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
700 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
701 ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
702 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
703 ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
704 ; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
705 ; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
706 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
708 ; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
709 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
710 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
711 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
712 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
713 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
715 ; GFX11-DENORM-LABEL: test_v4f16_sub_mul_rhs:
716 ; GFX11-DENORM: ; %bb.0: ; %.entry
717 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
718 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
719 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
720 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
721 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v5
722 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
723 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
724 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v1
725 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v4, v0
726 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v1, v5, v1
727 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
728 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v2, v2, v6
729 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v3, v3, v7
730 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
731 ; GFX11-DENORM-NEXT: v_pack_b32_f16 v0, v0, v2
732 ; GFX11-DENORM-NEXT: v_pack_b32_f16 v1, v1, v3
733 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
735 %a = fmul <4 x half> %x, %y
736 %b = fsub <4 x half> %z, %a
; <4 x double> variant of the (fsub (fmul x, y), z) pattern: the product of
; %x and %y is the left-hand operand of the subtraction.
; What the check lines below expect:
;   * CONTRACT prefixes (-fp-contract=fast run lines): one v_fma_f64 per
;     element with the third source negated and no separate multiply.
;   * default and DENORM prefixes: four v_mul_f64 followed by four v_add_f64
;     whose second source is negated (no fused instruction is expected).
;   * the GFX11 DENORM group additionally expects s_delay_alu lines between
;     the dependent instructions.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
740 define <4 x double> @test_v4f64_sub_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
741 ; GFX9-LABEL: test_v4f64_sub_mul:
742 ; GFX9: ; %bb.0: ; %.entry
743 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
745 ; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
746 ; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
747 ; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
748 ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
749 ; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
750 ; GFX9-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
751 ; GFX9-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
752 ; GFX9-NEXT: s_setpc_b64 s[30:31]
754 ; GFX9-CONTRACT-LABEL: test_v4f64_sub_mul:
755 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
756 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
757 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
758 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
759 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
760 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
761 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
763 ; GFX9-DENORM-LABEL: test_v4f64_sub_mul:
764 ; GFX9-DENORM: ; %bb.0: ; %.entry
765 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
766 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
767 ; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
768 ; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
769 ; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
770 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
771 ; GFX9-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
772 ; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
773 ; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
774 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
776 ; GFX10-LABEL: test_v4f64_sub_mul:
777 ; GFX10: ; %bb.0: ; %.entry
778 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
780 ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
781 ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
782 ; GFX10-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
783 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
784 ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
785 ; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
786 ; GFX10-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
787 ; GFX10-NEXT: s_setpc_b64 s[30:31]
789 ; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul:
790 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
791 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
792 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
793 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
794 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
795 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
796 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
798 ; GFX10-DENORM-LABEL: test_v4f64_sub_mul:
799 ; GFX10-DENORM: ; %bb.0: ; %.entry
800 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
802 ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
803 ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
804 ; GFX10-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
805 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
806 ; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
807 ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
808 ; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
809 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
811 ; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul:
812 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
813 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
815 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
816 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
817 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
818 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
820 ; GFX11-DENORM-LABEL: test_v4f64_sub_mul:
821 ; GFX11-DENORM: ; %bb.0: ; %.entry
822 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823 ; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
824 ; GFX11-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
825 ; GFX11-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
826 ; GFX11-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
827 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
828 ; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
829 ; GFX11-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
830 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
831 ; GFX11-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
832 ; GFX11-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
833 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
; IR under test: element-wise multiply, then subtract %z from the product.
835 %a = fmul <4 x double> %x, %y
836 %b = fsub <4 x double> %a, %z
; <4 x double> variant of the (fsub x, (fmul y, z)) pattern: the product of
; %x and %y is the right-hand operand of the subtraction.
; What the check lines below expect:
;   * CONTRACT prefixes (-fp-contract=fast run lines): one v_fma_f64 per
;     element with the first source negated and no separate multiply.
;   * default and DENORM prefixes: four v_mul_f64 followed by four v_add_f64
;     that add the negated product to the %z registers (no fused instruction
;     is expected).
;   * the GFX11 DENORM group additionally expects s_delay_alu lines between
;     the dependent instructions.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
840 define <4 x double> @test_v4f64_sub_mul_rhs(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
841 ; GFX9-LABEL: test_v4f64_sub_mul_rhs:
842 ; GFX9: ; %bb.0: ; %.entry
843 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
844 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
845 ; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
846 ; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
847 ; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
848 ; GFX9-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
849 ; GFX9-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
850 ; GFX9-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
851 ; GFX9-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
852 ; GFX9-NEXT: s_setpc_b64 s[30:31]
854 ; GFX9-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
855 ; GFX9-CONTRACT: ; %bb.0: ; %.entry
856 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
857 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
858 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
859 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
860 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23]
861 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
863 ; GFX9-DENORM-LABEL: test_v4f64_sub_mul_rhs:
864 ; GFX9-DENORM: ; %bb.0: ; %.entry
865 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
867 ; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
868 ; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
869 ; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
870 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
871 ; GFX9-DENORM-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
872 ; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
873 ; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
874 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
876 ; GFX10-LABEL: test_v4f64_sub_mul_rhs:
877 ; GFX10: ; %bb.0: ; %.entry
878 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
879 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
880 ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
881 ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
882 ; GFX10-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
883 ; GFX10-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
884 ; GFX10-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
885 ; GFX10-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
886 ; GFX10-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
887 ; GFX10-NEXT: s_setpc_b64 s[30:31]
889 ; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
890 ; GFX10-CONTRACT: ; %bb.0: ; %.entry
891 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
892 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
893 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
894 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
895 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23]
896 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
898 ; GFX10-DENORM-LABEL: test_v4f64_sub_mul_rhs:
899 ; GFX10-DENORM: ; %bb.0: ; %.entry
900 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
902 ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
903 ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
904 ; GFX10-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
905 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
906 ; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
907 ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
908 ; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
909 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
911 ; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
912 ; GFX11-CONTRACT: ; %bb.0: ; %.entry
913 ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
915 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
916 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
917 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23]
918 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
920 ; GFX11-DENORM-LABEL: test_v4f64_sub_mul_rhs:
921 ; GFX11-DENORM: ; %bb.0: ; %.entry
922 ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
923 ; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
924 ; GFX11-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
925 ; GFX11-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
926 ; GFX11-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
927 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
928 ; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1]
929 ; GFX11-DENORM-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3]
930 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
931 ; GFX11-DENORM-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5]
932 ; GFX11-DENORM-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7]
933 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
; IR under test: element-wise multiply, then subtract the product from %z.
935 %a = fmul <4 x double> %x, %y
936 %b = fsub <4 x double> %z, %a