1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
5 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
7 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
9 ; fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
10 define float @test_f32_sub_ext_neg_mul(float %x, float %y, float %z) {
11 ; GFX9-LABEL: test_f32_sub_ext_neg_mul:
12 ; GFX9: ; %bb.0: ; %entry
13 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GFX9-NEXT: v_mul_f32_e64 v0, v0, -v1
15 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
16 ; GFX9-NEXT: s_setpc_b64 s[30:31]
18 ; GFX9-CONTRACT-LABEL: test_f32_sub_ext_neg_mul:
19 ; GFX9-CONTRACT: ; %bb.0: ; %entry
20 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, -v1, -v2
22 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
24 ; GFX9-DENORM-LABEL: test_f32_sub_ext_neg_mul:
25 ; GFX9-DENORM: ; %bb.0: ; %entry
26 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, -v1, -v2
28 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
30 ; GFX10-LABEL: test_f32_sub_ext_neg_mul:
31 ; GFX10: ; %bb.0: ; %entry
32 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, -v1
34 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
35 ; GFX10-NEXT: s_setpc_b64 s[30:31]
37 ; GFX10-CONTRACT-LABEL: test_f32_sub_ext_neg_mul:
38 ; GFX10-CONTRACT: ; %bb.0: ; %entry
39 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, -v1, -v2
41 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
43 ; GFX10-DENORM-LABEL: test_f32_sub_ext_neg_mul:
44 ; GFX10-DENORM: ; %bb.0: ; %entry
45 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, -v1, -v2
47 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
49 %a = fmul float %x, %y
51 %c = fsub float %b, %z
55 define half @test_f16_sub_ext_neg_mul(half %x, half %y, half %z) {
56 ; GFX9-LABEL: test_f16_sub_ext_neg_mul:
57 ; GFX9: ; %bb.0: ; %entry
58 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX9-NEXT: v_mul_f16_e64 v0, v0, -v1
60 ; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
61 ; GFX9-NEXT: s_setpc_b64 s[30:31]
63 ; GFX9-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
64 ; GFX9-CONTRACT: ; %bb.0: ; %entry
65 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66 ; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, -v1, -v2
67 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
69 ; GFX9-DENORM-LABEL: test_f16_sub_ext_neg_mul:
70 ; GFX9-DENORM: ; %bb.0: ; %entry
71 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72 ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, -v1, -v2
73 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
75 ; GFX10-LABEL: test_f16_sub_ext_neg_mul:
76 ; GFX10: ; %bb.0: ; %entry
77 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78 ; GFX10-NEXT: v_mul_f16_e64 v0, v0, -v1
79 ; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2
80 ; GFX10-NEXT: s_setpc_b64 s[30:31]
82 ; GFX10-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
83 ; GFX10-CONTRACT: ; %bb.0: ; %entry
84 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, -v1, -v2
86 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
88 ; GFX10-DENORM-LABEL: test_f16_sub_ext_neg_mul:
89 ; GFX10-DENORM: ; %bb.0: ; %entry
90 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91 ; GFX10-DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
92 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
93 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
101 define double @test_f64_sub_ext_neg_mul(double %x, double %y, double %z) {
102 ; GFX9-LABEL: test_f64_sub_ext_neg_mul:
103 ; GFX9: ; %bb.0: ; %entry
104 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
106 ; GFX9-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
107 ; GFX9-NEXT: s_setpc_b64 s[30:31]
109 ; GFX9-CONTRACT-LABEL: test_f64_sub_ext_neg_mul:
110 ; GFX9-CONTRACT: ; %bb.0: ; %entry
111 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5]
113 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
115 ; GFX9-DENORM-LABEL: test_f64_sub_ext_neg_mul:
116 ; GFX9-DENORM: ; %bb.0: ; %entry
117 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
119 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
120 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
122 ; GFX10-LABEL: test_f64_sub_ext_neg_mul:
123 ; GFX10: ; %bb.0: ; %entry
124 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
126 ; GFX10-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
127 ; GFX10-NEXT: s_setpc_b64 s[30:31]
129 ; GFX10-CONTRACT-LABEL: test_f64_sub_ext_neg_mul:
130 ; GFX10-CONTRACT: ; %bb.0: ; %entry
131 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5]
133 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
135 ; GFX10-DENORM-LABEL: test_f64_sub_ext_neg_mul:
136 ; GFX10-DENORM: ; %bb.0: ; %entry
137 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
139 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
140 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
142 %a = fmul double %x, %y
144 %c = fsub double %b, %z
148 ; fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
149 define <4 x float> @test_v4f32_sub_ext_neg_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
150 ; GFX9-LABEL: test_v4f32_sub_ext_neg_mul:
151 ; GFX9: ; %bb.0: ; %entry
152 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GFX9-NEXT: v_mul_f32_e64 v0, v0, -v4
154 ; GFX9-NEXT: v_mul_f32_e64 v1, v1, -v5
155 ; GFX9-NEXT: v_mul_f32_e64 v2, v2, -v6
156 ; GFX9-NEXT: v_mul_f32_e64 v3, v3, -v7
157 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v8
158 ; GFX9-NEXT: v_sub_f32_e32 v1, v1, v9
159 ; GFX9-NEXT: v_sub_f32_e32 v2, v2, v10
160 ; GFX9-NEXT: v_sub_f32_e32 v3, v3, v11
161 ; GFX9-NEXT: s_setpc_b64 s[30:31]
163 ; GFX9-CONTRACT-LABEL: test_v4f32_sub_ext_neg_mul:
164 ; GFX9-CONTRACT: ; %bb.0: ; %entry
165 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166 ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, -v4, -v8
167 ; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, -v5, -v9
168 ; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, -v6, -v10
169 ; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, -v7, -v11
170 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
172 ; GFX9-DENORM-LABEL: test_v4f32_sub_ext_neg_mul:
173 ; GFX9-DENORM: ; %bb.0: ; %entry
174 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175 ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, -v4, -v8
176 ; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, -v5, -v9
177 ; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, -v6, -v10
178 ; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, -v7, -v11
179 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
181 ; GFX10-LABEL: test_v4f32_sub_ext_neg_mul:
182 ; GFX10: ; %bb.0: ; %entry
183 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, -v4
185 ; GFX10-NEXT: v_mul_f32_e64 v1, v1, -v5
186 ; GFX10-NEXT: v_mul_f32_e64 v2, v2, -v6
187 ; GFX10-NEXT: v_mul_f32_e64 v3, v3, -v7
188 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v8
189 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v9
190 ; GFX10-NEXT: v_sub_f32_e32 v2, v2, v10
191 ; GFX10-NEXT: v_sub_f32_e32 v3, v3, v11
192 ; GFX10-NEXT: s_setpc_b64 s[30:31]
194 ; GFX10-CONTRACT-LABEL: test_v4f32_sub_ext_neg_mul:
195 ; GFX10-CONTRACT: ; %bb.0: ; %entry
196 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, -v4, -v8
198 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, -v5, -v9
199 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, -v6, -v10
200 ; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, -v7, -v11
201 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
203 ; GFX10-DENORM-LABEL: test_v4f32_sub_ext_neg_mul:
204 ; GFX10-DENORM: ; %bb.0: ; %entry
205 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, -v4, -v8
207 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, -v5, -v9
208 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, -v6, -v10
209 ; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, -v7, -v11
210 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
212 %a = fmul <4 x float> %x, %y
213 %b = fneg <4 x float> %a
214 %c = fsub <4 x float> %b, %z
218 define <4 x half> @test_v4f16_sub_ext_neg_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
219 ; GFX9-LABEL: test_v4f16_sub_ext_neg_mul:
220 ; GFX9: ; %bb.0: ; %entry
221 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
223 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
224 ; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
225 ; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
226 ; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
227 ; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
228 ; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
229 ; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
230 ; GFX9-NEXT: s_setpc_b64 s[30:31]
232 ; GFX9-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
233 ; GFX9-CONTRACT: ; %bb.0: ; %entry
234 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
236 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
237 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
239 ; GFX9-DENORM-LABEL: test_v4f16_sub_ext_neg_mul:
240 ; GFX9-DENORM: ; %bb.0: ; %entry
241 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
243 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
244 ; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
245 ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
246 ; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
247 ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
248 ; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
249 ; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
250 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
252 ; GFX10-LABEL: test_v4f16_sub_ext_neg_mul:
253 ; GFX10: ; %bb.0: ; %entry
254 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
256 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
257 ; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
258 ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
259 ; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
260 ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
261 ; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
262 ; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
263 ; GFX10-NEXT: s_setpc_b64 s[30:31]
265 ; GFX10-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
266 ; GFX10-CONTRACT: ; %bb.0: ; %entry
267 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
269 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
270 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
272 ; GFX10-DENORM-LABEL: test_v4f16_sub_ext_neg_mul:
273 ; GFX10-DENORM: ; %bb.0: ; %entry
274 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
276 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
277 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
278 ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
279 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
280 ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
281 ; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
282 ; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
283 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
285 %a = fmul <4 x half> %x, %y
286 %b = fneg <4 x half> %a
287 %c = fsub <4 x half> %b, %z
291 define <4 x double> @test_v4f64_sub_ext_neg_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
292 ; GFX9-LABEL: test_v4f64_sub_ext_neg_mul:
293 ; GFX9: ; %bb.0: ; %entry
294 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
296 ; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
297 ; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
298 ; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
299 ; GFX9-NEXT: v_add_f64 v[0:1], -v[0:1], -v[16:17]
300 ; GFX9-NEXT: v_add_f64 v[2:3], -v[2:3], -v[18:19]
301 ; GFX9-NEXT: v_add_f64 v[4:5], -v[4:5], -v[20:21]
302 ; GFX9-NEXT: v_add_f64 v[6:7], -v[6:7], -v[22:23]
303 ; GFX9-NEXT: s_setpc_b64 s[30:31]
305 ; GFX9-CONTRACT-LABEL: test_v4f64_sub_ext_neg_mul:
306 ; GFX9-CONTRACT: ; %bb.0: ; %entry
307 ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], -v[16:17]
309 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], -v[18:19]
310 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], -v[20:21]
311 ; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], -v[22:23]
312 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
314 ; GFX9-DENORM-LABEL: test_v4f64_sub_ext_neg_mul:
315 ; GFX9-DENORM: ; %bb.0: ; %entry
316 ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317 ; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
318 ; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
319 ; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
320 ; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
321 ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[16:17]
322 ; GFX9-DENORM-NEXT: v_add_f64 v[2:3], -v[2:3], -v[18:19]
323 ; GFX9-DENORM-NEXT: v_add_f64 v[4:5], -v[4:5], -v[20:21]
324 ; GFX9-DENORM-NEXT: v_add_f64 v[6:7], -v[6:7], -v[22:23]
325 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
327 ; GFX10-LABEL: test_v4f64_sub_ext_neg_mul:
328 ; GFX10: ; %bb.0: ; %entry
329 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
331 ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
332 ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
333 ; GFX10-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
334 ; GFX10-NEXT: v_add_f64 v[0:1], -v[0:1], -v[16:17]
335 ; GFX10-NEXT: v_add_f64 v[2:3], -v[2:3], -v[18:19]
336 ; GFX10-NEXT: v_add_f64 v[4:5], -v[4:5], -v[20:21]
337 ; GFX10-NEXT: v_add_f64 v[6:7], -v[6:7], -v[22:23]
338 ; GFX10-NEXT: s_setpc_b64 s[30:31]
340 ; GFX10-CONTRACT-LABEL: test_v4f64_sub_ext_neg_mul:
341 ; GFX10-CONTRACT: ; %bb.0: ; %entry
342 ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], -v[16:17]
344 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], -v[18:19]
345 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], -v[20:21]
346 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], -v[22:23]
347 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
349 ; GFX10-DENORM-LABEL: test_v4f64_sub_ext_neg_mul:
350 ; GFX10-DENORM: ; %bb.0: ; %entry
351 ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
353 ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
354 ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
355 ; GFX10-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
356 ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[16:17]
357 ; GFX10-DENORM-NEXT: v_add_f64 v[2:3], -v[2:3], -v[18:19]
358 ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], -v[4:5], -v[20:21]
359 ; GFX10-DENORM-NEXT: v_add_f64 v[6:7], -v[6:7], -v[22:23]
360 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
362 %a = fmul <4 x double> %x, %y
363 %b = fneg <4 x double> %a
364 %c = fsub <4 x double> %b, %z