; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA --check-prefix=FMA-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS
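
; Prefix scheme: the bare FMA/FMA4/AVX512 prefixes are shared by all runs; the
; -INFS prefixes check the default FP model, while the -NOINFS prefixes check
; the -enable-no-infs-fp-math runs, where the mul(add/sub(x, +/-1.0), y)
; patterns below are additionally folded into FMA forms.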
;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
;

define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd:
; FMA: # %bb.0:
; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm4
; FMA-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_16f32_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmadd:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT: retq
  %x = fmul <16 x float> %a0, %a1
  %res = fadd <16 x float> %x, %a2
  ret <16 x float> %res
}
define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmadd:
; FMA: # %bb.0:
; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm4
; FMA-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f64_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmadd:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT: retq
  %x = fmul <8 x double> %a0, %a1
  %res = fadd <8 x double> %x, %a2
  ret <8 x double> %res
}
;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmsub:
; FMA: # %bb.0:
; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm4
; FMA-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_16f32_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmsub:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %x, %a2
  ret <16 x float> %res
}
define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub:
; FMA: # %bb.0:
; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm4
; FMA-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f64_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmsub:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %x, %a2
  ret <8 x double> %res
}
;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmadd:
; FMA: # %bb.0:
; FMA-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm4
; FMA-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_16f32_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fnmadd:
; AVX512: # %bb.0:
; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; AVX512-NEXT: retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %a2, %x
  ret <16 x float> %res
}
define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmadd:
; FMA: # %bb.0:
; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm4
; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f64_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fnmadd:
; AVX512: # %bb.0:
; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; AVX512-NEXT: retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %a2, %x
  ret <8 x double> %res
}
;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;

define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmsub:
; FMA: # %bb.0:
; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_16f32_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fnmsub:
; AVX512: # %bb.0:
; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
  %x = fmul <16 x float> %a0, %a1
  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <16 x float> %y, %a2
  ret <16 x float> %res
}
define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmsub:
; FMA: # %bb.0:
; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f64_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fnmsub:
; AVX512: # %bb.0:
; AVX512-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
  %x = fmul <8 x double> %a0, %a1
  %y = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}
;
; Load Folding Patterns
;

define <16 x float> @test_16f32_fmadd_load(ptr %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd_load:
; FMA: # %bb.0:
; FMA-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA-NEXT: retq
;
; FMA4-LABEL: test_16f32_fmadd_load:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmadd_load:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; AVX512-NEXT: retq
  %x = load <16 x float>, ptr %a0
  %y = fmul <16 x float> %x, %a1
  %res = fadd <16 x float> %y, %a2
  ret <16 x float> %res
}
define <8 x double> @test_8f64_fmsub_load(ptr %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub_load:
; FMA: # %bb.0:
; FMA-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm2
; FMA-NEXT: vfmsub132pd {{.*#+}} ymm1 = (ymm1 * mem) - ymm3
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f64_fmsub_load:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm2
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * mem) - ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmsub_load:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmsub132pd {{.*#+}} zmm0 = (zmm0 * mem) - zmm1
; AVX512-NEXT: retq
  %x = load <8 x double>, ptr %a0
  %y = fmul <8 x double> %x, %a1
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}
;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;

define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
; AVX512-NOINFS-NEXT: retq
  %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}
define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
; AVX512-NOINFS-NEXT: retq
  %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}
define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
; AVX512-NOINFS-NEXT: retq
  %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}
define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
; AVX512-NOINFS-NEXT: retq
  %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}
define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1
; AVX512-NOINFS-NEXT: retq
  %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubpd %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT: vfnmadd213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1
; AVX512-NOINFS-NEXT: retq
  %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT: vsubps %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1
; AVX512-NOINFS-NEXT: retq
  %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT: vsubpd %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfnmsub213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1
; AVX512-NOINFS-NEXT: retq
  %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
; AVX512-NOINFS-NEXT: retq
  %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
; AVX512-NOINFS-NEXT: retq
  %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
; AVX512-NOINFS-NEXT: retq
  %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
; AVX512-NOINFS-NEXT: retq
  %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
; FMA-INFS-LABEL: test_v16f32_interp:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
; FMA-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
; FMA-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
; FMA-INFS-NEXT: vmulps %ymm7, %ymm2, %ymm2
; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm2
; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm5 * ymm1) + ymm3
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v16f32_interp:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulps %ymm7, %ymm2, %ymm2
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_interp:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %zmm2, %zmm3, %zmm3
; AVX512-INFS-NEXT: vmulps %zmm3, %zmm1, %zmm1
; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v16f32_interp:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm5 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v16f32_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_interp:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
; AVX512-NOINFS-NEXT: retq
  %t1 = fsub nsz <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul nsz <16 x float> %x, %t
  %ty = fmul nsz <16 x float> %y, %t1
  %r = fadd nsz <16 x float> %tx, %ty
  ret <16 x float> %r
}
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
; FMA-INFS-LABEL: test_v8f64_interp:
; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
; FMA-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
; FMA-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
; FMA-INFS-NEXT: vmulpd %ymm7, %ymm2, %ymm2
; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm2
; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm5 * ymm1) + ymm3
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v8f64_interp:
; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulpd %ymm7, %ymm2, %ymm2
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_interp:
; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubpd %zmm2, %zmm3, %zmm3
; AVX512-INFS-NEXT: vmulpd %zmm3, %zmm1, %zmm1
; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
; AVX512-INFS-NEXT: retq
;
; FMA-NOINFS-LABEL: test_v8f64_interp:
; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm5 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq
;
; FMA4-NOINFS-LABEL: test_v8f64_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_interp:
; AVX512-NOINFS: # %bb.0:
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
; AVX512-NOINFS-NEXT: retq
  %t1 = fsub nsz <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul nsz <8 x double> %x, %t
  %ty = fmul nsz <8 x double> %y, %t1
  %r = fadd nsz <8 x double> %tx, %ty
  ret <8 x double> %r
}
;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;

define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fmadd:
; FMA: # %bb.0:
; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
; AVX512: # %bb.0:
; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
  %mul = fmul nsz <16 x float> %a0, %a1
  %add = fadd nsz <16 x float> %mul, %a2
  %neg = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg
}
define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fmsub:
; FMA: # %bb.0:
; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm4
; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
; AVX512: # %bb.0:
; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; AVX512-NEXT: retq
  %mul = fmul nsz <8 x double> %a0, %a1
  %sub = fsub nsz <8 x double> %mul, %a2
  %neg = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg
}
define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fnmadd:
; FMA: # %bb.0:
; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm4
; FMA-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
  %mul = fmul nsz <16 x float> %a0, %a1
  %neg0 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd nsz <16 x float> %neg0, %a2
  %neg1 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg1
}
define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fnmsub:
; FMA: # %bb.0:
; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm4
; FMA-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm5
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT: retq
  %mul = fmul nsz <8 x double> %a0, %a1
  %neg0 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub nsz <8 x double> %neg0, %a2
  %neg1 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg1
}
;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;

define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA: # %bb.0:
; FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA4: # %bb.0:
; FMA4-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; FMA4-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m0, %m1
  ret <16 x float> %a
}
;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;

define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA: # %bb.0:
; FMA-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; AVX512-NEXT: retq
  %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m1, %y
  ret <16 x float> %a
}
; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)

define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fneg_fmul:
; FMA: # %bb.0:
; FMA-NEXT: vxorps %xmm4, %xmm4, %xmm4
; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm4
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorps %xmm4, %xmm4, %xmm4
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fmul:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
  %m = fmul nsz <16 x float> %x, %y
  %n = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %m
  ret <16 x float> %n
}
define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul:
; FMA: # %bb.0:
; FMA-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm4
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
  %m = fmul nsz <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}
define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA: # %bb.0:
; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; FMA-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA-NEXT: vxorpd %ymm2, %ymm0, %ymm0
; FMA-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA4: # %bb.0:
; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA4-NEXT: vxorpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-NEXT: retq
  %m = fmul <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}
attributes #0 = { "unsafe-fp-math"="true" }