1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s --check-prefix=FMA
3 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s --check-prefix=FMA
4 ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s --check-prefix=FMA
; Attribute group referenced as #0 by every test function visible in this file.
6 attributes #0 = { nounwind }
; Scalar single-precision (ss) multiply-add intrinsic. The three tests below
; repeat an argument (call orders b,a,a / a,b,a / b,b,a) and pin which
; 132/213/231 instruction form llc emits for each order.
8 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Call order (%b, %a, %a): expects a vector load, a scalar vmovss load, and a
; register-only vfmadd213ss.
9 define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
10 ; FMA-LABEL: test_x86_fmadd_baa_ss:
12 ; FMA-NEXT: vmovaps (%rdx), %xmm0
13 ; FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
14 ; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
16 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Call order (%a, %b, %a): expects one vector load and vfmadd132ss with the
; second multiplicand folded from memory.
20 define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
21 ; FMA-LABEL: test_x86_fmadd_aba_ss:
23 ; FMA-NEXT: vmovaps (%rcx), %xmm0
24 ; FMA-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
26 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Call order (%b, %b, %a): expects one vector load and vfmadd213ss that
; multiplies xmm0 by itself with the addend folded from memory.
30 define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
31 ; FMA-LABEL: test_x86_fmadd_bba_ss:
33 ; FMA-NEXT: vmovaps (%rdx), %xmm0
34 ; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
36 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Packed single-precision (ps, 128-bit) multiply-add intrinsic. The three tests
; below repeat an argument (call orders b,a,a / a,b,a / b,b,a) and pin which
; 132/213/231 instruction form llc emits for each order.
40 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfmadd132ps with a folded
; memory multiplicand.
41 define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
42 ; FMA-LABEL: test_x86_fmadd_baa_ps:
44 ; FMA-NEXT: vmovaps (%rcx), %xmm0
45 ; FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
47 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Call order (%a, %b, %a): expects one load and vfmadd231ps with a folded
; memory multiplicand.
51 define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
52 ; FMA-LABEL: test_x86_fmadd_aba_ps:
54 ; FMA-NEXT: vmovaps (%rcx), %xmm0
55 ; FMA-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
57 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Call order (%b, %b, %a): expects one load and vfmadd213ps that multiplies
; xmm0 by itself with the addend folded from memory.
61 define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
62 ; FMA-LABEL: test_x86_fmadd_bba_ps:
64 ; FMA-NEXT: vmovaps (%rdx), %xmm0
65 ; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
67 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Packed single-precision 256-bit (ymm) multiply-add intrinsic. The three tests
; below repeat an argument (call orders b,a,a / a,b,a / b,b,a) and pin which
; 132/213/231 instruction form llc emits for each order.
71 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfmadd132ps with a folded
; memory multiplicand.
72 define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
73 ; FMA-LABEL: test_x86_fmadd_baa_ps_y:
75 ; FMA-NEXT: vmovaps (%rcx), %ymm0
76 ; FMA-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
78 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; Call order (%a, %b, %a): expects one load and vfmadd231ps with a folded
; memory multiplicand.
82 define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
83 ; FMA-LABEL: test_x86_fmadd_aba_ps_y:
85 ; FMA-NEXT: vmovaps (%rcx), %ymm0
86 ; FMA-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
88 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; Call order (%b, %b, %a): expects one load and vfmadd213ps that multiplies
; ymm0 by itself with the addend folded from memory.
92 define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
93 ; FMA-LABEL: test_x86_fmadd_bba_ps_y:
95 ; FMA-NEXT: vmovaps (%rdx), %ymm0
96 ; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
98 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Scalar double-precision (sd) multiply-add intrinsic. The three tests below
; repeat an argument (call orders b,a,a / a,b,a / b,b,a) and pin which
; 132/213/231 instruction form llc emits for each order.
102 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Call order (%b, %a, %a): expects a vector load, a scalar vmovsd load, and a
; register-only vfmadd213sd.
103 define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
104 ; FMA-LABEL: test_x86_fmadd_baa_sd:
106 ; FMA-NEXT: vmovapd (%rdx), %xmm0
107 ; FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
108 ; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
110 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
111 ret <2 x double> %res
; Call order (%a, %b, %a): expects one vector load and vfmadd132sd with the
; second multiplicand folded from memory.
114 define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
115 ; FMA-LABEL: test_x86_fmadd_aba_sd:
117 ; FMA-NEXT: vmovapd (%rcx), %xmm0
118 ; FMA-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
120 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
121 ret <2 x double> %res
; Call order (%b, %b, %a): expects one vector load and vfmadd213sd that
; multiplies xmm0 by itself with the addend folded from memory.
124 define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
125 ; FMA-LABEL: test_x86_fmadd_bba_sd:
127 ; FMA-NEXT: vmovapd (%rdx), %xmm0
128 ; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
130 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
131 ret <2 x double> %res
; Packed double-precision (pd, 128-bit) multiply-add intrinsic. The three tests
; below repeat an argument (call orders b,a,a / a,b,a / b,b,a) and pin which
; 132/213/231 instruction form llc emits for each order.
134 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfmadd132pd with a folded
; memory multiplicand.
135 define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
136 ; FMA-LABEL: test_x86_fmadd_baa_pd:
138 ; FMA-NEXT: vmovapd (%rcx), %xmm0
139 ; FMA-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
141 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
142 ret <2 x double> %res
; Call order (%a, %b, %a): expects one load and vfmadd231pd with a folded
; memory multiplicand.
145 define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
146 ; FMA-LABEL: test_x86_fmadd_aba_pd:
148 ; FMA-NEXT: vmovapd (%rcx), %xmm0
149 ; FMA-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
151 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
152 ret <2 x double> %res
; Call order (%b, %b, %a): expects one load and vfmadd213pd that multiplies
; xmm0 by itself with the addend folded from memory.
155 define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
156 ; FMA-LABEL: test_x86_fmadd_bba_pd:
158 ; FMA-NEXT: vmovapd (%rdx), %xmm0
159 ; FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
161 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
162 ret <2 x double> %res
; Packed double-precision 256-bit (ymm) multiply-add intrinsic. The three tests
; below repeat an argument (call orders b,a,a / a,b,a / b,b,a) and pin which
; 132/213/231 instruction form llc emits for each order.
165 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfmadd132pd with a folded
; memory multiplicand.
166 define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
167 ; FMA-LABEL: test_x86_fmadd_baa_pd_y:
169 ; FMA-NEXT: vmovapd (%rcx), %ymm0
170 ; FMA-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
172 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
173 ret <4 x double> %res
; Call order (%a, %b, %a): expects one load and vfmadd231pd with a folded
; memory multiplicand.
176 define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
177 ; FMA-LABEL: test_x86_fmadd_aba_pd_y:
179 ; FMA-NEXT: vmovapd (%rcx), %ymm0
180 ; FMA-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
182 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
183 ret <4 x double> %res
; Call order (%b, %b, %a): expects one load and vfmadd213pd that multiplies
; ymm0 by itself with the addend folded from memory.
186 define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
187 ; FMA-LABEL: test_x86_fmadd_bba_pd_y:
189 ; FMA-NEXT: vmovapd (%rdx), %ymm0
190 ; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
192 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
193 ret <4 x double> %res
; Scalar single-precision (ss) negated-multiply-add intrinsic; the expected
; output has the shape -(x * y) + z. The three tests below repeat an argument
; (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form llc emits.
197 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Call order (%b, %a, %a): expects a vector load, a scalar vmovss load, and a
; register-only vfnmadd213ss.
198 define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
199 ; FMA-LABEL: test_x86_fnmadd_baa_ss:
201 ; FMA-NEXT: vmovaps (%rdx), %xmm0
202 ; FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
203 ; FMA-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
205 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Call order (%a, %b, %a): expects one vector load and vfnmadd132ss with the
; second multiplicand folded from memory.
209 define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
210 ; FMA-LABEL: test_x86_fnmadd_aba_ss:
212 ; FMA-NEXT: vmovaps (%rcx), %xmm0
213 ; FMA-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
215 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Call order (%b, %b, %a): expects one vector load and vfnmadd213ss that
; multiplies xmm0 by itself with the addend folded from memory.
219 define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
220 ; FMA-LABEL: test_x86_fnmadd_bba_ss:
222 ; FMA-NEXT: vmovaps (%rdx), %xmm0
223 ; FMA-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
225 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Packed single-precision (ps, 128-bit) negated-multiply-add intrinsic; the
; expected output has the shape -(x * y) + z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
229 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfnmadd132ps with a folded
; memory multiplicand.
230 define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
231 ; FMA-LABEL: test_x86_fnmadd_baa_ps:
233 ; FMA-NEXT: vmovaps (%rcx), %xmm0
234 ; FMA-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
236 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Call order (%a, %b, %a): expects one load and vfnmadd231ps with a folded
; memory multiplicand.
240 define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
241 ; FMA-LABEL: test_x86_fnmadd_aba_ps:
243 ; FMA-NEXT: vmovaps (%rcx), %xmm0
244 ; FMA-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
246 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Call order (%b, %b, %a): expects one load and vfnmadd213ps that multiplies
; xmm0 by itself with the addend folded from memory.
250 define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
251 ; FMA-LABEL: test_x86_fnmadd_bba_ps:
253 ; FMA-NEXT: vmovaps (%rdx), %xmm0
254 ; FMA-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
256 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Packed single-precision 256-bit (ymm) negated-multiply-add intrinsic; the
; expected output has the shape -(x * y) + z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
260 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfnmadd132ps with a folded
; memory multiplicand.
261 define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
262 ; FMA-LABEL: test_x86_fnmadd_baa_ps_y:
264 ; FMA-NEXT: vmovaps (%rcx), %ymm0
265 ; FMA-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
267 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; Call order (%a, %b, %a): expects one load and vfnmadd231ps with a folded
; memory multiplicand.
271 define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
272 ; FMA-LABEL: test_x86_fnmadd_aba_ps_y:
274 ; FMA-NEXT: vmovaps (%rcx), %ymm0
275 ; FMA-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
277 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; Call order (%b, %b, %a): expects one load and vfnmadd213ps that multiplies
; ymm0 by itself with the addend folded from memory.
281 define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
282 ; FMA-LABEL: test_x86_fnmadd_bba_ps_y:
284 ; FMA-NEXT: vmovaps (%rdx), %ymm0
285 ; FMA-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
287 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Scalar double-precision (sd) negated-multiply-add intrinsic; the expected
; output has the shape -(x * y) + z. The three tests below repeat an argument
; (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form llc emits.
291 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Call order (%b, %a, %a): expects a vector load, a scalar vmovsd load, and a
; register-only vfnmadd213sd.
292 define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
293 ; FMA-LABEL: test_x86_fnmadd_baa_sd:
295 ; FMA-NEXT: vmovapd (%rdx), %xmm0
296 ; FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
297 ; FMA-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
299 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
300 ret <2 x double> %res
; Call order (%a, %b, %a): expects one vector load and vfnmadd132sd with the
; second multiplicand folded from memory.
303 define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
304 ; FMA-LABEL: test_x86_fnmadd_aba_sd:
306 ; FMA-NEXT: vmovapd (%rcx), %xmm0
307 ; FMA-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
309 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
310 ret <2 x double> %res
; Call order (%b, %b, %a): expects one vector load and vfnmadd213sd that
; multiplies xmm0 by itself with the addend folded from memory.
313 define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
314 ; FMA-LABEL: test_x86_fnmadd_bba_sd:
316 ; FMA-NEXT: vmovapd (%rdx), %xmm0
317 ; FMA-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
319 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
320 ret <2 x double> %res
; Packed double-precision (pd, 128-bit) negated-multiply-add intrinsic; the
; expected output has the shape -(x * y) + z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
323 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfnmadd132pd with a folded
; memory multiplicand.
324 define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
325 ; FMA-LABEL: test_x86_fnmadd_baa_pd:
327 ; FMA-NEXT: vmovapd (%rcx), %xmm0
328 ; FMA-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
330 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
331 ret <2 x double> %res
; Call order (%a, %b, %a): expects one load and vfnmadd231pd with a folded
; memory multiplicand.
334 define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
335 ; FMA-LABEL: test_x86_fnmadd_aba_pd:
337 ; FMA-NEXT: vmovapd (%rcx), %xmm0
338 ; FMA-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
340 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
341 ret <2 x double> %res
; Call order (%b, %b, %a): expects one load and vfnmadd213pd that multiplies
; xmm0 by itself with the addend folded from memory.
344 define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
345 ; FMA-LABEL: test_x86_fnmadd_bba_pd:
347 ; FMA-NEXT: vmovapd (%rdx), %xmm0
348 ; FMA-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
350 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
351 ret <2 x double> %res
; Packed double-precision 256-bit (ymm) negated-multiply-add intrinsic; the
; expected output has the shape -(x * y) + z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
354 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfnmadd132pd with a folded
; memory multiplicand.
355 define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
356 ; FMA-LABEL: test_x86_fnmadd_baa_pd_y:
358 ; FMA-NEXT: vmovapd (%rcx), %ymm0
359 ; FMA-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
361 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
362 ret <4 x double> %res
; Call order (%a, %b, %a): expects one load and vfnmadd231pd with a folded
; memory multiplicand.
365 define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
366 ; FMA-LABEL: test_x86_fnmadd_aba_pd_y:
368 ; FMA-NEXT: vmovapd (%rcx), %ymm0
369 ; FMA-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
371 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
372 ret <4 x double> %res
; Call order (%b, %b, %a): expects one load and vfnmadd213pd that multiplies
; ymm0 by itself with the addend folded from memory.
375 define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
376 ; FMA-LABEL: test_x86_fnmadd_bba_pd_y:
378 ; FMA-NEXT: vmovapd (%rdx), %ymm0
379 ; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
381 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
382 ret <4 x double> %res
; Scalar single-precision (ss) multiply-subtract intrinsic; the expected
; output has the shape (x * y) - z. The three tests below repeat an argument
; (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form llc emits.
385 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Call order (%b, %a, %a): expects a vector load, a scalar vmovss load, and a
; register-only vfmsub213ss.
386 define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
387 ; FMA-LABEL: test_x86_fmsub_baa_ss:
389 ; FMA-NEXT: vmovaps (%rdx), %xmm0
390 ; FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
391 ; FMA-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
393 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Call order (%a, %b, %a): expects one vector load and vfmsub132ss with the
; second multiplicand folded from memory.
397 define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
398 ; FMA-LABEL: test_x86_fmsub_aba_ss:
400 ; FMA-NEXT: vmovaps (%rcx), %xmm0
401 ; FMA-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
403 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Call order (%b, %b, %a): expects one vector load and vfmsub213ss that
; multiplies xmm0 by itself with the subtrahend folded from memory.
407 define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
408 ; FMA-LABEL: test_x86_fmsub_bba_ss:
410 ; FMA-NEXT: vmovaps (%rdx), %xmm0
411 ; FMA-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
413 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Packed single-precision (ps, 128-bit) multiply-subtract intrinsic; the
; expected output has the shape (x * y) - z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
417 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfmsub132ps with a folded
; memory multiplicand.
418 define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
419 ; FMA-LABEL: test_x86_fmsub_baa_ps:
421 ; FMA-NEXT: vmovaps (%rcx), %xmm0
422 ; FMA-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
424 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Call order (%a, %b, %a): expects one load and vfmsub231ps with a folded
; memory multiplicand.
428 define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
429 ; FMA-LABEL: test_x86_fmsub_aba_ps:
431 ; FMA-NEXT: vmovaps (%rcx), %xmm0
432 ; FMA-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
434 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Call order (%b, %b, %a): expects one load and vfmsub213ps that multiplies
; xmm0 by itself with the subtrahend folded from memory.
438 define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
439 ; FMA-LABEL: test_x86_fmsub_bba_ps:
441 ; FMA-NEXT: vmovaps (%rdx), %xmm0
442 ; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
444 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Packed single-precision 256-bit (ymm) multiply-subtract intrinsic; the
; expected output has the shape (x * y) - z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
448 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfmsub132ps with a folded
; memory multiplicand.
449 define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
450 ; FMA-LABEL: test_x86_fmsub_baa_ps_y:
452 ; FMA-NEXT: vmovaps (%rcx), %ymm0
453 ; FMA-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
455 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; Call order (%a, %b, %a): expects one load and vfmsub231ps with a folded
; memory multiplicand.
459 define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
460 ; FMA-LABEL: test_x86_fmsub_aba_ps_y:
462 ; FMA-NEXT: vmovaps (%rcx), %ymm0
463 ; FMA-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
465 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; Call order (%b, %b, %a): expects one load and vfmsub213ps that multiplies
; ymm0 by itself with the subtrahend folded from memory.
469 define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
470 ; FMA-LABEL: test_x86_fmsub_bba_ps_y:
472 ; FMA-NEXT: vmovaps (%rdx), %ymm0
473 ; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
475 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Scalar double-precision (sd) multiply-subtract intrinsic; the expected
; output has the shape (x * y) - z. The three tests below repeat an argument
; (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form llc emits.
479 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Call order (%b, %a, %a): expects a vector load, a scalar vmovsd load, and a
; register-only vfmsub213sd.
480 define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
481 ; FMA-LABEL: test_x86_fmsub_baa_sd:
483 ; FMA-NEXT: vmovapd (%rdx), %xmm0
484 ; FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
485 ; FMA-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
487 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
488 ret <2 x double> %res
; Call order (%a, %b, %a): expects one vector load and vfmsub132sd with the
; second multiplicand folded from memory.
491 define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
492 ; FMA-LABEL: test_x86_fmsub_aba_sd:
494 ; FMA-NEXT: vmovapd (%rcx), %xmm0
495 ; FMA-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
497 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
498 ret <2 x double> %res
; Call order (%b, %b, %a): expects one vector load and vfmsub213sd that
; multiplies xmm0 by itself with the subtrahend folded from memory.
501 define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
502 ; FMA-LABEL: test_x86_fmsub_bba_sd:
504 ; FMA-NEXT: vmovapd (%rdx), %xmm0
505 ; FMA-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
507 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
508 ret <2 x double> %res
; Packed double-precision (pd, 128-bit) multiply-subtract intrinsic; the
; expected output has the shape (x * y) - z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
511 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfmsub132pd with a folded
; memory multiplicand.
512 define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
513 ; FMA-LABEL: test_x86_fmsub_baa_pd:
515 ; FMA-NEXT: vmovapd (%rcx), %xmm0
516 ; FMA-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
518 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
519 ret <2 x double> %res
; Call order (%a, %b, %a): expects one load and vfmsub231pd with a folded
; memory multiplicand.
522 define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
523 ; FMA-LABEL: test_x86_fmsub_aba_pd:
525 ; FMA-NEXT: vmovapd (%rcx), %xmm0
526 ; FMA-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
528 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
529 ret <2 x double> %res
; Call order (%b, %b, %a): expects one load and vfmsub213pd that multiplies
; xmm0 by itself with the subtrahend folded from memory.
532 define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
533 ; FMA-LABEL: test_x86_fmsub_bba_pd:
535 ; FMA-NEXT: vmovapd (%rdx), %xmm0
536 ; FMA-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
538 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
539 ret <2 x double> %res
; Packed double-precision 256-bit (ymm) multiply-subtract intrinsic; the
; expected output has the shape (x * y) - z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
542 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfmsub132pd with a folded
; memory multiplicand.
543 define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
544 ; FMA-LABEL: test_x86_fmsub_baa_pd_y:
546 ; FMA-NEXT: vmovapd (%rcx), %ymm0
547 ; FMA-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
549 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
550 ret <4 x double> %res
; Call order (%a, %b, %a): expects one load and vfmsub231pd with a folded
; memory multiplicand.
553 define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
554 ; FMA-LABEL: test_x86_fmsub_aba_pd_y:
556 ; FMA-NEXT: vmovapd (%rcx), %ymm0
557 ; FMA-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
559 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
560 ret <4 x double> %res
; Call order (%b, %b, %a): expects one load and vfmsub213pd that multiplies
; ymm0 by itself with the subtrahend folded from memory.
563 define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
564 ; FMA-LABEL: test_x86_fmsub_bba_pd_y:
566 ; FMA-NEXT: vmovapd (%rdx), %ymm0
567 ; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
569 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
570 ret <4 x double> %res
; Scalar single-precision (ss) negated-multiply-subtract intrinsic; the
; expected output has the shape -(x * y) - z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
574 declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Call order (%b, %a, %a): expects a vector load, a scalar vmovss load, and a
; register-only vfnmsub213ss.
575 define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
576 ; FMA-LABEL: test_x86_fnmsub_baa_ss:
578 ; FMA-NEXT: vmovaps (%rdx), %xmm0
579 ; FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
580 ; FMA-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
582 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Call order (%a, %b, %a): expects one vector load and vfnmsub132ss with the
; second multiplicand folded from memory.
586 define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
587 ; FMA-LABEL: test_x86_fnmsub_aba_ss:
589 ; FMA-NEXT: vmovaps (%rcx), %xmm0
590 ; FMA-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
592 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Call order (%b, %b, %a): expects one vector load and vfnmsub213ss that
; multiplies xmm0 by itself with the subtrahend folded from memory.
596 define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
597 ; FMA-LABEL: test_x86_fnmsub_bba_ss:
599 ; FMA-NEXT: vmovaps (%rdx), %xmm0
600 ; FMA-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
602 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Packed single-precision (ps, 128-bit) negated-multiply-subtract intrinsic;
; the expected output has the shape -(x * y) - z. The three tests below repeat
; an argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
606 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfnmsub132ps with a folded
; memory multiplicand.
607 define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
608 ; FMA-LABEL: test_x86_fnmsub_baa_ps:
610 ; FMA-NEXT: vmovaps (%rcx), %xmm0
611 ; FMA-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
613 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Call order (%a, %b, %a): expects one load and vfnmsub231ps with a folded
; memory multiplicand.
617 define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
618 ; FMA-LABEL: test_x86_fnmsub_aba_ps:
620 ; FMA-NEXT: vmovaps (%rcx), %xmm0
621 ; FMA-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
623 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Call order (%b, %b, %a): expects one load and vfnmsub213ps that multiplies
; xmm0 by itself with the subtrahend folded from memory.
627 define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
628 ; FMA-LABEL: test_x86_fnmsub_bba_ps:
630 ; FMA-NEXT: vmovaps (%rdx), %xmm0
631 ; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
633 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Packed single-precision 256-bit (ymm) negated-multiply-subtract intrinsic;
; the expected output has the shape -(x * y) - z. The three tests below repeat
; an argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
637 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfnmsub132ps with a folded
; memory multiplicand.
638 define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
639 ; FMA-LABEL: test_x86_fnmsub_baa_ps_y:
641 ; FMA-NEXT: vmovaps (%rcx), %ymm0
642 ; FMA-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
644 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; Call order (%a, %b, %a): expects one load and vfnmsub231ps with a folded
; memory multiplicand.
648 define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
649 ; FMA-LABEL: test_x86_fnmsub_aba_ps_y:
651 ; FMA-NEXT: vmovaps (%rcx), %ymm0
652 ; FMA-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
654 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; Call order (%b, %b, %a): expects one load and vfnmsub213ps that multiplies
; ymm0 by itself with the subtrahend folded from memory.
658 define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
659 ; FMA-LABEL: test_x86_fnmsub_bba_ps_y:
661 ; FMA-NEXT: vmovaps (%rdx), %ymm0
662 ; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
664 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Scalar double-precision (sd) negated-multiply-subtract intrinsic; the
; expected output has the shape -(x * y) - z. The three tests below repeat an
; argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
668 declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Call order (%b, %a, %a): expects a vector load, a scalar vmovsd load, and a
; register-only vfnmsub213sd.
669 define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
670 ; FMA-LABEL: test_x86_fnmsub_baa_sd:
672 ; FMA-NEXT: vmovapd (%rdx), %xmm0
673 ; FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
674 ; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
676 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
677 ret <2 x double> %res
; Call order (%a, %b, %a): expects one vector load and vfnmsub132sd with the
; second multiplicand folded from memory.
680 define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
681 ; FMA-LABEL: test_x86_fnmsub_aba_sd:
683 ; FMA-NEXT: vmovapd (%rcx), %xmm0
684 ; FMA-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
686 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
687 ret <2 x double> %res
; Call order (%b, %b, %a): expects one vector load and vfnmsub213sd that
; multiplies xmm0 by itself with the subtrahend folded from memory.
690 define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
691 ; FMA-LABEL: test_x86_fnmsub_bba_sd:
693 ; FMA-NEXT: vmovapd (%rdx), %xmm0
694 ; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
696 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
697 ret <2 x double> %res
; Packed double-precision (pd, 128-bit) negated-multiply-subtract intrinsic;
; the expected output has the shape -(x * y) - z. The three tests below repeat
; an argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
700 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfnmsub132pd with a folded
; memory multiplicand.
701 define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
702 ; FMA-LABEL: test_x86_fnmsub_baa_pd:
704 ; FMA-NEXT: vmovapd (%rcx), %xmm0
705 ; FMA-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
707 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
708 ret <2 x double> %res
; Call order (%a, %b, %a): expects one load and vfnmsub231pd with a folded
; memory multiplicand.
711 define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
712 ; FMA-LABEL: test_x86_fnmsub_aba_pd:
714 ; FMA-NEXT: vmovapd (%rcx), %xmm0
715 ; FMA-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
717 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
718 ret <2 x double> %res
; Call order (%b, %b, %a): expects one load and vfnmsub213pd that multiplies
; xmm0 by itself with the subtrahend folded from memory.
721 define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
722 ; FMA-LABEL: test_x86_fnmsub_bba_pd:
724 ; FMA-NEXT: vmovapd (%rdx), %xmm0
725 ; FMA-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
727 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
728 ret <2 x double> %res
; Packed double-precision 256-bit (ymm) negated-multiply-subtract intrinsic;
; the expected output has the shape -(x * y) - z. The three tests below repeat
; an argument (call orders b,a,a / a,b,a / b,b,a) and pin the instruction form.
731 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Call order (%b, %a, %a): expects one load and vfnmsub132pd with a folded
; memory multiplicand.
732 define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
733 ; FMA-LABEL: test_x86_fnmsub_baa_pd_y:
735 ; FMA-NEXT: vmovapd (%rcx), %ymm0
736 ; FMA-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
738 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
739 ret <4 x double> %res
; Call order (%a, %b, %a): expects one load and vfnmsub231pd with a folded
; memory multiplicand.
742 define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
743 ; FMA-LABEL: test_x86_fnmsub_aba_pd_y:
745 ; FMA-NEXT: vmovapd (%rcx), %ymm0
746 ; FMA-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
748 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
749 ret <4 x double> %res
; Call order (%b, %b, %a): expects one load and vfnmsub213pd that multiplies
; ymm0 by itself with the subtrahend folded from memory.
752 define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
753 ; FMA-LABEL: test_x86_fnmsub_bba_pd_y:
755 ; FMA-NEXT: vmovapd (%rdx), %ymm0
756 ; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
758 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
759 ret <4 x double> %res