1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=FMA4
4 attributes #0 = { nounwind }
; Operand-order tests for @llvm.x86.fma4.vfmadd.ss; the checks expect vfmaddss
; in the form (x * y) + z, preceded by a vmovss load of element 0 into xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect %a in xmm0 (one
;         multiplicand and the addend) with %b read from memory.
;   bba = (%b, %b, %a): expects %b in xmm0 as both multiplicands with %a read
;         from memory as the addend.
6 declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
7 define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
8 ; FMA4-LABEL: test_x86_fmadd_baa_ss:
10 ; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
11 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
13 %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
17 define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
18 ; FMA4-LABEL: test_x86_fmadd_aba_ss:
20 ; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
21 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
23 %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
27 define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
28 ; FMA4-LABEL: test_x86_fmadd_bba_ss:
30 ; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
31 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
33 %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmadd.ps (128-bit); the checks expect
; vfmaddps in the form (x * y) + z, preceded by a vmovaps load into xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the addend, with the other
;         multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the addend read from memory.
37 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
38 define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
39 ; FMA4-LABEL: test_x86_fmadd_baa_ps:
41 ; FMA4-NEXT: vmovaps (%rcx), %xmm0
42 ; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
44 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
48 define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
49 ; FMA4-LABEL: test_x86_fmadd_aba_ps:
51 ; FMA4-NEXT: vmovaps (%rcx), %xmm0
52 ; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
54 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
58 define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
59 ; FMA4-LABEL: test_x86_fmadd_bba_ps:
61 ; FMA4-NEXT: vmovaps (%rdx), %xmm0
62 ; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
64 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmadd.ps.256; the checks expect
; vfmaddps on ymm registers in the form (x * y) + z, preceded by a vmovaps
; load into ymm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the addend, with the other
;         multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the addend read from memory.
68 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
69 define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
70 ; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
72 ; FMA4-NEXT: vmovaps (%rcx), %ymm0
73 ; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
75 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
79 define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
80 ; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
82 ; FMA4-NEXT: vmovaps (%rcx), %ymm0
83 ; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
85 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
89 define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
90 ; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
92 ; FMA4-NEXT: vmovaps (%rdx), %ymm0
93 ; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
95 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma4.vfmadd.sd; the checks expect vfmaddsd
; in the form (x * y) + z, preceded by a vmovsd load of element 0 into xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect %a in xmm0 (one
;         multiplicand and the addend) with %b read from memory.
;   bba = (%b, %b, %a): expects %b in xmm0 as both multiplicands with %a read
;         from memory as the addend.
99 declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
100 define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
101 ; FMA4-LABEL: test_x86_fmadd_baa_sd:
103 ; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
104 ; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
106 %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
107 ret <2 x double> %res
110 define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
111 ; FMA4-LABEL: test_x86_fmadd_aba_sd:
113 ; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
114 ; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
116 %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
117 ret <2 x double> %res
120 define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
121 ; FMA4-LABEL: test_x86_fmadd_bba_sd:
123 ; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
124 ; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
126 %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
127 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmadd.pd (128-bit); the checks expect
; vfmaddpd in the form (x * y) + z, preceded by a vmovapd load into xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the addend, with the other
;         multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the addend read from memory.
130 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
131 define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
132 ; FMA4-LABEL: test_x86_fmadd_baa_pd:
134 ; FMA4-NEXT: vmovapd (%rcx), %xmm0
135 ; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
137 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
138 ret <2 x double> %res
141 define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
142 ; FMA4-LABEL: test_x86_fmadd_aba_pd:
144 ; FMA4-NEXT: vmovapd (%rcx), %xmm0
145 ; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
147 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
148 ret <2 x double> %res
151 define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
152 ; FMA4-LABEL: test_x86_fmadd_bba_pd:
154 ; FMA4-NEXT: vmovapd (%rdx), %xmm0
155 ; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
157 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
158 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmadd.pd.256; the checks expect
; vfmaddpd on ymm registers in the form (x * y) + z, preceded by a vmovapd
; load into ymm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the addend, with the other
;         multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the addend read from memory.
161 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
162 define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
163 ; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
165 ; FMA4-NEXT: vmovapd (%rcx), %ymm0
166 ; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
168 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
169 ret <4 x double> %res
172 define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
173 ; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
175 ; FMA4-NEXT: vmovapd (%rcx), %ymm0
176 ; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
178 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
179 ret <4 x double> %res
182 define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
183 ; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
185 ; FMA4-NEXT: vmovapd (%rdx), %ymm0
186 ; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
188 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
189 ret <4 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmadd.ps (128-bit); the checks
; expect vfnmaddps in the form -(x * y) + z, preceded by a vmovaps load into
; xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the third operand, with the
;         other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the third operand read from memory.
192 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
193 define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
194 ; FMA4-LABEL: test_x86_fnmadd_baa_ps:
196 ; FMA4-NEXT: vmovaps (%rcx), %xmm0
197 ; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
199 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
203 define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
204 ; FMA4-LABEL: test_x86_fnmadd_aba_ps:
206 ; FMA4-NEXT: vmovaps (%rcx), %xmm0
207 ; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
209 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
213 define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
214 ; FMA4-LABEL: test_x86_fnmadd_bba_ps:
216 ; FMA4-NEXT: vmovaps (%rdx), %xmm0
217 ; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
219 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmadd.ps.256; the checks expect
; vfnmaddps on ymm registers in the form -(x * y) + z, preceded by a vmovaps
; load into ymm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the third operand, with the
;         other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the third operand read from memory.
223 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
224 define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
225 ; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
227 ; FMA4-NEXT: vmovaps (%rcx), %ymm0
228 ; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
230 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
234 define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
235 ; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
237 ; FMA4-NEXT: vmovaps (%rcx), %ymm0
238 ; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
240 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
244 define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
245 ; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
247 ; FMA4-NEXT: vmovaps (%rdx), %ymm0
248 ; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
250 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmadd.pd (128-bit); the checks
; expect vfnmaddpd in the form -(x * y) + z, preceded by a vmovapd load into
; xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the third operand, with the
;         other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the third operand read from memory.
254 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
255 define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
256 ; FMA4-LABEL: test_x86_fnmadd_baa_pd:
258 ; FMA4-NEXT: vmovapd (%rcx), %xmm0
259 ; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
261 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
262 ret <2 x double> %res
265 define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
266 ; FMA4-LABEL: test_x86_fnmadd_aba_pd:
268 ; FMA4-NEXT: vmovapd (%rcx), %xmm0
269 ; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
271 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
272 ret <2 x double> %res
275 define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
276 ; FMA4-LABEL: test_x86_fnmadd_bba_pd:
278 ; FMA4-NEXT: vmovapd (%rdx), %xmm0
279 ; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
281 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
282 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmadd.pd.256; the checks expect
; vfnmaddpd on ymm registers in the form -(x * y) + z, preceded by a vmovapd
; load into ymm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the third operand, with the
;         other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the third operand read from memory.
285 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
286 define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
287 ; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
289 ; FMA4-NEXT: vmovapd (%rcx), %ymm0
290 ; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
292 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
293 ret <4 x double> %res
296 define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
297 ; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
299 ; FMA4-NEXT: vmovapd (%rcx), %ymm0
300 ; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
302 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
303 ret <4 x double> %res
306 define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
307 ; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
309 ; FMA4-NEXT: vmovapd (%rdx), %ymm0
310 ; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
312 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
313 ret <4 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmsub.ps (128-bit); the checks expect
; vfmsubps in the form (x * y) - z, preceded by a vmovaps load into xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the subtracted operand, with
;         the other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the subtracted operand read from memory.
316 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
317 define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
318 ; FMA4-LABEL: test_x86_fmsub_baa_ps:
320 ; FMA4-NEXT: vmovaps (%rcx), %xmm0
321 ; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
323 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
327 define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
328 ; FMA4-LABEL: test_x86_fmsub_aba_ps:
330 ; FMA4-NEXT: vmovaps (%rcx), %xmm0
331 ; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
333 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
337 define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
338 ; FMA4-LABEL: test_x86_fmsub_bba_ps:
340 ; FMA4-NEXT: vmovaps (%rdx), %xmm0
341 ; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
343 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmsub.ps.256; the checks expect
; vfmsubps on ymm registers in the form (x * y) - z, preceded by a vmovaps
; load into ymm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the subtracted operand, with
;         the other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the subtracted operand read from memory.
347 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
348 define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
349 ; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
351 ; FMA4-NEXT: vmovaps (%rcx), %ymm0
352 ; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
354 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
358 define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
359 ; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
361 ; FMA4-NEXT: vmovaps (%rcx), %ymm0
362 ; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
364 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
368 define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
369 ; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
371 ; FMA4-NEXT: vmovaps (%rdx), %ymm0
372 ; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
374 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmsub.pd (128-bit); the checks expect
; vfmsubpd in the form (x * y) - z, preceded by a vmovapd load into xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the subtracted operand, with
;         the other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the subtracted operand read from memory.
378 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
379 define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
380 ; FMA4-LABEL: test_x86_fmsub_baa_pd:
382 ; FMA4-NEXT: vmovapd (%rcx), %xmm0
383 ; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
385 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
386 ret <2 x double> %res
389 define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
390 ; FMA4-LABEL: test_x86_fmsub_aba_pd:
392 ; FMA4-NEXT: vmovapd (%rcx), %xmm0
393 ; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
395 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
396 ret <2 x double> %res
399 define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
400 ; FMA4-LABEL: test_x86_fmsub_bba_pd:
402 ; FMA4-NEXT: vmovapd (%rdx), %xmm0
403 ; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
405 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
406 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmsub.pd.256; the checks expect
; vfmsubpd on ymm registers in the form (x * y) - z, preceded by a vmovapd
; load into ymm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the subtracted operand, with
;         the other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the subtracted operand read from memory.
409 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
410 define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
411 ; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
413 ; FMA4-NEXT: vmovapd (%rcx), %ymm0
414 ; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
416 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
417 ret <4 x double> %res
420 define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
421 ; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
423 ; FMA4-NEXT: vmovapd (%rcx), %ymm0
424 ; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
426 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
427 ret <4 x double> %res
430 define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
431 ; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
433 ; FMA4-NEXT: vmovapd (%rdx), %ymm0
434 ; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
436 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
437 ret <4 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmsub.ps (128-bit); the checks
; expect vfnmsubps in the form -(x * y) - z, preceded by a vmovaps load into
; xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the subtracted operand, with
;         the other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the subtracted operand read from memory.
440 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
441 define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
442 ; FMA4-LABEL: test_x86_fnmsub_baa_ps:
444 ; FMA4-NEXT: vmovaps (%rcx), %xmm0
445 ; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
447 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
451 define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
452 ; FMA4-LABEL: test_x86_fnmsub_aba_ps:
454 ; FMA4-NEXT: vmovaps (%rcx), %xmm0
455 ; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
457 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
461 define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
462 ; FMA4-LABEL: test_x86_fnmsub_bba_ps:
464 ; FMA4-NEXT: vmovaps (%rdx), %xmm0
465 ; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
467 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmsub.ps.256; the checks expect
; vfnmsubps on ymm registers in the form -(x * y) - z, preceded by a vmovaps
; load into ymm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the subtracted operand, with
;         the other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the subtracted operand read from memory.
471 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
472 define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
473 ; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
475 ; FMA4-NEXT: vmovaps (%rcx), %ymm0
476 ; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
478 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
482 define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
483 ; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
485 ; FMA4-NEXT: vmovaps (%rcx), %ymm0
486 ; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
488 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
492 define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
493 ; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
495 ; FMA4-NEXT: vmovaps (%rdx), %ymm0
496 ; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
498 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmsub.pd (128-bit); the checks
; expect vfnmsubpd in the form -(x * y) - z, preceded by a vmovapd load into
; xmm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the subtracted operand, with
;         the other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the subtracted operand read from memory.
502 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
503 define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
504 ; FMA4-LABEL: test_x86_fnmsub_baa_pd:
506 ; FMA4-NEXT: vmovapd (%rcx), %xmm0
507 ; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
509 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
510 ret <2 x double> %res
513 define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
514 ; FMA4-LABEL: test_x86_fnmsub_aba_pd:
516 ; FMA4-NEXT: vmovapd (%rcx), %xmm0
517 ; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
519 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
520 ret <2 x double> %res
523 define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
524 ; FMA4-LABEL: test_x86_fnmsub_bba_pd:
526 ; FMA4-NEXT: vmovapd (%rdx), %xmm0
527 ; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
529 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
530 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmsub.pd.256; the checks expect
; vfnmsubpd on ymm registers in the form -(x * y) - z, preceded by a vmovapd
; load into ymm0.
; The infix in each test name spells the order in which %a and %b are passed:
;   baa = (%b, %a, %a) and aba = (%a, %b, %a): both expect the load from
;         (%rcx) to supply one multiplicand and the subtracted operand, with
;         the other multiplicand read from memory.
;   bba = (%b, %b, %a): expects the load from (%rdx) to supply both
;         multiplicands, with the subtracted operand read from memory.
533 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
534 define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
535 ; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
537 ; FMA4-NEXT: vmovapd (%rcx), %ymm0
538 ; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
540 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
541 ret <4 x double> %res
544 define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
545 ; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
547 ; FMA4-NEXT: vmovapd (%rcx), %ymm0
548 ; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
550 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
551 ret <4 x double> %res
554 define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
555 ; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
557 ; FMA4-NEXT: vmovapd (%rdx), %ymm0
558 ; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
560 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
561 ret <4 x double> %res