1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s
3 ; CHECK-LABEL: fmaddsubpd_loop_128:
4 ; CHECK: vfmaddsub231pd %xmm1, %xmm0, %xmm2
5 ; CHECK: vmovapd %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfmaddsub.pd on <2 x double>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmaddsub231pd with %xmm2 as its last operand, then vmovapd %xmm2, %xmm0.
7 define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
12 %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
13 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
14 %cmp = icmp slt i32 %i.0, %iter
15 br i1 %cmp, label %for.body, label %for.end
21 %0 = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
22 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
26 ret <2 x double> %c.addr.0
29 ; CHECK-LABEL: fmsubaddpd_loop_128:
30 ; CHECK: vfmsubadd231pd %xmm1, %xmm0, %xmm2
31 ; CHECK: vmovapd %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfmsubadd.pd on <2 x double>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmsubadd231pd with %xmm2 as its last operand, then vmovapd %xmm2, %xmm0.
33 define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
38 %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
39 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
40 %cmp = icmp slt i32 %i.0, %iter
41 br i1 %cmp, label %for.body, label %for.end
47 %0 = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
48 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
52 ret <2 x double> %c.addr.0
55 ; CHECK-LABEL: fmaddpd_loop_128:
56 ; CHECK: vfmadd231pd %xmm1, %xmm0, %xmm2
57 ; CHECK: vmovapd %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfmadd.pd on <2 x double>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmadd231pd with %xmm2 as its last operand, then vmovapd %xmm2, %xmm0.
59 define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
64 %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
65 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
66 %cmp = icmp slt i32 %i.0, %iter
67 br i1 %cmp, label %for.body, label %for.end
73 %0 = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
74 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
78 ret <2 x double> %c.addr.0
81 ; CHECK-LABEL: fmsubpd_loop_128:
82 ; CHECK: vfmsub231pd %xmm1, %xmm0, %xmm2
83 ; CHECK: vmovapd %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfmsub.pd on <2 x double>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmsub231pd with %xmm2 as its last operand, then vmovapd %xmm2, %xmm0.
85 define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
90 %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
91 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
92 %cmp = icmp slt i32 %i.0, %iter
93 br i1 %cmp, label %for.body, label %for.end
99 %0 = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
100 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
104 ret <2 x double> %c.addr.0
107 ; CHECK-LABEL: fnmaddpd_loop_128:
108 ; CHECK: vfnmadd231pd %xmm1, %xmm0, %xmm2
109 ; CHECK: vmovapd %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfnmadd.pd on <2 x double>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfnmadd231pd with %xmm2 as its last operand, then vmovapd %xmm2, %xmm0.
111 define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
116 %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
117 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
118 %cmp = icmp slt i32 %i.0, %iter
119 br i1 %cmp, label %for.body, label %for.end
125 %0 = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
126 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
130 ret <2 x double> %c.addr.0
133 ; CHECK-LABEL: fnmsubpd_loop_128:
134 ; CHECK: vfnmsub231pd %xmm1, %xmm0, %xmm2
135 ; CHECK: vmovapd %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfnmsub.pd on <2 x double>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfnmsub231pd with %xmm2 as its last operand, then vmovapd %xmm2, %xmm0.
137 define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
142 %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
143 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
144 %cmp = icmp slt i32 %i.0, %iter
145 br i1 %cmp, label %for.body, label %for.end
151 %0 = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
152 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
156 ret <2 x double> %c.addr.0
; Declarations of the <2 x double> x86 FMA intrinsics called by the
; *pd_loop_128 functions; each takes three <2 x double> operands.
159 declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
160 declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
161 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
162 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
163 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
164 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
167 ; CHECK-LABEL: fmaddsubps_loop_128:
168 ; CHECK: vfmaddsub231ps %xmm1, %xmm0, %xmm2
169 ; CHECK: vmovaps %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfmaddsub.ps on <4 x float>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmaddsub231ps with %xmm2 as its last operand, then vmovaps %xmm2, %xmm0.
171 define <4 x float> @fmaddsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
176 %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
177 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
178 %cmp = icmp slt i32 %i.0, %iter
179 br i1 %cmp, label %for.body, label %for.end
185 %0 = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
186 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
190 ret <4 x float> %c.addr.0
193 ; CHECK-LABEL: fmsubaddps_loop_128:
194 ; CHECK: vfmsubadd231ps %xmm1, %xmm0, %xmm2
195 ; CHECK: vmovaps %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfmsubadd.ps on <4 x float>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmsubadd231ps with %xmm2 as its last operand, then vmovaps %xmm2, %xmm0.
197 define <4 x float> @fmsubaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
202 %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
203 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
204 %cmp = icmp slt i32 %i.0, %iter
205 br i1 %cmp, label %for.body, label %for.end
211 %0 = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
212 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
216 ret <4 x float> %c.addr.0
219 ; CHECK-LABEL: fmaddps_loop_128:
220 ; CHECK: vfmadd231ps %xmm1, %xmm0, %xmm2
221 ; CHECK: vmovaps %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfmadd.ps on <4 x float>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmadd231ps with %xmm2 as its last operand, then vmovaps %xmm2, %xmm0.
223 define <4 x float> @fmaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
228 %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
229 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
230 %cmp = icmp slt i32 %i.0, %iter
231 br i1 %cmp, label %for.body, label %for.end
237 %0 = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
238 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
242 ret <4 x float> %c.addr.0
245 ; CHECK-LABEL: fmsubps_loop_128:
246 ; CHECK: vfmsub231ps %xmm1, %xmm0, %xmm2
247 ; CHECK: vmovaps %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfmsub.ps on <4 x float>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmsub231ps with %xmm2 as its last operand, then vmovaps %xmm2, %xmm0.
249 define <4 x float> @fmsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
254 %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
255 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
256 %cmp = icmp slt i32 %i.0, %iter
257 br i1 %cmp, label %for.body, label %for.end
263 %0 = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
264 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
268 ret <4 x float> %c.addr.0
271 ; CHECK-LABEL: fnmaddps_loop_128:
272 ; CHECK: vfnmadd231ps %xmm1, %xmm0, %xmm2
273 ; CHECK: vmovaps %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfnmadd.ps on <4 x float>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfnmadd231ps with %xmm2 as its last operand, then vmovaps %xmm2, %xmm0.
275 define <4 x float> @fnmaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
280 %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
281 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
282 %cmp = icmp slt i32 %i.0, %iter
283 br i1 %cmp, label %for.body, label %for.end
289 %0 = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
290 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
294 ret <4 x float> %c.addr.0
297 ; CHECK-LABEL: fnmsubps_loop_128:
298 ; CHECK: vfnmsub231ps %xmm1, %xmm0, %xmm2
299 ; CHECK: vmovaps %xmm2, %xmm0
; Loop-carried use of @llvm.x86.fma.vfnmsub.ps on <4 x float>: the call
; result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfnmsub231ps with %xmm2 as its last operand, then vmovaps %xmm2, %xmm0.
301 define <4 x float> @fnmsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
306 %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
307 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
308 %cmp = icmp slt i32 %i.0, %iter
309 br i1 %cmp, label %for.body, label %for.end
315 %0 = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
316 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
320 ret <4 x float> %c.addr.0
; Declarations of the <4 x float> x86 FMA intrinsics called by the
; *ps_loop_128 functions; each takes three <4 x float> operands.
323 declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
324 declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
325 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
326 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
327 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
328 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
330 ; CHECK-LABEL: fmaddsubpd_loop_256:
331 ; CHECK: vfmaddsub231pd %ymm1, %ymm0, %ymm2
332 ; CHECK: vmovapd %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfmaddsub.pd.256 on <4 x double>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmaddsub231pd with %ymm2 as its last operand, then vmovapd %ymm2, %ymm0.
334 define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
339 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
340 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
341 %cmp = icmp slt i32 %i.0, %iter
342 br i1 %cmp, label %for.body, label %for.end
348 %0 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
349 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
353 ret <4 x double> %c.addr.0
356 ; CHECK-LABEL: fmsubaddpd_loop_256:
357 ; CHECK: vfmsubadd231pd %ymm1, %ymm0, %ymm2
358 ; CHECK: vmovapd %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfmsubadd.pd.256 on <4 x double>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmsubadd231pd with %ymm2 as its last operand, then vmovapd %ymm2, %ymm0.
360 define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
365 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
366 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
367 %cmp = icmp slt i32 %i.0, %iter
368 br i1 %cmp, label %for.body, label %for.end
374 %0 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
375 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
379 ret <4 x double> %c.addr.0
382 ; CHECK-LABEL: fmaddpd_loop_256:
383 ; CHECK: vfmadd231pd %ymm1, %ymm0, %ymm2
384 ; CHECK: vmovapd %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfmadd.pd.256 on <4 x double>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmadd231pd with %ymm2 as its last operand, then vmovapd %ymm2, %ymm0.
386 define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
391 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
392 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
393 %cmp = icmp slt i32 %i.0, %iter
394 br i1 %cmp, label %for.body, label %for.end
400 %0 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
401 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
405 ret <4 x double> %c.addr.0
408 ; CHECK-LABEL: fmsubpd_loop_256:
409 ; CHECK: vfmsub231pd %ymm1, %ymm0, %ymm2
410 ; CHECK: vmovapd %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfmsub.pd.256 on <4 x double>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmsub231pd with %ymm2 as its last operand, then vmovapd %ymm2, %ymm0.
412 define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
417 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
418 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
419 %cmp = icmp slt i32 %i.0, %iter
420 br i1 %cmp, label %for.body, label %for.end
426 %0 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
427 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
431 ret <4 x double> %c.addr.0
434 ; CHECK-LABEL: fnmaddpd_loop_256:
435 ; CHECK: vfnmadd231pd %ymm1, %ymm0, %ymm2
436 ; CHECK: vmovapd %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfnmadd.pd.256 on <4 x double>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfnmadd231pd with %ymm2 as its last operand, then vmovapd %ymm2, %ymm0.
438 define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
443 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
444 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
445 %cmp = icmp slt i32 %i.0, %iter
446 br i1 %cmp, label %for.body, label %for.end
452 %0 = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
453 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
457 ret <4 x double> %c.addr.0
460 ; CHECK-LABEL: fnmsubpd_loop_256:
461 ; CHECK: vfnmsub231pd %ymm1, %ymm0, %ymm2
462 ; CHECK: vmovapd %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfnmsub.pd.256 on <4 x double>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfnmsub231pd with %ymm2 as its last operand, then vmovapd %ymm2, %ymm0.
464 define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
469 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
470 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
471 %cmp = icmp slt i32 %i.0, %iter
472 br i1 %cmp, label %for.body, label %for.end
478 %0 = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
479 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
483 ret <4 x double> %c.addr.0
; Declarations of the <4 x double> x86 FMA intrinsics called by the
; *pd_loop_256 functions; each takes three <4 x double> operands.
486 declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
487 declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
488 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
489 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
490 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
491 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
494 ; CHECK-LABEL: fmaddsubps_loop_256:
495 ; CHECK: vfmaddsub231ps %ymm1, %ymm0, %ymm2
496 ; CHECK: vmovaps %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfmaddsub.ps.256 on <8 x float>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmaddsub231ps with %ymm2 as its last operand, then vmovaps %ymm2, %ymm0.
498 define <8 x float> @fmaddsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
503 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
504 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
505 %cmp = icmp slt i32 %i.0, %iter
506 br i1 %cmp, label %for.body, label %for.end
512 %0 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
513 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
517 ret <8 x float> %c.addr.0
520 ; CHECK-LABEL: fmsubaddps_loop_256:
521 ; CHECK: vfmsubadd231ps %ymm1, %ymm0, %ymm2
522 ; CHECK: vmovaps %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfmsubadd.ps.256 on <8 x float>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmsubadd231ps with %ymm2 as its last operand, then vmovaps %ymm2, %ymm0.
524 define <8 x float> @fmsubaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
529 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
530 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
531 %cmp = icmp slt i32 %i.0, %iter
532 br i1 %cmp, label %for.body, label %for.end
538 %0 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
539 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
543 ret <8 x float> %c.addr.0
546 ; CHECK-LABEL: fmaddps_loop_256:
547 ; CHECK: vfmadd231ps %ymm1, %ymm0, %ymm2
548 ; CHECK: vmovaps %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfmadd.ps.256 on <8 x float>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmadd231ps with %ymm2 as its last operand, then vmovaps %ymm2, %ymm0.
550 define <8 x float> @fmaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
555 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
556 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
557 %cmp = icmp slt i32 %i.0, %iter
558 br i1 %cmp, label %for.body, label %for.end
564 %0 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
565 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
569 ret <8 x float> %c.addr.0
572 ; CHECK-LABEL: fmsubps_loop_256:
573 ; CHECK: vfmsub231ps %ymm1, %ymm0, %ymm2
574 ; CHECK: vmovaps %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfmsub.ps.256 on <8 x float>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfmsub231ps with %ymm2 as its last operand, then vmovaps %ymm2, %ymm0.
576 define <8 x float> @fmsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
581 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
582 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
583 %cmp = icmp slt i32 %i.0, %iter
584 br i1 %cmp, label %for.body, label %for.end
590 %0 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
591 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
595 ret <8 x float> %c.addr.0
598 ; CHECK-LABEL: fnmaddps_loop_256:
599 ; CHECK: vfnmadd231ps %ymm1, %ymm0, %ymm2
600 ; CHECK: vmovaps %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfnmadd.ps.256 on <8 x float>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfnmadd231ps with %ymm2 as its last operand, then vmovaps %ymm2, %ymm0.
602 define <8 x float> @fnmaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
607 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
608 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
609 %cmp = icmp slt i32 %i.0, %iter
610 br i1 %cmp, label %for.body, label %for.end
616 %0 = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
617 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
621 ret <8 x float> %c.addr.0
624 ; CHECK-LABEL: fnmsubps_loop_256:
625 ; CHECK: vfnmsub231ps %ymm1, %ymm0, %ymm2
626 ; CHECK: vmovaps %ymm2, %ymm0
; Loop-carried use of @llvm.x86.fma.vfnmsub.ps.256 on <8 x float>: the
; call result %0 re-enters the intrinsic as its third operand through the phi
; %c.addr.0. The FileCheck directives just before this function expect
; vfnmsub231ps with %ymm2 as its last operand, then vmovaps %ymm2, %ymm0.
628 define <8 x float> @fnmsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Accumulator: %c when arriving from %entry, the call result %0 from %for.inc.
633 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Counter: 0 when arriving from %entry, %inc (%i.0 + 1) from %for.inc.
634 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed test %i.0 < %iter selects %for.body (true) or %for.end (false).
635 %cmp = icmp slt i32 %i.0, %iter
636 br i1 %cmp, label %for.body, label %for.end
642 %0 = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
643 %inc = add nsw i32 %i.0, 1
; The returned value is the accumulator phi %c.addr.0.
647 ret <8 x float> %c.addr.0
; Declarations of the <8 x float> x86 FMA intrinsics called by the
; *ps_loop_256 functions; each takes three <8 x float> operands.
650 declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
651 declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
652 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
653 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
654 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
655 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)