[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / test / CodeGen / X86 / fma.ll
blob03de1533e1d64c3b2ea275ed0499224b2a5dd75f
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+avx,+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA32
3 ; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+avx,-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMACALL32
4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA64
5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMACALL64
6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=+avx512f,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=AVX512
7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL
8 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=bdver2 -mattr=-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA32
9 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=bdver2 -mattr=-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefixes=FMACALL32,FMACALL32_BDVER2
11 define float @test_f32(float %a, float %b, float %c) #0 {
12 ; FMA32-LABEL: test_f32:
13 ; FMA32:       ## %bb.0:
14 ; FMA32-NEXT:    pushl %eax ## encoding: [0x50]
15 ; FMA32-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
16 ; FMA32-NEXT:    ## xmm0 = mem[0],zero,zero,zero
17 ; FMA32-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
18 ; FMA32-NEXT:    ## xmm1 = mem[0],zero,zero,zero
19 ; FMA32-NEXT:    vfmadd213ss {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0x79,0xa9,0x4c,0x24,0x10]
20 ; FMA32-NEXT:    ## xmm1 = (xmm0 * xmm1) + mem
21 ; FMA32-NEXT:    vmovss %xmm1, (%esp) ## encoding: [0xc5,0xfa,0x11,0x0c,0x24]
22 ; FMA32-NEXT:    flds (%esp) ## encoding: [0xd9,0x04,0x24]
23 ; FMA32-NEXT:    popl %eax ## encoding: [0x58]
24 ; FMA32-NEXT:    retl ## encoding: [0xc3]
26 ; FMACALL32-LABEL: test_f32:
27 ; FMACALL32:       ## %bb.0:
28 ; FMACALL32-NEXT:    jmp _fmaf ## TAILCALL
29 ; FMACALL32-NEXT:    ## encoding: [0xeb,A]
30 ; FMACALL32-NEXT:    ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1
32 ; FMA64-LABEL: test_f32:
33 ; FMA64:       ## %bb.0:
34 ; FMA64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
35 ; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
36 ; FMA64-NEXT:    retq ## encoding: [0xc3]
38 ; FMACALL64-LABEL: test_f32:
39 ; FMACALL64:       ## %bb.0:
40 ; FMACALL64-NEXT:    jmp _fmaf ## TAILCALL
41 ; FMACALL64-NEXT:    ## encoding: [0xeb,A]
42 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1
44 ; AVX512-LABEL: test_f32:
45 ; AVX512:       ## %bb.0:
46 ; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
47 ; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
48 ; AVX512-NEXT:    retq ## encoding: [0xc3]
50 ; AVX512VL-LABEL: test_f32:
51 ; AVX512VL:       ## %bb.0:
52 ; AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
53 ; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
54 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
55   %call = call float @llvm.fma.f32(float %a, float %b, float %c)
56   ret float %call
59 define float @test_f32_reassoc(float %a, float %b, float %c) #0 {
60 ; FMA32-LABEL: test_f32_reassoc:
61 ; FMA32:       ## %bb.0:
62 ; FMA32-NEXT:    pushl %eax ## encoding: [0x50]
63 ; FMA32-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
64 ; FMA32-NEXT:    ## xmm0 = mem[0],zero,zero,zero
65 ; FMA32-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
66 ; FMA32-NEXT:    ## xmm1 = mem[0],zero,zero,zero
67 ; FMA32-NEXT:    vfmadd213ss {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0x79,0xa9,0x4c,0x24,0x10]
68 ; FMA32-NEXT:    ## xmm1 = (xmm0 * xmm1) + mem
69 ; FMA32-NEXT:    vmovss %xmm1, (%esp) ## encoding: [0xc5,0xfa,0x11,0x0c,0x24]
70 ; FMA32-NEXT:    flds (%esp) ## encoding: [0xd9,0x04,0x24]
71 ; FMA32-NEXT:    popl %eax ## encoding: [0x58]
72 ; FMA32-NEXT:    retl ## encoding: [0xc3]
74 ; FMACALL32-LABEL: test_f32_reassoc:
75 ; FMACALL32:       ## %bb.0:
76 ; FMACALL32-NEXT:    pushl %eax ## encoding: [0x50]
77 ; FMACALL32-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
78 ; FMACALL32-NEXT:    ## xmm0 = mem[0],zero,zero,zero
79 ; FMACALL32-NEXT:    vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0x44,0x24,0x0c]
80 ; FMACALL32-NEXT:    vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0x44,0x24,0x10]
81 ; FMACALL32-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
82 ; FMACALL32-NEXT:    flds (%esp) ## encoding: [0xd9,0x04,0x24]
83 ; FMACALL32-NEXT:    popl %eax ## encoding: [0x58]
84 ; FMACALL32-NEXT:    retl ## encoding: [0xc3]
86 ; FMA64-LABEL: test_f32_reassoc:
87 ; FMA64:       ## %bb.0:
88 ; FMA64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
89 ; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
90 ; FMA64-NEXT:    retq ## encoding: [0xc3]
92 ; FMACALL64-LABEL: test_f32_reassoc:
93 ; FMACALL64:       ## %bb.0:
94 ; FMACALL64-NEXT:    mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1]
95 ; FMACALL64-NEXT:    addss %xmm2, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc2]
96 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
98 ; AVX512-LABEL: test_f32_reassoc:
99 ; AVX512:       ## %bb.0:
100 ; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
101 ; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
102 ; AVX512-NEXT:    retq ## encoding: [0xc3]
104 ; AVX512VL-LABEL: test_f32_reassoc:
105 ; AVX512VL:       ## %bb.0:
106 ; AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
107 ; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
108 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
109   %call = call reassoc float @llvm.fma.f32(float %a, float %b, float %c)
110   ret float %call
113 define double @test_f64(double %a, double %b, double %c) #0 {
114 ; FMA32-LABEL: test_f64:
115 ; FMA32:       ## %bb.0: ## %entry
116 ; FMA32-NEXT:    subl $12, %esp ## encoding: [0x83,0xec,0x0c]
117 ; FMA32-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x10]
118 ; FMA32-NEXT:    ## xmm0 = mem[0],zero
119 ; FMA32-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x18]
120 ; FMA32-NEXT:    ## xmm1 = mem[0],zero
121 ; FMA32-NEXT:    vfmadd213sd {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0xf9,0xa9,0x4c,0x24,0x20]
122 ; FMA32-NEXT:    ## xmm1 = (xmm0 * xmm1) + mem
123 ; FMA32-NEXT:    vmovsd %xmm1, (%esp) ## encoding: [0xc5,0xfb,0x11,0x0c,0x24]
124 ; FMA32-NEXT:    fldl (%esp) ## encoding: [0xdd,0x04,0x24]
125 ; FMA32-NEXT:    addl $12, %esp ## encoding: [0x83,0xc4,0x0c]
126 ; FMA32-NEXT:    retl ## encoding: [0xc3]
128 ; FMACALL32-LABEL: test_f64:
129 ; FMACALL32:       ## %bb.0: ## %entry
130 ; FMACALL32-NEXT:    jmp _fma ## TAILCALL
131 ; FMACALL32-NEXT:    ## encoding: [0xeb,A]
132 ; FMACALL32-NEXT:    ## fixup A - offset: 1, value: _fma-1, kind: FK_PCRel_1
134 ; FMA64-LABEL: test_f64:
135 ; FMA64:       ## %bb.0: ## %entry
136 ; FMA64-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
137 ; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
138 ; FMA64-NEXT:    retq ## encoding: [0xc3]
140 ; FMACALL64-LABEL: test_f64:
141 ; FMACALL64:       ## %bb.0: ## %entry
142 ; FMACALL64-NEXT:    jmp _fma ## TAILCALL
143 ; FMACALL64-NEXT:    ## encoding: [0xeb,A]
144 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-1, kind: FK_PCRel_1
146 ; AVX512-LABEL: test_f64:
147 ; AVX512:       ## %bb.0: ## %entry
148 ; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
149 ; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
150 ; AVX512-NEXT:    retq ## encoding: [0xc3]
152 ; AVX512VL-LABEL: test_f64:
153 ; AVX512VL:       ## %bb.0: ## %entry
154 ; AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
155 ; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
156 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
157 entry:
158   %call = call double @llvm.fma.f64(double %a, double %b, double %c)
159   ret double %call
162 define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) #0 {
163 ; FMA32-LABEL: test_f80:
164 ; FMA32:       ## %bb.0: ## %entry
165 ; FMA32-NEXT:    subl $60, %esp ## encoding: [0x83,0xec,0x3c]
166 ; FMA32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x40]
167 ; FMA32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x50]
168 ; FMA32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x60]
169 ; FMA32-NEXT:    fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x20]
170 ; FMA32-NEXT:    fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x10]
171 ; FMA32-NEXT:    fstpt (%esp) ## encoding: [0xdb,0x3c,0x24]
172 ; FMA32-NEXT:    calll _fmal ## encoding: [0xe8,A,A,A,A]
173 ; FMA32-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: FK_PCRel_4
174 ; FMA32-NEXT:    addl $60, %esp ## encoding: [0x83,0xc4,0x3c]
175 ; FMA32-NEXT:    retl ## encoding: [0xc3]
177 ; FMACALL32-LABEL: test_f80:
178 ; FMACALL32:       ## %bb.0: ## %entry
179 ; FMACALL32-NEXT:    subl $60, %esp ## encoding: [0x83,0xec,0x3c]
180 ; FMACALL32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x40]
181 ; FMACALL32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x50]
182 ; FMACALL32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x60]
183 ; FMACALL32-NEXT:    fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x20]
184 ; FMACALL32-NEXT:    fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x10]
185 ; FMACALL32-NEXT:    fstpt (%esp) ## encoding: [0xdb,0x3c,0x24]
186 ; FMACALL32-NEXT:    calll _fmal ## encoding: [0xe8,A,A,A,A]
187 ; FMACALL32-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: FK_PCRel_4
188 ; FMACALL32-NEXT:    addl $60, %esp ## encoding: [0x83,0xc4,0x3c]
189 ; FMACALL32-NEXT:    retl ## encoding: [0xc3]
191 ; FMA64-LABEL: test_f80:
192 ; FMA64:       ## %bb.0: ## %entry
193 ; FMA64-NEXT:    subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38]
194 ; FMA64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40]
195 ; FMA64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50]
196 ; FMA64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60]
197 ; FMA64-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20]
198 ; FMA64-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10]
199 ; FMA64-NEXT:    fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24]
200 ; FMA64-NEXT:    callq _fmal ## encoding: [0xe8,A,A,A,A]
201 ; FMA64-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel
202 ; FMA64-NEXT:    addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38]
203 ; FMA64-NEXT:    retq ## encoding: [0xc3]
205 ; FMACALL64-LABEL: test_f80:
206 ; FMACALL64:       ## %bb.0: ## %entry
207 ; FMACALL64-NEXT:    subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38]
208 ; FMACALL64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40]
209 ; FMACALL64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50]
210 ; FMACALL64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60]
211 ; FMACALL64-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20]
212 ; FMACALL64-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10]
213 ; FMACALL64-NEXT:    fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24]
214 ; FMACALL64-NEXT:    callq _fmal ## encoding: [0xe8,A,A,A,A]
215 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel
216 ; FMACALL64-NEXT:    addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38]
217 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
219 ; AVX512-LABEL: test_f80:
220 ; AVX512:       ## %bb.0: ## %entry
221 ; AVX512-NEXT:    subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38]
222 ; AVX512-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40]
223 ; AVX512-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50]
224 ; AVX512-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60]
225 ; AVX512-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20]
226 ; AVX512-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10]
227 ; AVX512-NEXT:    fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24]
228 ; AVX512-NEXT:    callq _fmal ## encoding: [0xe8,A,A,A,A]
229 ; AVX512-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel
230 ; AVX512-NEXT:    addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38]
231 ; AVX512-NEXT:    retq ## encoding: [0xc3]
233 ; AVX512VL-LABEL: test_f80:
234 ; AVX512VL:       ## %bb.0: ## %entry
235 ; AVX512VL-NEXT:    subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38]
236 ; AVX512VL-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40]
237 ; AVX512VL-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50]
238 ; AVX512VL-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60]
239 ; AVX512VL-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20]
240 ; AVX512VL-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10]
241 ; AVX512VL-NEXT:    fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24]
242 ; AVX512VL-NEXT:    callq _fmal ## encoding: [0xe8,A,A,A,A]
243 ; AVX512VL-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel
244 ; AVX512VL-NEXT:    addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38]
245 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
246 entry:
247   %call = call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
248   ret x86_fp80 %call
251 define float @test_f32_cst() #0 {
252 ; FMA32-LABEL: test_f32_cst:
253 ; FMA32:       ## %bb.0: ## %entry
254 ; FMA32-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A]
255 ; FMA32-NEXT:    ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
256 ; FMA32-NEXT:    retl ## encoding: [0xc3]
258 ; FMACALL32-LABEL: test_f32_cst:
259 ; FMACALL32:       ## %bb.0: ## %entry
260 ; FMACALL32-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A]
261 ; FMACALL32-NEXT:    ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
262 ; FMACALL32-NEXT:    retl ## encoding: [0xc3]
264 ; FMA64-LABEL: test_f32_cst:
265 ; FMA64:       ## %bb.0: ## %entry
266 ; FMA64-NEXT:    vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
267 ; FMA64-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
268 ; FMA64-NEXT:    ## xmm0 = mem[0],zero,zero,zero
269 ; FMA64-NEXT:    retq ## encoding: [0xc3]
271 ; FMACALL64-LABEL: test_f32_cst:
272 ; FMACALL64:       ## %bb.0: ## %entry
273 ; FMACALL64-NEXT:    movss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A]
274 ; FMACALL64-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
275 ; FMACALL64-NEXT:    ## xmm0 = mem[0],zero,zero,zero
276 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
278 ; AVX512-LABEL: test_f32_cst:
279 ; AVX512:       ## %bb.0: ## %entry
280 ; AVX512-NEXT:    vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
281 ; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
282 ; AVX512-NEXT:    ## xmm0 = mem[0],zero,zero,zero
283 ; AVX512-NEXT:    retq ## encoding: [0xc3]
285 ; AVX512VL-LABEL: test_f32_cst:
286 ; AVX512VL:       ## %bb.0: ## %entry
287 ; AVX512VL-NEXT:    vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
288 ; AVX512VL-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
289 ; AVX512VL-NEXT:    ## xmm0 = mem[0],zero,zero,zero
290 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
291 entry:
292   %call = call float @llvm.fma.f32(float 3.0, float 3.0, float 3.0)
293   ret float %call
296 define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
297 ; FMA32-LABEL: test_v4f32:
298 ; FMA32:       ## %bb.0: ## %entry
299 ; FMA32-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
300 ; FMA32-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
301 ; FMA32-NEXT:    retl ## encoding: [0xc3]
303 ; FMA64-LABEL: test_v4f32:
304 ; FMA64:       ## %bb.0: ## %entry
305 ; FMA64-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
306 ; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
307 ; FMA64-NEXT:    retq ## encoding: [0xc3]
309 ; FMACALL64-LABEL: test_v4f32:
310 ; FMACALL64:       ## %bb.0: ## %entry
311 ; FMACALL64-NEXT:    subq $88, %rsp ## encoding: [0x48,0x83,0xec,0x58]
312 ; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
313 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x30]
314 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
315 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
316 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
317 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
318 ; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
319 ; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
320 ; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
321 ; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
322 ; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
323 ; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
324 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
325 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
326 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
327 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
328 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
329 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
330 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
331 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
332 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
333 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
334 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
335 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
336 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
337 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x30]
338 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
339 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
340 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
341 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
342 ; FMACALL64-NEXT:    unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
343 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x04,0x24]
344 ; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
345 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
346 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
347 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
348 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
349 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
350 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
351 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
352 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x30]
353 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
354 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
355 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
356 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x40]
357 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
358 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
359 ; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
360 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
361 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
362 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
363 ; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
364 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
365 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
366 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x30]
367 ; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
368 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
369 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
370 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
371 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
372 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
373 ; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
374 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
375 ; FMACALL64-NEXT:    unpcklpd (%rsp), %xmm1 ## 16-byte Folded Reload
376 ; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x0c,0x24]
377 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
378 ; FMACALL64-NEXT:    movaps %xmm1, %xmm0 ## encoding: [0x0f,0x28,0xc1]
379 ; FMACALL64-NEXT:    addq $88, %rsp ## encoding: [0x48,0x83,0xc4,0x58]
380 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
382 ; AVX512-LABEL: test_v4f32:
383 ; AVX512:       ## %bb.0: ## %entry
384 ; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
385 ; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
386 ; AVX512-NEXT:    retq ## encoding: [0xc3]
388 ; AVX512VL-LABEL: test_v4f32:
389 ; AVX512VL:       ## %bb.0: ## %entry
390 ; AVX512VL-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
391 ; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
392 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
394 ; FMACALL32_BDVER2-LABEL: test_v4f32:
395 ; FMACALL32_BDVER2:       ## %bb.0: ## %entry
396 ; FMACALL32_BDVER2-NEXT:    subl $108, %esp ## encoding: [0x83,0xec,0x6c]
397 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
398 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x40]
399 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
400 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x30]
401 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
402 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x20]
403 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x08,0x02]
404 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm1, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x4c,0x24,0x04,0x02]
405 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02]
406 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
407 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
408 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
409 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x60]
410 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
411 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
412 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
413 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x30]
414 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
415 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x20]
416 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
417 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01]
418 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01]
419 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
420 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
421 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
422 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x54]
423 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
424 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
425 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
426 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
427 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30]
428 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
429 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
430 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20]
431 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
432 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
433 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
434 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
435 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
436 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
437 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x30]
438 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
439 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x20]
440 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
441 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03]
442 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03]
443 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c]
444 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
445 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x54]
446 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18]
447 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
448 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x60]
449 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14]
450 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
451 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
452 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10]
453 ; FMACALL32_BDVER2-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x1c]
454 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero,zero,zero
455 ; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x18,0x10]
456 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[2,3]
457 ; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x14,0x20]
458 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
459 ; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x10,0x30]
460 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1,2],mem[0]
461 ; FMACALL32_BDVER2-NEXT:    addl $108, %esp ## encoding: [0x83,0xc4,0x6c]
462 ; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
463 entry:
464   %call = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
465   ret <4 x float> %call
468 define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #0 {
469 ; FMA32-LABEL: test_v8f32:
470 ; FMA32:       ## %bb.0: ## %entry
471 ; FMA32-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
472 ; FMA32-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
473 ; FMA32-NEXT:    retl ## encoding: [0xc3]
475 ; FMA64-LABEL: test_v8f32:
476 ; FMA64:       ## %bb.0: ## %entry
477 ; FMA64-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
478 ; FMA64-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
479 ; FMA64-NEXT:    retq ## encoding: [0xc3]
481 ; FMACALL64-LABEL: test_v8f32:
482 ; FMACALL64:       ## %bb.0: ## %entry
483 ; FMACALL64-NEXT:    subq $136, %rsp ## encoding: [0x48,0x81,0xec,0x88,0x00,0x00,0x00]
484 ; FMACALL64-NEXT:    movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
485 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x6c,0x24,0x50]
486 ; FMACALL64-NEXT:    movaps %xmm4, (%rsp) ## 16-byte Spill
487 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x24,0x24]
488 ; FMACALL64-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
489 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x5c,0x24,0x40]
490 ; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
491 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x60]
492 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
493 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x30]
494 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
495 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
496 ; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
497 ; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
498 ; FMACALL64-NEXT:    movaps %xmm2, %xmm1 ## encoding: [0x0f,0x28,0xca]
499 ; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm1 ## encoding: [0x0f,0xc6,0xca,0xff]
500 ; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3],xmm2[3,3]
501 ; FMACALL64-NEXT:    movaps %xmm4, %xmm2 ## encoding: [0x0f,0x28,0xd4]
502 ; FMACALL64-NEXT:    shufps $255, %xmm4, %xmm2 ## encoding: [0x0f,0xc6,0xd4,0xff]
503 ; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3],xmm4[3,3]
504 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
505 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
506 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
507 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
508 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
509 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x10]
510 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
511 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
512 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
513 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
514 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
515 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
516 ; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
517 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
518 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
519 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
520 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
521 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
522 ; FMACALL64-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
523 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x44,0x24,0x20]
524 ; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
525 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
526 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x70]
527 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
528 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x10]
529 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
530 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
531 ; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
532 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
533 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
534 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
535 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
536 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
537 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
538 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x10]
539 ; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
540 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
541 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
542 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
543 ; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
544 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
545 ; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
546 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
547 ; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
548 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
549 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
550 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
551 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
552 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
553 ; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
554 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
555 ; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
556 ; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x70]
557 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
558 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
559 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x20]
560 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
561 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
562 ; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
563 ; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
564 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
565 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
566 ; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
567 ; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
568 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
569 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x50]
570 ; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
571 ; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
572 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
573 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
574 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
575 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
576 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
577 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
578 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
579 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
580 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
581 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
582 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
583 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
584 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
585 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x50]
586 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
587 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
588 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
589 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
590 ; FMACALL64-NEXT:    unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
591 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x04,0x24]
592 ; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
593 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
594 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
595 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
596 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
597 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
598 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
599 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
600 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x50]
601 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
602 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
603 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
604 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
605 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
606 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
607 ; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
608 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
609 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
610 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
611 ; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
612 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
613 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
614 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x50]
615 ; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
616 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
617 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
618 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
619 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
620 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
621 ; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
622 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
623 ; FMACALL64-NEXT:    unpcklpd (%rsp), %xmm1 ## 16-byte Folded Reload
624 ; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x0c,0x24]
625 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
626 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
627 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
628 ; FMACALL64-NEXT:    addq $136, %rsp ## encoding: [0x48,0x81,0xc4,0x88,0x00,0x00,0x00]
629 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
631 ; AVX512-LABEL: test_v8f32:
632 ; AVX512:       ## %bb.0: ## %entry
633 ; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
634 ; AVX512-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
635 ; AVX512-NEXT:    retq ## encoding: [0xc3]
637 ; AVX512VL-LABEL: test_v8f32:
638 ; AVX512VL:       ## %bb.0: ## %entry
639 ; AVX512VL-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
640 ; AVX512VL-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
641 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
643 ; FMACALL32_BDVER2-LABEL: test_v8f32:
644 ; FMACALL32_BDVER2:       ## %bb.0: ## %entry
645 ; FMACALL32_BDVER2-NEXT:    subl $284, %esp ## encoding: [0x81,0xec,0x1c,0x01,0x00,0x00]
646 ; FMACALL32_BDVER2-NEXT:    ## imm = 0x11C
647 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01]
648 ; FMACALL32_BDVER2-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
649 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0xf0,0x00,0x00,0x00]
650 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01]
651 ; FMACALL32_BDVER2-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
652 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xd0,0x00,0x00,0x00]
653 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
654 ; FMACALL32_BDVER2-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
655 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0xb0,0x00,0x00,0x00]
656 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
657 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x50]
658 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x5c,0x24,0x08,0x02]
659 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
660 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x40]
661 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02]
662 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
663 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x30]
664 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02]
665 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
666 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
667 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
668 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
669 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xa4,0x00,0x00,0x00]
670 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
671 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50]
672 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
673 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x40]
674 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
675 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x30]
676 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
677 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01]
678 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01]
679 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
680 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
681 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
682 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x98,0x00,0x00,0x00]
683 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
684 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50]
685 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
686 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
687 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
688 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
689 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
690 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30]
691 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
692 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
693 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
694 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
695 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x8c,0x00,0x00,0x00]
696 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
697 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00]
698 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
699 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
700 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00]
701 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03]
702 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
703 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00]
704 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03]
705 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
706 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
707 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
708 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
709 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x80,0x00,0x00,0x00]
710 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
711 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00]
712 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02]
713 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
714 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00]
715 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02]
716 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
717 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00]
718 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02]
719 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
720 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
721 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
722 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
723 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x74]
724 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
725 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00]
726 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
727 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
728 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00]
729 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01]
730 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
731 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00]
732 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01]
733 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
734 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
735 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
736 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
737 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x68]
738 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
739 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00]
740 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
741 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
742 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00]
743 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
744 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
745 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00]
746 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
747 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
748 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
749 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
750 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
751 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50]
752 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
753 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x40]
754 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
755 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x30]
756 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
757 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03]
758 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03]
759 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c]
760 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
761 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x68]
762 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28]
763 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
764 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x74]
765 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24]
766 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
767 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x80,0x00,0x00,0x00]
768 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20]
769 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
770 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x8c,0x00,0x00,0x00]
771 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c]
772 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
773 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x98,0x00,0x00,0x00]
774 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18]
775 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
776 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xa4,0x00,0x00,0x00]
777 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14]
778 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
779 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
780 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10]
781 ; FMACALL32_BDVER2-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x2c]
782 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero,zero,zero
783 ; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x28,0x10]
784 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[2,3]
785 ; FMACALL32_BDVER2-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x1c]
786 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = mem[0],zero,zero,zero
787 ; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x18,0x10]
788 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0],mem[0],xmm1[2,3]
789 ; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x24,0x20]
790 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
791 ; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x14,0x20]
792 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0],xmm1[3]
793 ; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x20,0x30]
794 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1,2],mem[0]
795 ; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x10,0x30]
796 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1,2],mem[0]
797 ; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
798 ; FMACALL32_BDVER2-NEXT:    addl $284, %esp ## encoding: [0x81,0xc4,0x1c,0x01,0x00,0x00]
799 ; FMACALL32_BDVER2-NEXT:    ## imm = 0x11C
800 ; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
801 entry:
802   %call = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
803   ret <8 x float> %call
806 define <16 x float> @test_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c) #0 {
807 ; FMA32-LABEL: test_v16f32:
808 ; FMA32:       ## %bb.0: ## %entry
809 ; FMA32-NEXT:    pushl %ebp ## encoding: [0x55]
810 ; FMA32-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
811 ; FMA32-NEXT:    andl $-32, %esp ## encoding: [0x83,0xe4,0xe0]
812 ; FMA32-NEXT:    subl $32, %esp ## encoding: [0x83,0xec,0x20]
813 ; FMA32-NEXT:    vfmadd213ps 8(%ebp), %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0x6d,0xa8,0x45,0x08]
814 ; FMA32-NEXT:    ## ymm0 = (ymm2 * ymm0) + mem
815 ; FMA32-NEXT:    vfmadd213ps 40(%ebp), %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0x65,0xa8,0x4d,0x28]
816 ; FMA32-NEXT:    ## ymm1 = (ymm3 * ymm1) + mem
817 ; FMA32-NEXT:    movl %ebp, %esp ## encoding: [0x89,0xec]
818 ; FMA32-NEXT:    popl %ebp ## encoding: [0x5d]
819 ; FMA32-NEXT:    retl ## encoding: [0xc3]
821 ; FMA64-LABEL: test_v16f32:
822 ; FMA64:       ## %bb.0: ## %entry
823 ; FMA64-NEXT:    vfmadd213ps %ymm4, %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0x6d,0xa8,0xc4]
824 ; FMA64-NEXT:    ## ymm0 = (ymm2 * ymm0) + ymm4
825 ; FMA64-NEXT:    vfmadd213ps %ymm5, %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0x65,0xa8,0xcd]
826 ; FMA64-NEXT:    ## ymm1 = (ymm3 * ymm1) + ymm5
827 ; FMA64-NEXT:    retq ## encoding: [0xc3]
829 ; FMACALL64-LABEL: test_v16f32:
830 ; FMACALL64:       ## %bb.0: ## %entry
831 ; FMACALL64-NEXT:    subq $168, %rsp ## encoding: [0x48,0x81,0xec,0xa8,0x00,0x00,0x00]
832 ; FMACALL64-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
833 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0xbc,0x24,0x80,0x00,0x00,0x00]
834 ; FMACALL64-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
835 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x74,0x24,0x20]
836 ; FMACALL64-NEXT:    movaps %xmm5, (%rsp) ## 16-byte Spill
837 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x2c,0x24]
838 ; FMACALL64-NEXT:    movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
839 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x64,0x24,0x10]
840 ; FMACALL64-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
841 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x5c,0x24,0x70]
842 ; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
843 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x40]
844 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
845 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x30]
846 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
847 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x50]
848 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
849 ; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
850 ; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
851 ; FMACALL64-NEXT:    movaps %xmm4, %xmm1 ## encoding: [0x0f,0x28,0xcc]
852 ; FMACALL64-NEXT:    shufps $255, %xmm4, %xmm1 ## encoding: [0x0f,0xc6,0xcc,0xff]
853 ; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3],xmm4[3,3]
854 ; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
855 ; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
856 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
857 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
858 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
859 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x60]
860 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
861 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
862 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
863 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
864 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
865 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
866 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
867 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
868 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
869 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
870 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
871 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
872 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
873 ; FMACALL64-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
874 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x44,0x24,0x60]
875 ; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
876 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
877 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x84,0x24,0x90,0x00,0x00,0x00]
878 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
879 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
880 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
881 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
882 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
883 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
884 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
885 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
886 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x60]
887 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
888 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
889 ; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
890 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
891 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
892 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
893 ; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
894 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
895 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
896 ; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
897 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
898 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
899 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
900 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
901 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
902 ; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
903 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
904 ; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
905 ; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x8c,0x24,0x90,0x00,0x00,0x00]
906 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
907 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
908 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x60]
909 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
910 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
911 ; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
912 ; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
913 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
914 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
915 ; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
916 ; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
917 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
918 ; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
919 ; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
920 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
921 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
922 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
923 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
924 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
925 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
926 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
927 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
928 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
929 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
930 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
931 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
932 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
933 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
934 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
935 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
936 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
937 ; FMACALL64-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
938 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x44,0x24,0x10]
939 ; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
940 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
941 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x50]
942 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
943 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
944 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
945 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
946 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
947 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
948 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
949 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
950 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
951 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
952 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
953 ; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
954 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
955 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
956 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
957 ; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
958 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
959 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
960 ; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
961 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
962 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
963 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
964 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
965 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
966 ; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
967 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
968 ; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
969 ; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x50]
970 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
971 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
972 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
973 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
974 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
975 ; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
976 ; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
977 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
978 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
979 ; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
980 ; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
981 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
982 ; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
983 ; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
984 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
985 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
986 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
987 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
988 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
989 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
990 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
991 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
992 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
993 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
994 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
995 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
996 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
997 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
998 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
999 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1000 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1001 ; FMACALL64-NEXT:    unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
1002 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x04,0x24]
1003 ; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1004 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1005 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x30]
1006 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1007 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
1008 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1009 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
1010 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
1011 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1012 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1013 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
1014 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
1015 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1016 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
1017 ; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
1018 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
1019 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1020 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
1021 ; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
1022 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
1023 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
1024 ; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
1025 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
1026 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1027 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1028 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1029 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1030 ; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
1031 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1032 ; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
1033 ; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x30]
1034 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
1035 ; FMACALL64-NEXT:    movaps %xmm1, (%rsp) ## 16-byte Spill
1036 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x0c,0x24]
1037 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1038 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x70]
1039 ; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
1040 ; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
1041 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1042 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
1043 ; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
1044 ; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
1045 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
1046 ; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
1047 ; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
1048 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1049 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1050 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1051 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
1052 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1053 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x70]
1054 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1055 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1056 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1057 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
1058 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1059 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1060 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
1061 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1062 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1063 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1064 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1065 ; FMACALL64-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
1066 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x44,0x24,0x20]
1067 ; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1068 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1069 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
1070 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1071 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x70]
1072 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1073 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
1074 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
1075 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1076 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1077 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1078 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x40]
1079 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1080 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x70]
1081 ; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
1082 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
1083 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1084 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
1085 ; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
1086 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
1087 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
1088 ; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
1089 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
1090 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1091 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1092 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload
1093 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x5c,0x24,0x40]
1094 ; FMACALL64-NEXT:    unpcklps %xmm0, %xmm3 ## encoding: [0x0f,0x14,0xd8]
1095 ; FMACALL64-NEXT:    ## xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
1096 ; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Folded Reload
1097 ; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x5c,0x24,0x20]
1098 ; FMACALL64-NEXT:    ## xmm3 = xmm3[0],mem[0]
1099 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1100 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x60]
1101 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1102 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1103 ; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
1104 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
1105 ; FMACALL64-NEXT:    addq $168, %rsp ## encoding: [0x48,0x81,0xc4,0xa8,0x00,0x00,0x00]
1106 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1108 ; AVX512-LABEL: test_v16f32:
1109 ; AVX512:       ## %bb.0: ## %entry
1110 ; AVX512-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
1111 ; AVX512-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
1112 ; AVX512-NEXT:    retq ## encoding: [0xc3]
1114 ; AVX512VL-LABEL: test_v16f32:
1115 ; AVX512VL:       ## %bb.0: ## %entry
1116 ; AVX512VL-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
1117 ; AVX512VL-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
1118 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1120 ; FMACALL32_BDVER2-LABEL: test_v16f32:
1121 ; FMACALL32_BDVER2:       ## %bb.0: ## %entry
1122 ; FMACALL32_BDVER2-NEXT:    pushl %ebp ## encoding: [0x55]
1123 ; FMACALL32_BDVER2-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
1124 ; FMACALL32_BDVER2-NEXT:    andl $-32, %esp ## encoding: [0x83,0xe4,0xe0]
1125 ; FMACALL32_BDVER2-NEXT:    subl $448, %esp ## encoding: [0x81,0xec,0xc0,0x01,0x00,0x00]
1126 ; FMACALL32_BDVER2-NEXT:    ## imm = 0x1C0
1127 ; FMACALL32_BDVER2-NEXT:    vmovaps 56(%ebp), %xmm4 ## encoding: [0xc5,0xf8,0x28,0x65,0x38]
1128 ; FMACALL32_BDVER2-NEXT:    vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1129 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x60,0x01,0x00,0x00]
1130 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01]
1131 ; FMACALL32_BDVER2-NEXT:    vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1132 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x4c,0x24,0x60]
1133 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm1, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc9,0x01]
1134 ; FMACALL32_BDVER2-NEXT:    vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1135 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x9c,0x24,0x80,0x00,0x00,0x00]
1136 ; FMACALL32_BDVER2-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1137 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0x80,0x01,0x00,0x00]
1138 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1139 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0xc0,0x00,0x00,0x00]
1140 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02]
1141 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1142 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x8c,0x24,0xb0,0x00,0x00,0x00]
1143 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02]
1144 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm4, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x64,0x24,0x08,0x02]
1145 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1146 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1147 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1148 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1149 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x54,0x01,0x00,0x00]
1150 ; FMACALL32_BDVER2-NEXT:    vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38]
1151 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
1152 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
1153 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
1154 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x8c,0x24,0xb0,0x00,0x00,0x00]
1155 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
1156 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01]
1157 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01]
1158 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1159 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1160 ; FMACALL32_BDVER2-NEXT:    vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38]
1161 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1162 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x48,0x01,0x00,0x00]
1163 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
1164 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1165 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00]
1166 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
1167 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1168 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xb0,0x00,0x00,0x00]
1169 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
1170 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1171 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1172 ; FMACALL32_BDVER2-NEXT:    vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28]
1173 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1174 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x3c,0x01,0x00,0x00]
1175 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
1176 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1177 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1178 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03]
1179 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1180 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60]
1181 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03]
1182 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1183 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1184 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1185 ; FMACALL32_BDVER2-NEXT:    vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28]
1186 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1187 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x30,0x01,0x00,0x00]
1188 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02]
1189 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1190 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1191 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02]
1192 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1193 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60]
1194 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02]
1195 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1196 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1197 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1198 ; FMACALL32_BDVER2-NEXT:    vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28]
1199 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1200 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x24,0x01,0x00,0x00]
1201 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
1202 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1203 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1204 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01]
1205 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1206 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60]
1207 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01]
1208 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1209 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1210 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1211 ; FMACALL32_BDVER2-NEXT:    vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28]
1212 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1213 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x18,0x01,0x00,0x00]
1214 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
1215 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1216 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1217 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
1218 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1219 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60]
1220 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
1221 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1222 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1223 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1224 ; FMACALL32_BDVER2-NEXT:    vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18]
1225 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1226 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x0c,0x01,0x00,0x00]
1227 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
1228 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1229 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1230 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
1231 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1232 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x80,0x00,0x00,0x00]
1233 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03]
1234 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1235 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1236 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
1237 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1238 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60]
1239 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03]
1240 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1241 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1242 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1243 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1244 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x00,0x01,0x00,0x00]
1245 ; FMACALL32_BDVER2-NEXT:    vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18]
1246 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
1247 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0x80,0x00,0x00,0x00]
1248 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
1249 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x60]
1250 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02]
1251 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02]
1252 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02]
1253 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1254 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1255 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1256 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xf4,0x00,0x00,0x00]
1257 ; FMACALL32_BDVER2-NEXT:    vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18]
1258 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
1259 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0x80,0x00,0x00,0x00]
1260 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
1261 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x60]
1262 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
1263 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01]
1264 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01]
1265 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1266 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1267 ; FMACALL32_BDVER2-NEXT:    vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18]
1268 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1269 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xe8,0x00,0x00,0x00]
1270 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
1271 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1272 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1273 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
1274 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1275 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60]
1276 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
1277 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1278 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1279 ; FMACALL32_BDVER2-NEXT:    vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08]
1280 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1281 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x80,0x00,0x00,0x00]
1282 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
1283 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1284 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1285 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03]
1286 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1287 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1288 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03]
1289 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1290 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1291 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1292 ; FMACALL32_BDVER2-NEXT:    vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08]
1293 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1294 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x60]
1295 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02]
1296 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1297 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1298 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02]
1299 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1300 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1301 ; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02]
1302 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1303 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1304 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1305 ; FMACALL32_BDVER2-NEXT:    vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08]
1306 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1307 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xdc,0x00,0x00,0x00]
1308 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
1309 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1310 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1311 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01]
1312 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1313 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1314 ; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01]
1315 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1316 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1317 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1318 ; FMACALL32_BDVER2-NEXT:    vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08]
1319 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1320 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xd0,0x00,0x00,0x00]
1321 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
1322 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1323 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1324 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
1325 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1326 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1327 ; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
1328 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1329 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1330 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1331 ; FMACALL32_BDVER2-NEXT:    vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38]
1332 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
1333 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
1334 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
1335 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x8c,0x24,0xb0,0x00,0x00,0x00]
1336 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
1337 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03]
1338 ; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03]
1339 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x3c]
1340 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1341 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xd0,0x00,0x00,0x00]
1342 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x38]
1343 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1344 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xdc,0x00,0x00,0x00]
1345 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x34]
1346 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1347 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x60]
1348 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x30]
1349 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1350 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x80,0x00,0x00,0x00]
1351 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c]
1352 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1353 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xe8,0x00,0x00,0x00]
1354 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28]
1355 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1356 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xf4,0x00,0x00,0x00]
1357 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24]
1358 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1359 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x00,0x01,0x00,0x00]
1360 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20]
1361 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1362 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x0c,0x01,0x00,0x00]
1363 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x5c]
1364 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1365 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x18,0x01,0x00,0x00]
1366 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x58]
1367 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1368 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x24,0x01,0x00,0x00]
1369 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x54]
1370 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1371 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x30,0x01,0x00,0x00]
1372 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x50]
1373 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1374 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x3c,0x01,0x00,0x00]
1375 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x4c]
1376 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1377 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x48,0x01,0x00,0x00]
1378 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x48]
1379 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1380 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x54,0x01,0x00,0x00]
1381 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x44]
1382 ; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1383 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1384 ; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x40]
1385 ; FMACALL32_BDVER2-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x3c]
1386 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero,zero,zero
1387 ; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x38,0x10]
1388 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[2,3]
1389 ; FMACALL32_BDVER2-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x2c]
1390 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = mem[0],zero,zero,zero
1391 ; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x28,0x10]
1392 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0],mem[0],xmm1[2,3]
1393 ; FMACALL32_BDVER2-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm2 ## encoding: [0xc5,0xfa,0x10,0x54,0x24,0x4c]
1394 ; FMACALL32_BDVER2-NEXT:    ## xmm2 = mem[0],zero,zero,zero
1395 ; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x48,0x10]
1396 ; FMACALL32_BDVER2-NEXT:    ## xmm2 = xmm2[0],mem[0],xmm2[2,3]
1397 ; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x34,0x20]
1398 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
1399 ; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x24,0x20]
1400 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0],xmm1[3]
1401 ; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x44,0x20]
1402 ; FMACALL32_BDVER2-NEXT:    ## xmm2 = xmm2[0,1],mem[0],xmm2[3]
1403 ; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x30,0x30]
1404 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1,2],mem[0]
1405 ; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x20,0x30]
1406 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1,2],mem[0]
1407 ; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x40,0x30]
1408 ; FMACALL32_BDVER2-NEXT:    ## xmm2 = xmm2[0,1,2],mem[0]
1409 ; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
1410 ; FMACALL32_BDVER2-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x5c]
1411 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = mem[0],zero,zero,zero
1412 ; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x58,0x10]
1413 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0],mem[0],xmm1[2,3]
1414 ; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x54,0x20]
1415 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0],xmm1[3]
1416 ; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x50,0x30]
1417 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1,2],mem[0]
1418 ; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01]
1419 ; FMACALL32_BDVER2-NEXT:    movl %ebp, %esp ## encoding: [0x89,0xec]
1420 ; FMACALL32_BDVER2-NEXT:    popl %ebp ## encoding: [0x5d]
1421 ; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
1422 entry:
1423   %call = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
1424   ret <16 x float> %call
1427 define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
1428 ; FMA32-LABEL: test_v2f64:
1429 ; FMA32:       ## %bb.0:
1430 ; FMA32-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1431 ; FMA32-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1432 ; FMA32-NEXT:    retl ## encoding: [0xc3]
1434 ; FMA64-LABEL: test_v2f64:
1435 ; FMA64:       ## %bb.0:
1436 ; FMA64-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1437 ; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1438 ; FMA64-NEXT:    retq ## encoding: [0xc3]
1440 ; FMACALL64-LABEL: test_v2f64:
1441 ; FMACALL64:       ## %bb.0:
1442 ; FMACALL64-NEXT:    subq $72, %rsp ## encoding: [0x48,0x83,0xec,0x48]
1443 ; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1444 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x20]
1445 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1446 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
1447 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
1448 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
1449 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1450 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1451 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1452 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x30]
1453 ; FMACALL64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
1454 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x04,0x24]
1455 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1456 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1457 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1458 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1459 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1460 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1461 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
1462 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x20]
1463 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1464 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1465 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1466 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1467 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1468 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
1469 ; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1470 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1471 ; FMACALL64-NEXT:    movaps %xmm1, %xmm0 ## encoding: [0x0f,0x28,0xc1]
1472 ; FMACALL64-NEXT:    addq $72, %rsp ## encoding: [0x48,0x83,0xc4,0x48]
1473 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1475 ; AVX512-LABEL: test_v2f64:
1476 ; AVX512:       ## %bb.0:
1477 ; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1478 ; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1479 ; AVX512-NEXT:    retq ## encoding: [0xc3]
1481 ; AVX512VL-LABEL: test_v2f64:
1482 ; AVX512VL:       ## %bb.0:
1483 ; AVX512VL-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1484 ; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1485 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1487 ; FMACALL32_BDVER2-LABEL: test_v2f64:
1488 ; FMACALL32_BDVER2:       ## %bb.0:
1489 ; FMACALL32_BDVER2-NEXT:    subl $108, %esp ## encoding: [0x83,0xec,0x6c]
1490 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1491 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50]
1492 ; FMACALL32_BDVER2-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc1]
1493 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],xmm1[0]
1494 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1495 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x30]
1496 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1497 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x40]
1498 ; FMACALL32_BDVER2-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x54,0x24,0x10]
1499 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1500 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1501 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1502 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1503 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30]
1504 ; FMACALL32_BDVER2-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10]
1505 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1506 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
1507 ; FMACALL32_BDVER2-NEXT:    vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1508 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x58]
1509 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0,1],xmm0[2,3]
1510 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1511 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28]
1512 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1513 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1514 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20]
1515 ; FMACALL32_BDVER2-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x28]
1516 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
1517 ; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x20]
1518 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0,1]
1519 ; FMACALL32_BDVER2-NEXT:    addl $108, %esp ## encoding: [0x83,0xc4,0x6c]
1520 ; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
1521   %call = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
1522   ret <2 x double> %call
1525 define <2 x double> @test_v2f64_reassoc(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
1526 ; FMA32-LABEL: test_v2f64_reassoc:
1527 ; FMA32:       ## %bb.0:
1528 ; FMA32-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1529 ; FMA32-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1530 ; FMA32-NEXT:    retl ## encoding: [0xc3]
1532 ; FMACALL32-LABEL: test_v2f64_reassoc:
1533 ; FMACALL32:       ## %bb.0:
1534 ; FMACALL32-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x59,0xc1]
1535 ; FMACALL32-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc2]
1536 ; FMACALL32-NEXT:    retl ## encoding: [0xc3]
1538 ; FMA64-LABEL: test_v2f64_reassoc:
1539 ; FMA64:       ## %bb.0:
1540 ; FMA64-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1541 ; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1542 ; FMA64-NEXT:    retq ## encoding: [0xc3]
1544 ; FMACALL64-LABEL: test_v2f64_reassoc:
1545 ; FMACALL64:       ## %bb.0:
1546 ; FMACALL64-NEXT:    mulpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x59,0xc1]
1547 ; FMACALL64-NEXT:    addpd %xmm2, %xmm0 ## encoding: [0x66,0x0f,0x58,0xc2]
1548 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1550 ; AVX512-LABEL: test_v2f64_reassoc:
1551 ; AVX512:       ## %bb.0:
1552 ; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1553 ; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1554 ; AVX512-NEXT:    retq ## encoding: [0xc3]
1556 ; AVX512VL-LABEL: test_v2f64_reassoc:
1557 ; AVX512VL:       ## %bb.0:
1558 ; AVX512VL-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1559 ; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1560 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1561   %call = call reassoc <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
1562   ret <2 x double> %call
1565 define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 {
1566 ; FMA32-LABEL: test_v4f64:
1567 ; FMA32:       ## %bb.0: ## %entry
1568 ; FMA32-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
1569 ; FMA32-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
1570 ; FMA32-NEXT:    retl ## encoding: [0xc3]
1572 ; FMA64-LABEL: test_v4f64:
1573 ; FMA64:       ## %bb.0: ## %entry
1574 ; FMA64-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
1575 ; FMA64-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
1576 ; FMA64-NEXT:    retq ## encoding: [0xc3]
1578 ; FMACALL64-LABEL: test_v4f64:
1579 ; FMACALL64:       ## %bb.0: ## %entry
1580 ; FMACALL64-NEXT:    subq $120, %rsp ## encoding: [0x48,0x83,0xec,0x78]
1581 ; FMACALL64-NEXT:    movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1582 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x6c,0x24,0x40]
1583 ; FMACALL64-NEXT:    movaps %xmm4, (%rsp) ## 16-byte Spill
1584 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x24,0x24]
1585 ; FMACALL64-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1586 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x5c,0x24,0x30]
1587 ; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1588 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x60]
1589 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1590 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x20]
1591 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1592 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x50]
1593 ; FMACALL64-NEXT:    movaps %xmm2, %xmm1 ## encoding: [0x0f,0x28,0xca]
1594 ; FMACALL64-NEXT:    movaps %xmm4, %xmm2 ## encoding: [0x0f,0x28,0xd4]
1595 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1596 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1597 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1598 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
1599 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1600 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
1601 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1602 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1603 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1604 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
1605 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1606 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1607 ; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
1608 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
1609 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1610 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1611 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1612 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1613 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1614 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1615 ; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1616 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1617 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1618 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
1619 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1620 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
1621 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1622 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
1623 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
1624 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x40]
1625 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1626 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1627 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
1628 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
1629 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1630 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
1631 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1632 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1633 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1634 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
1635 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1636 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1637 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
1638 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x40]
1639 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1640 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1641 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1642 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1643 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1644 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1645 ; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1646 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1647 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1648 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x10]
1649 ; FMACALL64-NEXT:    addq $120, %rsp ## encoding: [0x48,0x83,0xc4,0x78]
1650 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1652 ; AVX512-LABEL: test_v4f64:
1653 ; AVX512:       ## %bb.0: ## %entry
1654 ; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
1655 ; AVX512-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
1656 ; AVX512-NEXT:    retq ## encoding: [0xc3]
1658 ; AVX512VL-LABEL: test_v4f64:
1659 ; AVX512VL:       ## %bb.0: ## %entry
1660 ; AVX512VL-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
1661 ; AVX512VL-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
1662 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1664 ; FMACALL32_BDVER2-LABEL: test_v4f64:
1665 ; FMACALL32_BDVER2:       ## %bb.0: ## %entry
1666 ; FMACALL32_BDVER2-NEXT:    subl $236, %esp ## encoding: [0x81,0xec,0xec,0x00,0x00,0x00]
1667 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01]
1668 ; FMACALL32_BDVER2-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1669 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0xc0,0x00,0x00,0x00]
1670 ; FMACALL32_BDVER2-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1671 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0x80,0x00,0x00,0x00]
1672 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01]
1673 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
1674 ; FMACALL32_BDVER2-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1675 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xa0,0x00,0x00,0x00]
1676 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1677 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60]
1678 ; FMACALL32_BDVER2-NEXT:    vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2]
1679 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],xmm2[0]
1680 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1681 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x70]
1682 ; FMACALL32_BDVER2-NEXT:    vmovlps %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x5c,0x24,0x10]
1683 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1684 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x50]
1685 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1686 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1687 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1688 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1689 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1690 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x44]
1691 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1692 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00]
1693 ; FMACALL32_BDVER2-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10]
1694 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1695 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00]
1696 ; FMACALL32_BDVER2-NEXT:    vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1697 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xa0,0x00,0x00,0x00]
1698 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[1],mem[1]
1699 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1700 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1701 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1702 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1703 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1704 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x38]
1705 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1706 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00]
1707 ; FMACALL32_BDVER2-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10]
1708 ; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1709 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00]
1710 ; FMACALL32_BDVER2-NEXT:    vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1711 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xa0,0x00,0x00,0x00]
1712 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0]
1713 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1714 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1715 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1716 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1717 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1718 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x70]
1719 ; FMACALL32_BDVER2-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10]
1720 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1721 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50]
1722 ; FMACALL32_BDVER2-NEXT:    vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1723 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x68]
1724 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0,1],xmm0[2,3]
1725 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1726 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x30]
1727 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1728 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x38]
1729 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28]
1730 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1731 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x44]
1732 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20]
1733 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1734 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1735 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x18]
1736 ; FMACALL32_BDVER2-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x30]
1737 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
1738 ; FMACALL32_BDVER2-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x20]
1739 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = mem[0],zero
1740 ; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x28]
1741 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0,1]
1742 ; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x18]
1743 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0,1]
1744 ; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
1745 ; FMACALL32_BDVER2-NEXT:    addl $236, %esp ## encoding: [0x81,0xc4,0xec,0x00,0x00,0x00]
1746 ; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
1747 entry:
1748   %call = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
1749   ret <4 x double> %call
1752 define <8 x double> @test_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 {
1753 ; FMA32-LABEL: test_v8f64:
1754 ; FMA32:       ## %bb.0: ## %entry
1755 ; FMA32-NEXT:    pushl %ebp ## encoding: [0x55]
1756 ; FMA32-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
1757 ; FMA32-NEXT:    andl $-32, %esp ## encoding: [0x83,0xe4,0xe0]
1758 ; FMA32-NEXT:    subl $32, %esp ## encoding: [0x83,0xec,0x20]
1759 ; FMA32-NEXT:    vfmadd213pd 8(%ebp), %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0xed,0xa8,0x45,0x08]
1760 ; FMA32-NEXT:    ## ymm0 = (ymm2 * ymm0) + mem
1761 ; FMA32-NEXT:    vfmadd213pd 40(%ebp), %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0xe5,0xa8,0x4d,0x28]
1762 ; FMA32-NEXT:    ## ymm1 = (ymm3 * ymm1) + mem
1763 ; FMA32-NEXT:    movl %ebp, %esp ## encoding: [0x89,0xec]
1764 ; FMA32-NEXT:    popl %ebp ## encoding: [0x5d]
1765 ; FMA32-NEXT:    retl ## encoding: [0xc3]
1767 ; FMA64-LABEL: test_v8f64:
1768 ; FMA64:       ## %bb.0: ## %entry
1769 ; FMA64-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0xed,0xa8,0xc4]
1770 ; FMA64-NEXT:    ## ymm0 = (ymm2 * ymm0) + ymm4
1771 ; FMA64-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0xe5,0xa8,0xcd]
1772 ; FMA64-NEXT:    ## ymm1 = (ymm3 * ymm1) + ymm5
1773 ; FMA64-NEXT:    retq ## encoding: [0xc3]
1775 ; FMACALL64-LABEL: test_v8f64:
1776 ; FMACALL64:       ## %bb.0: ## %entry
1777 ; FMACALL64-NEXT:    subq $152, %rsp ## encoding: [0x48,0x81,0xec,0x98,0x00,0x00,0x00]
1778 ; FMACALL64-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1779 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x7c,0x24,0x70]
1780 ; FMACALL64-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1781 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x74,0x24,0x20]
1782 ; FMACALL64-NEXT:    movaps %xmm5, (%rsp) ## 16-byte Spill
1783 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x2c,0x24]
1784 ; FMACALL64-NEXT:    movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1785 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x64,0x24,0x10]
1786 ; FMACALL64-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1787 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x5c,0x24,0x60]
1788 ; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1789 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x50]
1790 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1791 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x40]
1792 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1793 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x84,0x24,0x80,0x00,0x00,0x00]
1794 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xa0,0x00,0x00,0x00]
1795 ; FMACALL64-NEXT:    movaps %xmm4, %xmm1 ## encoding: [0x0f,0x28,0xcc]
1796 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1797 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1798 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1799 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x30]
1800 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1801 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1802 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1803 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1804 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1805 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1806 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1807 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1808 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xa0,0x00,0x00,0x00]
1809 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1810 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1811 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1812 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1813 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1814 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
1815 ; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1816 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1817 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1818 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x30]
1819 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1820 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
1821 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1822 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1823 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
1824 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1825 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1826 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1827 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
1828 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1829 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
1830 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1831 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1832 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1833 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1834 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1835 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1836 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
1837 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1838 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1839 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1840 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1841 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1842 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1843 ; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1844 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1845 ; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1846 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
1847 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1848 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
1849 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1850 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
1851 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
1852 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1853 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1854 ; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
1855 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
1856 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1857 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
1858 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1859 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1860 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1861 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
1862 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1863 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1864 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
1865 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1866 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1867 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1868 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1869 ; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1870 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1871 ; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1872 ; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1873 ; FMACALL64-NEXT:    movaps %xmm1, (%rsp) ## 16-byte Spill
1874 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x0c,0x24]
1875 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1876 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x60]
1877 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1878 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x70]
1879 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
1880 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1881 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1882 ; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1883 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
1884 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1885 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x60]
1886 ; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1887 ; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1888 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1889 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x70]
1890 ; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1891 ; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1892 ; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
1893 ; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1894 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1895 ; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1896 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1897 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload
1898 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x5c,0x24,0x20]
1899 ; FMACALL64-NEXT:    movlhps %xmm0, %xmm3 ## encoding: [0x0f,0x16,0xd8]
1900 ; FMACALL64-NEXT:    ## xmm3 = xmm3[0],xmm0[0]
1901 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1902 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
1903 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1904 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1905 ; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
1906 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
1907 ; FMACALL64-NEXT:    addq $152, %rsp ## encoding: [0x48,0x81,0xc4,0x98,0x00,0x00,0x00]
1908 ; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1910 ; AVX512-LABEL: test_v8f64:
1911 ; AVX512:       ## %bb.0: ## %entry
1912 ; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
1913 ; AVX512-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
1914 ; AVX512-NEXT:    retq ## encoding: [0xc3]
1916 ; AVX512VL-LABEL: test_v8f64:
1917 ; AVX512VL:       ## %bb.0: ## %entry
1918 ; AVX512VL-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
1919 ; AVX512VL-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
1920 ; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1922 ; FMACALL32_BDVER2-LABEL: test_v8f64:
1923 ; FMACALL32_BDVER2:       ## %bb.0: ## %entry
1924 ; FMACALL32_BDVER2-NEXT:    pushl %ebp ## encoding: [0x55]
1925 ; FMACALL32_BDVER2-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
1926 ; FMACALL32_BDVER2-NEXT:    andl $-32, %esp ## encoding: [0x83,0xe4,0xe0]
1927 ; FMACALL32_BDVER2-NEXT:    subl $352, %esp ## encoding: [0x81,0xec,0x60,0x01,0x00,0x00]
1928 ; FMACALL32_BDVER2-NEXT:    ## imm = 0x160
1929 ; FMACALL32_BDVER2-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1930 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0xe0,0x00,0x00,0x00]
1931 ; FMACALL32_BDVER2-NEXT:    vmovsd 56(%ebp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x45,0x38]
1932 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
1933 ; FMACALL32_BDVER2-NEXT:    vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1934 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x00,0x01,0x00,0x00]
1935 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01]
1936 ; FMACALL32_BDVER2-NEXT:    vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1937 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x9c,0x24,0xc0,0x00,0x00,0x00]
1938 ; FMACALL32_BDVER2-NEXT:    vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1939 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x8c,0x24,0xa0,0x00,0x00,0x00]
1940 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1941 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0x30,0x01,0x00,0x00]
1942 ; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
1943 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm1, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc8,0x01]
1944 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1945 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x40,0x01,0x00,0x00]
1946 ; FMACALL32_BDVER2-NEXT:    vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2]
1947 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],xmm2[0]
1948 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1949 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1950 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1951 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1952 ; FMACALL32_BDVER2-NEXT:    vmovsd 48(%ebp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x45,0x30]
1953 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
1954 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1955 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x94,0x00,0x00,0x00]
1956 ; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
1957 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1958 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00]
1959 ; FMACALL32_BDVER2-NEXT:    vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1960 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xc0,0x00,0x00,0x00]
1961 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[1],mem[1]
1962 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1963 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1964 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1965 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1966 ; FMACALL32_BDVER2-NEXT:    vmovsd 40(%ebp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x45,0x28]
1967 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
1968 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1969 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x88,0x00,0x00,0x00]
1970 ; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
1971 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1972 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00]
1973 ; FMACALL32_BDVER2-NEXT:    vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1974 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xc0,0x00,0x00,0x00]
1975 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0]
1976 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1977 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1978 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1979 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1980 ; FMACALL32_BDVER2-NEXT:    vmovsd 32(%ebp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x45,0x20]
1981 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
1982 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1983 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xc0,0x00,0x00,0x00]
1984 ; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
1985 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1986 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x00,0x01,0x00,0x00]
1987 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
1988 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1989 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00]
1990 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1991 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x20]
1992 ; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
1993 ; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1994 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x30]
1995 ; FMACALL32_BDVER2-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x15,0xc1]
1996 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[1],xmm1[1]
1997 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1998 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1999 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2000 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2001 ; FMACALL32_BDVER2-NEXT:    vmovsd 24(%ebp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x45,0x18]
2002 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
2003 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
2004 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xa0,0x00,0x00,0x00]
2005 ; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
2006 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
2007 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30]
2008 ; FMACALL32_BDVER2-NEXT:    vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
2009 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x14,0x44,0x24,0x20]
2010 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0]
2011 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
2012 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2013 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2014 ; FMACALL32_BDVER2-NEXT:    vmovsd 16(%ebp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x45,0x10]
2015 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
2016 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
2017 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x30]
2018 ; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
2019 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
2020 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00]
2021 ; FMACALL32_BDVER2-NEXT:    vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
2022 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0x00,0x01,0x00,0x00]
2023 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[1],mem[1]
2024 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
2025 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
2026 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2027 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2028 ; FMACALL32_BDVER2-NEXT:    vmovsd 8(%ebp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x45,0x08]
2029 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
2030 ; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
2031 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x20]
2032 ; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
2033 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
2034 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00]
2035 ; FMACALL32_BDVER2-NEXT:    vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
2036 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0x00,0x01,0x00,0x00]
2037 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0]
2038 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
2039 ; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
2040 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2041 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2042 ; FMACALL32_BDVER2-NEXT:    vmovsd 64(%ebp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x45,0x40]
2043 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
2044 ; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
2045 ; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
2046 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x30,0x01,0x00,0x00]
2047 ; FMACALL32_BDVER2-NEXT:    vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
2048 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x12,0x84,0x24,0x48,0x01,0x00,0x00]
2049 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0,1],xmm0[2,3]
2050 ; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
2051 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x60]
2052 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2053 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x20]
2054 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x58]
2055 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2056 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x30]
2057 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x50]
2058 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2059 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xa0,0x00,0x00,0x00]
2060 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x48]
2061 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2062 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xc0,0x00,0x00,0x00]
2063 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x9c,0x24,0x80,0x00,0x00,0x00]
2064 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2065 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x88,0x00,0x00,0x00]
2066 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x78]
2067 ; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2068 ; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x94,0x00,0x00,0x00]
2069 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x70]
2070 ; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2071 ; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2072 ; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x68]
2073 ; FMACALL32_BDVER2-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x60]
2074 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0],zero
2075 ; FMACALL32_BDVER2-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x50]
2076 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = mem[0],zero
2077 ; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x58]
2078 ; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0,1]
2079 ; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x48]
2080 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0,1]
2081 ; FMACALL32_BDVER2-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm2 ## encoding: [0xc5,0xfb,0x10,0x54,0x24,0x70]
2082 ; FMACALL32_BDVER2-NEXT:    ## xmm2 = mem[0],zero
2083 ; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x16,0x54,0x24,0x68]
2084 ; FMACALL32_BDVER2-NEXT:    ## xmm2 = xmm2[0,1],mem[0,1]
2085 ; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
2086 ; FMACALL32_BDVER2-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x8c,0x24,0x80,0x00,0x00,0x00]
2087 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = mem[0],zero
2088 ; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x78]
2089 ; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0,1]
2090 ; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01]
2091 ; FMACALL32_BDVER2-NEXT:    movl %ebp, %esp ## encoding: [0x89,0xec]
2092 ; FMACALL32_BDVER2-NEXT:    popl %ebp ## encoding: [0x5d]
2093 ; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
2094 entry:
2095   %call = call <8 x double> @llvm.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c)
2096   ret <8 x double> %call
2099 declare float @llvm.fma.f32(float, float, float)
2100 declare double @llvm.fma.f64(double, double, double)
2101 declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80)
2103 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
2104 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
2105 declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
2107 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
2108 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
2109 declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)
2111 attributes #0 = { nounwind }