1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK-FMA
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK-AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK-FMA-WIN
6 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/fma-builtins.c
; Packed single-precision fused multiply-add: a single llvm.fma.v4f32 call on
; %a, %b and %c. Every configuration is expected to emit one vfmadd213ps on
; xmm registers; the x86_64-pc-windows configuration first loads two operands
; through %rcx and %rdx and takes the addend straight from memory at (%r8).
8 define <4 x float> @test_mm_fmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
9 ; CHECK-FMA-LABEL: test_mm_fmadd_ps:
10 ; CHECK-FMA: # %bb.0: # %entry
11 ; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
12 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
15 ; CHECK-AVX512VL-LABEL: test_mm_fmadd_ps:
16 ; CHECK-AVX512VL: # %bb.0: # %entry
17 ; CHECK-AVX512VL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
18 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
19 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
21 ; CHECK-FMA-WIN-LABEL: test_mm_fmadd_ps:
22 ; CHECK-FMA-WIN: # %bb.0: # %entry
23 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
24 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
25 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
26 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
27 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
29 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
; Packed double-precision fused multiply-add: a single llvm.fma.v2f64 call on
; %a, %b and %c. Expected output is one vfmadd213pd on xmm registers; the
; x86_64-pc-windows configuration loads two operands through %rcx and %rdx and
; uses (%r8) as the memory addend.
33 define <2 x double> @test_mm_fmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
34 ; CHECK-FMA-LABEL: test_mm_fmadd_pd:
35 ; CHECK-FMA: # %bb.0: # %entry
36 ; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
37 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
38 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
40 ; CHECK-AVX512VL-LABEL: test_mm_fmadd_pd:
41 ; CHECK-AVX512VL: # %bb.0: # %entry
42 ; CHECK-AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
43 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
44 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
46 ; CHECK-FMA-WIN-LABEL: test_mm_fmadd_pd:
47 ; CHECK-FMA-WIN: # %bb.0: # %entry
48 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
49 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
50 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
51 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
52 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
54 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
; Scalar single-precision fused multiply-add on element 0: element 0 of %a, %b
; and %c is extracted, fed to llvm.fma.f32, and the result is inserted back into
; element 0 of %a. Expected output is one vfmadd213ss on registers; the
; x86_64-pc-windows configuration instead uses the 132 form with the
; multiplicand read from memory at (%rdx) and the addend loaded by vmovss.
58 define <4 x float> @test_mm_fmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
59 ; CHECK-FMA-LABEL: test_mm_fmadd_ss:
60 ; CHECK-FMA: # %bb.0: # %entry
61 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
62 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
63 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
65 ; CHECK-AVX512VL-LABEL: test_mm_fmadd_ss:
66 ; CHECK-AVX512VL: # %bb.0: # %entry
67 ; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
68 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
69 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
71 ; CHECK-FMA-WIN-LABEL: test_mm_fmadd_ss:
72 ; CHECK-FMA-WIN: # %bb.0: # %entry
73 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
74 ; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
75 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
76 ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02]
77 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
78 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Pull out element 0 of each operand for the scalar FMA.
80 %0 = extractelement <4 x float> %a, i64 0
81 %1 = extractelement <4 x float> %b, i64 0
82 %2 = extractelement <4 x float> %c, i64 0
83 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
; Write the scalar result into element 0 of %a; the other elements come from %a.
84 %4 = insertelement <4 x float> %a, float %3, i64 0
; Scalar double-precision fused multiply-add on element 0: element 0 of %a, %b
; and %c is extracted, fed to llvm.fma.f64, and the result is inserted back into
; element 0 of %a. Expected output is one vfmadd213sd on registers; the
; x86_64-pc-windows configuration uses vfmadd132sd with the multiplicand read
; from (%rdx) and the addend loaded by vmovsd.
88 define <2 x double> @test_mm_fmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
89 ; CHECK-FMA-LABEL: test_mm_fmadd_sd:
90 ; CHECK-FMA: # %bb.0: # %entry
91 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
92 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
93 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
95 ; CHECK-AVX512VL-LABEL: test_mm_fmadd_sd:
96 ; CHECK-AVX512VL: # %bb.0: # %entry
97 ; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
98 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
99 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
101 ; CHECK-FMA-WIN-LABEL: test_mm_fmadd_sd:
102 ; CHECK-FMA-WIN: # %bb.0: # %entry
103 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
104 ; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
105 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
106 ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02]
107 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
108 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Pull out element 0 of each operand for the scalar FMA.
110 %0 = extractelement <2 x double> %a, i64 0
111 %1 = extractelement <2 x double> %b, i64 0
112 %2 = extractelement <2 x double> %c, i64 0
113 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
; Write the scalar result into element 0 of %a; element 1 comes from %a.
114 %4 = insertelement <2 x double> %a, double %3, i64 0
; Packed single-precision fused multiply-subtract: %c is negated in IR and the
; negated value is used as the addend of llvm.fma.v4f32. The negation is
; expected to fold into the instruction, giving one vfmsub213ps (with a memory
; addend at (%r8) in the x86_64-pc-windows configuration).
118 define <4 x float> @test_mm_fmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
119 ; CHECK-FMA-LABEL: test_mm_fmsub_ps:
120 ; CHECK-FMA: # %bb.0: # %entry
121 ; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
122 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
123 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
125 ; CHECK-AVX512VL-LABEL: test_mm_fmsub_ps:
126 ; CHECK-AVX512VL: # %bb.0: # %entry
127 ; CHECK-AVX512VL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
128 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
129 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
131 ; CHECK-FMA-WIN-LABEL: test_mm_fmsub_ps:
132 ; CHECK-FMA-WIN: # %bb.0: # %entry
133 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
134 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
135 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
136 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
137 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Negate %c, written as a subtraction from a vector of -0.0.
139 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
140 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2
; Packed double-precision fused multiply-subtract: %c is negated in IR and the
; negated value is used as the addend of llvm.fma.v2f64. The negation is
; expected to fold into the instruction, giving one vfmsub213pd (with a memory
; addend at (%r8) in the x86_64-pc-windows configuration).
144 define <2 x double> @test_mm_fmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
145 ; CHECK-FMA-LABEL: test_mm_fmsub_pd:
146 ; CHECK-FMA: # %bb.0: # %entry
147 ; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
148 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
149 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
151 ; CHECK-AVX512VL-LABEL: test_mm_fmsub_pd:
152 ; CHECK-AVX512VL: # %bb.0: # %entry
153 ; CHECK-AVX512VL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
154 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
155 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
157 ; CHECK-FMA-WIN-LABEL: test_mm_fmsub_pd:
158 ; CHECK-FMA-WIN: # %bb.0: # %entry
159 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
160 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
161 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
162 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
163 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Negate %c, written as a subtraction from a vector of -0.0.
165 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
166 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2
; Scalar single-precision fused multiply-subtract on element 0: element 0 of %c
; is negated before being passed as the addend of llvm.fma.f32, and the result
; is inserted into element 0 of %a. Expected output is one vfmsub213ss on
; registers; the x86_64-pc-windows configuration uses vfmsub132ss with the
; multiplicand read from (%rdx).
170 define <4 x float> @test_mm_fmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
171 ; CHECK-FMA-LABEL: test_mm_fmsub_ss:
172 ; CHECK-FMA: # %bb.0: # %entry
173 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
174 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
175 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
177 ; CHECK-AVX512VL-LABEL: test_mm_fmsub_ss:
178 ; CHECK-AVX512VL: # %bb.0: # %entry
179 ; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
180 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
181 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
183 ; CHECK-FMA-WIN-LABEL: test_mm_fmsub_ss:
184 ; CHECK-FMA-WIN: # %bb.0: # %entry
185 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
186 ; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
187 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
188 ; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02]
189 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
190 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
192 %0 = extractelement <4 x float> %a, i64 0
193 %1 = extractelement <4 x float> %b, i64 0
194 %.rhs.i = extractelement <4 x float> %c, i64 0
; Negate the extracted addend, written as -0.0 minus the value.
195 %2 = fsub float -0.000000e+00, %.rhs.i
196 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
; Write the scalar result into element 0 of %a; the other elements come from %a.
197 %4 = insertelement <4 x float> %a, float %3, i64 0
; Scalar double-precision fused multiply-subtract on element 0: element 0 of %c
; is negated before being passed as the addend of llvm.fma.f64, and the result
; is inserted into element 0 of %a. Expected output is one vfmsub213sd on
; registers; the x86_64-pc-windows configuration uses vfmsub132sd with the
; multiplicand read from (%rdx).
201 define <2 x double> @test_mm_fmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
202 ; CHECK-FMA-LABEL: test_mm_fmsub_sd:
203 ; CHECK-FMA: # %bb.0: # %entry
204 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
205 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
206 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
208 ; CHECK-AVX512VL-LABEL: test_mm_fmsub_sd:
209 ; CHECK-AVX512VL: # %bb.0: # %entry
210 ; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
211 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
212 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
214 ; CHECK-FMA-WIN-LABEL: test_mm_fmsub_sd:
215 ; CHECK-FMA-WIN: # %bb.0: # %entry
216 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
217 ; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
218 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
219 ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02]
220 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
221 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
223 %0 = extractelement <2 x double> %a, i64 0
224 %1 = extractelement <2 x double> %b, i64 0
225 %.rhs.i = extractelement <2 x double> %c, i64 0
; Negate the extracted addend, written as -0.0 minus the value.
226 %2 = fsub double -0.000000e+00, %.rhs.i
227 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
; Write the scalar result into element 0 of %a; element 1 comes from %a.
228 %4 = insertelement <2 x double> %a, double %3, i64 0
; Packed single-precision negated multiply-add: %a is negated in IR and used as
; the first multiplicand of llvm.fma.v4f32, with %c as the addend. The negation
; is expected to fold into the instruction, giving one vfnmadd213ps (with a
; memory addend at (%r8) in the x86_64-pc-windows configuration).
232 define <4 x float> @test_mm_fnmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
233 ; CHECK-FMA-LABEL: test_mm_fnmadd_ps:
234 ; CHECK-FMA: # %bb.0: # %entry
235 ; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xac,0xc2]
236 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
237 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
239 ; CHECK-AVX512VL-LABEL: test_mm_fnmadd_ps:
240 ; CHECK-AVX512VL: # %bb.0: # %entry
241 ; CHECK-AVX512VL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
242 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
243 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
245 ; CHECK-FMA-WIN-LABEL: test_mm_fnmadd_ps:
246 ; CHECK-FMA-WIN: # %bb.0: # %entry
247 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
248 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
249 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
250 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
251 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Negate %a, written as a subtraction from a vector of -0.0.
253 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
254 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c) #2
; Packed double-precision negated multiply-add: %a is negated in IR and used as
; the first multiplicand of llvm.fma.v2f64, with %c as the addend. The negation
; is expected to fold into the instruction, giving one vfnmadd213pd (with a
; memory addend at (%r8) in the x86_64-pc-windows configuration).
258 define <2 x double> @test_mm_fnmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
259 ; CHECK-FMA-LABEL: test_mm_fnmadd_pd:
260 ; CHECK-FMA: # %bb.0: # %entry
261 ; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
262 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
263 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
265 ; CHECK-AVX512VL-LABEL: test_mm_fnmadd_pd:
266 ; CHECK-AVX512VL: # %bb.0: # %entry
267 ; CHECK-AVX512VL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
268 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
269 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
271 ; CHECK-FMA-WIN-LABEL: test_mm_fnmadd_pd:
272 ; CHECK-FMA-WIN: # %bb.0: # %entry
273 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
274 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
275 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
276 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
277 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Negate %a, written as a subtraction from a vector of -0.0.
279 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
280 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c) #2
; Scalar single-precision negated multiply-add on element 0: element 0 of %b is
; negated and used as the second multiplicand of llvm.fma.f32, with element 0 of
; %c as the addend; the result is inserted into element 0 of %a. Expected output
; is one vfnmadd213ss on registers; the x86_64-pc-windows configuration uses
; vfnmadd132ss with the multiplicand read from (%rdx).
284 define <4 x float> @test_mm_fnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
285 ; CHECK-FMA-LABEL: test_mm_fnmadd_ss:
286 ; CHECK-FMA: # %bb.0: # %entry
287 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
288 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
289 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
291 ; CHECK-AVX512VL-LABEL: test_mm_fnmadd_ss:
292 ; CHECK-AVX512VL: # %bb.0: # %entry
293 ; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
294 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
295 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
297 ; CHECK-FMA-WIN-LABEL: test_mm_fnmadd_ss:
298 ; CHECK-FMA-WIN: # %bb.0: # %entry
299 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
300 ; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
301 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
302 ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02]
303 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
304 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
306 %0 = extractelement <4 x float> %a, i64 0
307 %.rhs.i = extractelement <4 x float> %b, i64 0
; Negate the extracted multiplicand from %b, written as -0.0 minus the value.
308 %1 = fsub float -0.000000e+00, %.rhs.i
309 %2 = extractelement <4 x float> %c, i64 0
310 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
; Write the scalar result into element 0 of %a; the other elements come from %a.
311 %4 = insertelement <4 x float> %a, float %3, i64 0
; Scalar double-precision negated multiply-add on element 0: element 0 of %b is
; negated and used as the second multiplicand of llvm.fma.f64, with element 0 of
; %c as the addend; the result is inserted into element 0 of %a. Expected output
; is one vfnmadd213sd on registers; the x86_64-pc-windows configuration uses
; vfnmadd132sd with the multiplicand read from (%rdx).
315 define <2 x double> @test_mm_fnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
316 ; CHECK-FMA-LABEL: test_mm_fnmadd_sd:
317 ; CHECK-FMA: # %bb.0: # %entry
318 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
319 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
320 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
322 ; CHECK-AVX512VL-LABEL: test_mm_fnmadd_sd:
323 ; CHECK-AVX512VL: # %bb.0: # %entry
324 ; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
325 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
326 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
328 ; CHECK-FMA-WIN-LABEL: test_mm_fnmadd_sd:
329 ; CHECK-FMA-WIN: # %bb.0: # %entry
330 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
331 ; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
332 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
333 ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02]
334 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
335 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
337 %0 = extractelement <2 x double> %a, i64 0
338 %.rhs.i = extractelement <2 x double> %b, i64 0
; Negate the extracted multiplicand from %b, written as -0.0 minus the value.
339 %1 = fsub double -0.000000e+00, %.rhs.i
340 %2 = extractelement <2 x double> %c, i64 0
341 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
; Write the scalar result into element 0 of %a; element 1 comes from %a.
342 %4 = insertelement <2 x double> %a, double %3, i64 0
; Packed single-precision negated multiply-subtract: both %a (a multiplicand)
; and %c (the addend) are negated in IR before the llvm.fma.v4f32 call. Both
; negations are expected to fold into the instruction, giving one vfnmsub213ps
; (with a memory operand at (%r8) in the x86_64-pc-windows configuration).
346 define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
347 ; CHECK-FMA-LABEL: test_mm_fnmsub_ps:
348 ; CHECK-FMA: # %bb.0: # %entry
349 ; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xae,0xc2]
350 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
351 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
353 ; CHECK-AVX512VL-LABEL: test_mm_fnmsub_ps:
354 ; CHECK-AVX512VL: # %bb.0: # %entry
355 ; CHECK-AVX512VL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
356 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
357 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
359 ; CHECK-FMA-WIN-LABEL: test_mm_fnmsub_ps:
360 ; CHECK-FMA-WIN: # %bb.0: # %entry
361 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
362 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
363 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00]
364 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
365 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Negate %a and %c, each written as a subtraction from a vector of -0.0.
367 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
368 %sub1.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
369 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %sub1.i) #2
; Packed double-precision negated multiply-subtract: both %a (a multiplicand)
; and %c (the addend) are negated in IR before the llvm.fma.v2f64 call. Both
; negations are expected to fold into the instruction, giving one vfnmsub213pd
; (with a memory operand at (%r8) in the x86_64-pc-windows configuration).
373 define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
374 ; CHECK-FMA-LABEL: test_mm_fnmsub_pd:
375 ; CHECK-FMA: # %bb.0: # %entry
376 ; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
377 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
378 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
380 ; CHECK-AVX512VL-LABEL: test_mm_fnmsub_pd:
381 ; CHECK-AVX512VL: # %bb.0: # %entry
382 ; CHECK-AVX512VL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
383 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
384 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
386 ; CHECK-FMA-WIN-LABEL: test_mm_fnmsub_pd:
387 ; CHECK-FMA-WIN: # %bb.0: # %entry
388 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
389 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
390 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00]
391 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
392 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Negate %a and %c, each written as a subtraction from a vector of -0.0.
394 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
395 %sub1.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
396 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %sub1.i) #2
; Scalar single-precision negated multiply-subtract on element 0: element 0 of
; %b (a multiplicand) and element 0 of %c (the addend) are both negated before
; the llvm.fma.f32 call, and the result is inserted into element 0 of %a.
; Expected output is one vfnmsub213ss on registers; the x86_64-pc-windows
; configuration uses vfnmsub132ss with the multiplicand read from (%rdx).
400 define <4 x float> @test_mm_fnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
401 ; CHECK-FMA-LABEL: test_mm_fnmsub_ss:
402 ; CHECK-FMA: # %bb.0: # %entry
403 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
404 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
405 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
407 ; CHECK-AVX512VL-LABEL: test_mm_fnmsub_ss:
408 ; CHECK-AVX512VL: # %bb.0: # %entry
409 ; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
410 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
411 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
413 ; CHECK-FMA-WIN-LABEL: test_mm_fnmsub_ss:
414 ; CHECK-FMA-WIN: # %bb.0: # %entry
415 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
416 ; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
417 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
418 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02]
419 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
420 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
422 %0 = extractelement <4 x float> %a, i64 0
423 %.rhs.i = extractelement <4 x float> %b, i64 0
; Negate the extracted multiplicand from %b, written as -0.0 minus the value.
424 %1 = fsub float -0.000000e+00, %.rhs.i
425 %.rhs2.i = extractelement <4 x float> %c, i64 0
; Negate the extracted addend from %c the same way.
426 %2 = fsub float -0.000000e+00, %.rhs2.i
427 %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
; Write the scalar result into element 0 of %a; the other elements come from %a.
428 %4 = insertelement <4 x float> %a, float %3, i64 0
; Scalar double-precision negated multiply-subtract on element 0: element 0 of
; %b (a multiplicand) and element 0 of %c (the addend) are both negated before
; the llvm.fma.f64 call, and the result is inserted into element 0 of %a.
; Expected output is one vfnmsub213sd on registers; the x86_64-pc-windows
; configuration uses vfnmsub132sd with the multiplicand read from (%rdx).
432 define <2 x double> @test_mm_fnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
433 ; CHECK-FMA-LABEL: test_mm_fnmsub_sd:
434 ; CHECK-FMA: # %bb.0: # %entry
435 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
436 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
437 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
439 ; CHECK-AVX512VL-LABEL: test_mm_fnmsub_sd:
440 ; CHECK-AVX512VL: # %bb.0: # %entry
441 ; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
442 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
443 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
445 ; CHECK-FMA-WIN-LABEL: test_mm_fnmsub_sd:
446 ; CHECK-FMA-WIN: # %bb.0: # %entry
447 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
448 ; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
449 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
450 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x02]
451 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
452 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
454 %0 = extractelement <2 x double> %a, i64 0
455 %.rhs.i = extractelement <2 x double> %b, i64 0
; Negate the extracted multiplicand from %b, written as -0.0 minus the value.
456 %1 = fsub double -0.000000e+00, %.rhs.i
457 %.rhs2.i = extractelement <2 x double> %c, i64 0
; Negate the extracted addend from %c the same way.
458 %2 = fsub double -0.000000e+00, %.rhs2.i
459 %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
; Write the scalar result into element 0 of %a; element 1 comes from %a.
460 %4 = insertelement <2 x double> %a, double %3, i64 0
; Packed single-precision alternating multiply-add/subtract, expressed as two
; llvm.fma.v4f32 calls (one with %c, one with negated %c) blended by a
; shufflevector. The whole pattern is expected to collapse into a single
; vfmaddsub213ps (with a memory operand at (%r8) in the x86_64-pc-windows
; configuration).
464 define <4 x float> @test_mm_fmaddsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
465 ; CHECK-FMA-LABEL: test_mm_fmaddsub_ps:
466 ; CHECK-FMA: # %bb.0: # %entry
467 ; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
468 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
469 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
471 ; CHECK-AVX512VL-LABEL: test_mm_fmaddsub_ps:
472 ; CHECK-AVX512VL: # %bb.0: # %entry
473 ; CHECK-AVX512VL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
474 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
475 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
477 ; CHECK-FMA-WIN-LABEL: test_mm_fmaddsub_ps:
478 ; CHECK-FMA-WIN: # %bb.0: # %entry
479 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
480 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
481 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00]
482 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
483 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; %0 is the add variant: a * b + c.
485 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
; %2 is the subtract variant: a * b + (-c), with -c written as -0.0 - c.
486 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
487 %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %1) #2
; Mask <0,5,2,7>: elements 0 and 2 come from the subtract result (%2),
; elements 1 and 3 from the add result (%0).
488 %3 = shufflevector <4 x float> %2, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Packed double-precision alternating multiply-add/subtract, expressed as two
; llvm.fma.v2f64 calls (one with %c, one with negated %c) blended by a
; shufflevector. The whole pattern is expected to collapse into a single
; vfmaddsub213pd (with a memory operand at (%r8) in the x86_64-pc-windows
; configuration).
492 define <2 x double> @test_mm_fmaddsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
493 ; CHECK-FMA-LABEL: test_mm_fmaddsub_pd:
494 ; CHECK-FMA: # %bb.0: # %entry
495 ; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
496 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
497 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
499 ; CHECK-AVX512VL-LABEL: test_mm_fmaddsub_pd:
500 ; CHECK-AVX512VL: # %bb.0: # %entry
501 ; CHECK-AVX512VL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
502 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
503 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
505 ; CHECK-FMA-WIN-LABEL: test_mm_fmaddsub_pd:
506 ; CHECK-FMA-WIN: # %bb.0: # %entry
507 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
508 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
509 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00]
510 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
511 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; %0 is the add variant: a * b + c.
513 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
; %2 is the subtract variant: a * b + (-c), with -c written as -0.0 - c.
514 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
515 %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %1) #2
; Mask <0,3>: element 0 comes from the subtract result (%2), element 1 from
; the add result (%0).
516 %3 = shufflevector <2 x double> %2, <2 x double> %0, <2 x i32> <i32 0, i32 3>
; Packed single-precision alternating multiply-subtract/add, expressed as two
; llvm.fma.v4f32 calls (one with negated %c, one with %c) blended by a
; shufflevector. The whole pattern is expected to collapse into a single
; vfmsubadd213ps (with a memory operand at (%r8) in the x86_64-pc-windows
; configuration).
520 define <4 x float> @test_mm_fmsubadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
521 ; CHECK-FMA-LABEL: test_mm_fmsubadd_ps:
522 ; CHECK-FMA: # %bb.0: # %entry
523 ; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
524 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
525 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
527 ; CHECK-AVX512VL-LABEL: test_mm_fmsubadd_ps:
528 ; CHECK-AVX512VL: # %bb.0: # %entry
529 ; CHECK-AVX512VL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
530 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
531 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
533 ; CHECK-FMA-WIN-LABEL: test_mm_fmsubadd_ps:
534 ; CHECK-FMA-WIN: # %bb.0: # %entry
535 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
536 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
537 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00]
538 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
539 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; %0 is the subtract variant: a * b + (-c), with -c written as -0.0 - c.
541 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
542 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2
; %1 is the add variant: a * b + c.
543 %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
; Mask <0,5,2,7>: elements 0 and 2 come from the add result (%1),
; elements 1 and 3 from the subtract result (%0).
544 %2 = shufflevector <4 x float> %1, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Packed double-precision alternating multiply-subtract/add, expressed as two
; llvm.fma.v2f64 calls (one with negated %c, one with %c) blended by a
; shufflevector. The whole pattern is expected to collapse into a single
; vfmsubadd213pd (with a memory operand at (%r8) in the x86_64-pc-windows
; configuration).
548 define <2 x double> @test_mm_fmsubadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
549 ; CHECK-FMA-LABEL: test_mm_fmsubadd_pd:
550 ; CHECK-FMA: # %bb.0: # %entry
551 ; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
552 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
553 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
555 ; CHECK-AVX512VL-LABEL: test_mm_fmsubadd_pd:
556 ; CHECK-AVX512VL: # %bb.0: # %entry
557 ; CHECK-AVX512VL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
558 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
559 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
561 ; CHECK-FMA-WIN-LABEL: test_mm_fmsubadd_pd:
562 ; CHECK-FMA-WIN: # %bb.0: # %entry
563 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
564 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
565 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00]
566 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
567 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; %0 is the subtract variant: a * b + (-c), with -c written as -0.0 - c.
569 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
570 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2
; %1 is the add variant: a * b + c.
571 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
; Mask <0,3>: element 0 comes from the add result (%1), element 1 from the
; subtract result (%0).
572 %2 = shufflevector <2 x double> %1, <2 x double> %0, <2 x i32> <i32 0, i32 3>
; 256-bit packed single-precision fused multiply-add: a single llvm.fma.v8f32
; call on %a, %b and %c. Expected output is one vfmadd213ps on ymm registers;
; the x86_64-pc-windows configuration loads two operands through %rcx and %rdx
; and takes the addend from memory at (%r8).
576 define <8 x float> @test_mm256_fmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
577 ; CHECK-FMA-LABEL: test_mm256_fmadd_ps:
578 ; CHECK-FMA: # %bb.0: # %entry
579 ; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
580 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
581 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
583 ; CHECK-AVX512VL-LABEL: test_mm256_fmadd_ps:
584 ; CHECK-AVX512VL: # %bb.0: # %entry
585 ; CHECK-AVX512VL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
586 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
587 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
589 ; CHECK-FMA-WIN-LABEL: test_mm256_fmadd_ps:
590 ; CHECK-FMA-WIN: # %bb.0: # %entry
591 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
592 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
593 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
594 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
595 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
597 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
; 256-bit packed double-precision fused multiply-add: a single llvm.fma.v4f64
; call on %a, %b and %c. Expected output is one vfmadd213pd on ymm registers;
; the x86_64-pc-windows configuration loads two operands through %rcx and %rdx
; and takes the addend from memory at (%r8).
601 define <4 x double> @test_mm256_fmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
602 ; CHECK-FMA-LABEL: test_mm256_fmadd_pd:
603 ; CHECK-FMA: # %bb.0: # %entry
604 ; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
605 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
606 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
608 ; CHECK-AVX512VL-LABEL: test_mm256_fmadd_pd:
609 ; CHECK-AVX512VL: # %bb.0: # %entry
610 ; CHECK-AVX512VL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
611 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
612 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
614 ; CHECK-FMA-WIN-LABEL: test_mm256_fmadd_pd:
615 ; CHECK-FMA-WIN: # %bb.0: # %entry
616 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
617 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
618 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
619 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
620 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
622 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
; 256-bit packed single-precision fused multiply-subtract: %c is negated in IR
; and the negated value is used as the addend of llvm.fma.v8f32. The negation is
; expected to fold into the instruction, giving one vfmsub213ps on ymm registers
; (with a memory addend at (%r8) in the x86_64-pc-windows configuration).
626 define <8 x float> @test_mm256_fmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
627 ; CHECK-FMA-LABEL: test_mm256_fmsub_ps:
628 ; CHECK-FMA: # %bb.0: # %entry
629 ; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
630 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
631 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
633 ; CHECK-AVX512VL-LABEL: test_mm256_fmsub_ps:
634 ; CHECK-AVX512VL: # %bb.0: # %entry
635 ; CHECK-AVX512VL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
636 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
637 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
639 ; CHECK-FMA-WIN-LABEL: test_mm256_fmsub_ps:
640 ; CHECK-FMA-WIN: # %bb.0: # %entry
641 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
642 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
643 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
644 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
645 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
; Negate %c, written as a subtraction from a vector of -0.0.
647 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
648 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
; 256-bit double-precision fused multiply-subtract: %c is negated with an fsub
; from -0.0 and passed as the addend of llvm.fma.v4f64. All three run
; configurations expect the negation to be absorbed into one vfmsub213pd; the
; Windows run reads its operands through (%rcx), (%rdx) and (%r8).
652 define <4 x double> @test_mm256_fmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
653 ; CHECK-FMA-LABEL: test_mm256_fmsub_pd:
654 ; CHECK-FMA: # %bb.0: # %entry
655 ; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
656 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
657 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
659 ; CHECK-AVX512VL-LABEL: test_mm256_fmsub_pd:
660 ; CHECK-AVX512VL: # %bb.0: # %entry
661 ; CHECK-AVX512VL-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
662 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
663 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
665 ; CHECK-FMA-WIN-LABEL: test_mm256_fmsub_pd:
666 ; CHECK-FMA-WIN: # %bb.0: # %entry
667 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
668 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
669 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
670 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
671 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
673 %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
674 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2
; 256-bit single-precision negated multiply-add: the multiplicand %a is negated
; with an fsub from -0.0 before the llvm.fma.v8f32 call, giving -(a * b) + c.
; All three run configurations expect one vfnmadd213ps; the Windows run reads
; its operands through (%rcx), (%rdx) and (%r8).
678 define <8 x float> @test_mm256_fnmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
679 ; CHECK-FMA-LABEL: test_mm256_fnmadd_ps:
680 ; CHECK-FMA: # %bb.0: # %entry
681 ; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xac,0xc2]
682 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
683 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
685 ; CHECK-AVX512VL-LABEL: test_mm256_fnmadd_ps:
686 ; CHECK-AVX512VL: # %bb.0: # %entry
687 ; CHECK-AVX512VL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
688 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
689 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
691 ; CHECK-FMA-WIN-LABEL: test_mm256_fnmadd_ps:
692 ; CHECK-FMA-WIN: # %bb.0: # %entry
693 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
694 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
695 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
696 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
697 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
699 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
700 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %c) #2
; 256-bit double-precision negated multiply-add: the multiplicand %a is negated
; with an fsub from -0.0 before the llvm.fma.v4f64 call, giving -(a * b) + c.
; All three run configurations expect one vfnmadd213pd; the Windows run reads
; its operands through (%rcx), (%rdx) and (%r8).
704 define <4 x double> @test_mm256_fnmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
705 ; CHECK-FMA-LABEL: test_mm256_fnmadd_pd:
706 ; CHECK-FMA: # %bb.0: # %entry
707 ; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
708 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
709 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
711 ; CHECK-AVX512VL-LABEL: test_mm256_fnmadd_pd:
712 ; CHECK-AVX512VL: # %bb.0: # %entry
713 ; CHECK-AVX512VL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
714 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
715 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
717 ; CHECK-FMA-WIN-LABEL: test_mm256_fnmadd_pd:
718 ; CHECK-FMA-WIN: # %bb.0: # %entry
719 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
720 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
721 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
722 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
723 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
725 %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a
726 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %c) #2
; 256-bit single-precision negated multiply-subtract: both the multiplicand %a
; and the addend %c are negated with an fsub from -0.0 before the
; llvm.fma.v8f32 call, giving -(a * b) - c. All three run configurations expect
; both negations to be absorbed into one vfnmsub213ps; the Windows run reads its
; operands through (%rcx), (%rdx) and (%r8).
730 define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
731 ; CHECK-FMA-LABEL: test_mm256_fnmsub_ps:
732 ; CHECK-FMA: # %bb.0: # %entry
733 ; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xae,0xc2]
734 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
735 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
737 ; CHECK-AVX512VL-LABEL: test_mm256_fnmsub_ps:
738 ; CHECK-AVX512VL: # %bb.0: # %entry
739 ; CHECK-AVX512VL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
740 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
741 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
743 ; CHECK-FMA-WIN-LABEL: test_mm256_fnmsub_ps:
744 ; CHECK-FMA-WIN: # %bb.0: # %entry
745 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
746 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
747 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00]
748 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
749 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
751 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
752 %sub1.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
753 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %sub1.i) #2
; 256-bit double-precision negated multiply-subtract: both the multiplicand %a
; and the addend %c are negated with an fsub from -0.0 before the
; llvm.fma.v4f64 call, giving -(a * b) - c. All three run configurations expect
; both negations to be absorbed into one vfnmsub213pd; the Windows run reads its
; operands through (%rcx), (%rdx) and (%r8).
757 define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
758 ; CHECK-FMA-LABEL: test_mm256_fnmsub_pd:
759 ; CHECK-FMA: # %bb.0: # %entry
760 ; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
761 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
762 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
764 ; CHECK-AVX512VL-LABEL: test_mm256_fnmsub_pd:
765 ; CHECK-AVX512VL: # %bb.0: # %entry
766 ; CHECK-AVX512VL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
767 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
768 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
770 ; CHECK-FMA-WIN-LABEL: test_mm256_fnmsub_pd:
771 ; CHECK-FMA-WIN: # %bb.0: # %entry
772 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
773 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
774 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00]
775 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
776 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
778 %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a
779 %sub1.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
780 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %sub1.i) #2
; 256-bit single-precision alternating multiply-add/subtract. The IR computes
; two full-width results, fma(a, b, c) and fma(a, b, -c), and interleaves them
; with a shufflevector. All three run configurations expect the whole pattern
; to collapse into one vfmaddsub213ps; the Windows run reads its operands
; through (%rcx), (%rdx) and (%r8).
784 define <8 x float> @test_mm256_fmaddsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
785 ; CHECK-FMA-LABEL: test_mm256_fmaddsub_ps:
786 ; CHECK-FMA: # %bb.0: # %entry
787 ; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
788 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
789 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
791 ; CHECK-AVX512VL-LABEL: test_mm256_fmaddsub_ps:
792 ; CHECK-AVX512VL: # %bb.0: # %entry
793 ; CHECK-AVX512VL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
794 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
795 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
797 ; CHECK-FMA-WIN-LABEL: test_mm256_fmaddsub_ps:
798 ; CHECK-FMA-WIN: # %bb.0: # %entry
799 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
800 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
801 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00]
802 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
803 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
805 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
806 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
807 %2 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %1) #2
; Mask indices 0-7 pick from %2 (the subtract result) and 8-15 from %0 (the add
; result), so even lanes come from the subtract and odd lanes from the add.
808 %3 = shufflevector <8 x float> %2, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
; 256-bit double-precision alternating multiply-add/subtract. The IR computes
; two full-width results, fma(a, b, c) and fma(a, b, -c), and interleaves them
; with a shufflevector. All three run configurations expect the whole pattern
; to collapse into one vfmaddsub213pd; the Windows run reads its operands
; through (%rcx), (%rdx) and (%r8).
812 define <4 x double> @test_mm256_fmaddsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
813 ; CHECK-FMA-LABEL: test_mm256_fmaddsub_pd:
814 ; CHECK-FMA: # %bb.0: # %entry
815 ; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
816 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
817 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
819 ; CHECK-AVX512VL-LABEL: test_mm256_fmaddsub_pd:
820 ; CHECK-AVX512VL: # %bb.0: # %entry
821 ; CHECK-AVX512VL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
822 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
823 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
825 ; CHECK-FMA-WIN-LABEL: test_mm256_fmaddsub_pd:
826 ; CHECK-FMA-WIN: # %bb.0: # %entry
827 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
828 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
829 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00]
830 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
831 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
833 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
834 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
835 %2 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %1) #2
; Mask indices 0-3 pick from %2 (the subtract result) and 4-7 from %0 (the add
; result), so even lanes come from the subtract and odd lanes from the add.
836 %3 = shufflevector <4 x double> %2, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; 256-bit single-precision alternating multiply-subtract/add. The IR computes
; fma(a, b, -c) and fma(a, b, c) and interleaves them with a shufflevector,
; using the opposite lane assignment to the fmaddsub tests. All three run
; configurations expect the whole pattern to collapse into one vfmsubadd213ps;
; the Windows run reads its operands through (%rcx), (%rdx) and (%r8).
840 define <8 x float> @test_mm256_fmsubadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
841 ; CHECK-FMA-LABEL: test_mm256_fmsubadd_ps:
842 ; CHECK-FMA: # %bb.0: # %entry
843 ; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa7,0xc2]
844 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
845 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
847 ; CHECK-AVX512VL-LABEL: test_mm256_fmsubadd_ps:
848 ; CHECK-AVX512VL: # %bb.0: # %entry
849 ; CHECK-AVX512VL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa7,0xc2]
850 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
851 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
853 ; CHECK-FMA-WIN-LABEL: test_mm256_fmsubadd_ps:
854 ; CHECK-FMA-WIN: # %bb.0: # %entry
855 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
856 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
857 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00]
858 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
859 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
861 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
862 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
863 %1 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
; Mask indices 0-7 pick from %1 (the add result) and 8-15 from %0 (the subtract
; result), so even lanes come from the add and odd lanes from the subtract.
864 %2 = shufflevector <8 x float> %1, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
; 256-bit double-precision alternating multiply-subtract/add. The IR computes
; fma(a, b, -c) and fma(a, b, c) and interleaves them with a shufflevector,
; using the opposite lane assignment to the fmaddsub tests. All three run
; configurations expect the whole pattern to collapse into one vfmsubadd213pd;
; the Windows run reads its operands through (%rcx), (%rdx) and (%r8).
868 define <4 x double> @test_mm256_fmsubadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
869 ; CHECK-FMA-LABEL: test_mm256_fmsubadd_pd:
870 ; CHECK-FMA: # %bb.0: # %entry
871 ; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa7,0xc2]
872 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
873 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
875 ; CHECK-AVX512VL-LABEL: test_mm256_fmsubadd_pd:
876 ; CHECK-AVX512VL: # %bb.0: # %entry
877 ; CHECK-AVX512VL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa7,0xc2]
878 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
879 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
881 ; CHECK-FMA-WIN-LABEL: test_mm256_fmsubadd_pd:
882 ; CHECK-FMA-WIN: # %bb.0: # %entry
883 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
884 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
885 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00]
886 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
887 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
889 %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
890 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2
891 %1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
; Mask indices 0-3 pick from %1 (the add result) and 4-7 from %0 (the subtract
; result), so even lanes come from the add and odd lanes from the subtract.
892 %2 = shufflevector <4 x double> %1, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Declarations of the llvm.fma intrinsic overloads: the <4 x float> and
; <2 x double> vector forms, the scalar float and double forms, and the
; <8 x float> and <4 x double> vector forms. Each one carries attribute group #1.
896 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
897 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1
898 declare float @llvm.fma.f32(float, float, float) #1
899 declare double @llvm.fma.f64(double, double, double) #1
900 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #1
901 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #1