1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK-FMA
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK-AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK-FMA-WIN
; Scalar f32 fused multiply-add on lane 0: fma(%a0[0], %a1[0], %a2[0]), with
; the result inserted back into lane 0 of %a0. The two non-Windows runs expect
; a single register-form vfmadd213ss; the Windows run expects the operands to
; be loaded from memory and a vfmadd132ss with a memory operand.
; The block-entry marker is matched for the FMA prefix before its first -NEXT
; line (as in the other functions of this file), because a -NEXT directive
; must match the line immediately following the previous match.
7 define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
8 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
9 ; CHECK-FMA: # %bb.0:
10 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
11 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
12 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
14 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
15 ; CHECK-AVX512VL: # %bb.0:
16 ; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
17 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
18 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
20 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
21 ; CHECK-FMA-WIN: # %bb.0:
22 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
23 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
24 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
25 ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02]
26 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
27 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
28 %1 = extractelement <4 x float> %a0, i64 0
29 %2 = extractelement <4 x float> %a1, i64 0
30 %3 = extractelement <4 x float> %a2, i64 0
31 %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
32 %5 = insertelement <4 x float> %a0, float %4, i64 0
; Scalar f32 fused multiply-add with the first two operands swapped ("bac"):
; fma(%a1[0], %a0[0], %a2[0]), with the result inserted into lane 0 of %a1.
; The non-Windows runs expect vfmadd213ss writing xmm1 followed by a copy into
; xmm0; the Windows run expects a vfmadd132ss with a memory operand.
; The block-entry marker is matched for the FMA prefix before its first -NEXT
; line (as in the other functions of this file), because a -NEXT directive
; must match the line immediately following the previous match.
36 define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
37 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_ss:
38 ; CHECK-FMA: # %bb.0:
39 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca]
40 ; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
41 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
42 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
44 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
45 ; CHECK-AVX512VL: # %bb.0:
46 ; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca]
47 ; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
48 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
49 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
51 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_ss:
52 ; CHECK-FMA-WIN: # %bb.0:
53 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
54 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
55 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
56 ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x01]
57 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
58 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
59 %1 = extractelement <4 x float> %a1, i64 0
60 %2 = extractelement <4 x float> %a0, i64 0
61 %3 = extractelement <4 x float> %a2, i64 0
62 %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
63 %5 = insertelement <4 x float> %a1, float %4, i64 0
; Scalar f32 fused multiply-add whose result is merged into the addend vector:
; fma(%a0[0], %a1[0], %a2[0]) is inserted into lane 0 of %a2. All three runs
; expect the 231 form (vfmadd231ss), which accumulates into the addend
; register.
; The block-entry marker is matched for the FMA prefix before its first -NEXT
; line (as in the other functions of this file), because a -NEXT directive
; must match the line immediately following the previous match.
67 define <4 x float> @test_x86_fma_vfmadd_ss_231(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
68 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss_231:
69 ; CHECK-FMA: # %bb.0:
70 ; CHECK-FMA-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 # encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
71 ; CHECK-FMA-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
72 ; CHECK-FMA-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
73 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
75 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss_231:
76 ; CHECK-AVX512VL: # %bb.0:
77 ; CHECK-AVX512VL-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
78 ; CHECK-AVX512VL-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
79 ; CHECK-AVX512VL-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
80 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
82 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss_231:
83 ; CHECK-FMA-WIN: # %bb.0:
84 ; CHECK-FMA-WIN-NEXT: vmovaps (%r8), %xmm0 # encoding: [0xc4,0xc1,0x78,0x28,0x00]
85 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
86 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc5,0xfa,0x10,0x09]
87 ; CHECK-FMA-WIN-NEXT: vfmadd231ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xb9,0x02]
88 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * mem) + xmm0
89 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
90 %1 = extractelement <4 x float> %a0, i64 0
91 %2 = extractelement <4 x float> %a1, i64 0
92 %3 = extractelement <4 x float> %a2, i64 0
93 %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
94 %5 = insertelement <4 x float> %a2, float %4, i64 0
; Scalar f64 fused multiply-add on lane 0: fma(%a0[0], %a1[0], %a2[0]), with
; the result inserted back into lane 0 of %a0. The non-Windows runs expect a
; single register-form vfmadd213sd; the Windows run expects a vfmadd132sd with
; a memory operand.
98 define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
99 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
100 ; CHECK-FMA: # %bb.0:
101 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
102 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
103 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
105 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
106 ; CHECK-AVX512VL: # %bb.0:
107 ; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
108 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
109 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
111 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
112 ; CHECK-FMA-WIN: # %bb.0:
113 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
114 ; CHECK-FMA-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
115 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
116 ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02]
117 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
118 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
119 %1 = extractelement <2 x double> %a0, i64 0
120 %2 = extractelement <2 x double> %a1, i64 0
121 %3 = extractelement <2 x double> %a2, i64 0
122 %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
123 %5 = insertelement <2 x double> %a0, double %4, i64 0
; Scalar f64 fused multiply-add with the first two operands swapped ("bac"):
; fma(%a1[0], %a0[0], %a2[0]), with the result inserted into lane 0 of %a1.
; The non-Windows runs expect vfmadd213sd writing xmm1 followed by a copy into
; xmm0; the Windows run expects a vfmadd132sd with a memory operand.
127 define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
128 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_sd:
129 ; CHECK-FMA: # %bb.0:
130 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
131 ; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
132 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
133 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
135 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
136 ; CHECK-AVX512VL: # %bb.0:
137 ; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
138 ; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
139 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
140 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
142 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_sd:
143 ; CHECK-FMA-WIN: # %bb.0:
144 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
145 ; CHECK-FMA-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
146 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
147 ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x01]
148 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
149 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
150 %1 = extractelement <2 x double> %a1, i64 0
151 %2 = extractelement <2 x double> %a0, i64 0
152 %3 = extractelement <2 x double> %a2, i64 0
153 %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
154 %5 = insertelement <2 x double> %a1, double %4, i64 0
; Packed <4 x float> fused multiply-add: @llvm.fma.v4f32(%a0, %a1, %a2).
; The non-Windows runs expect a single register-form vfmadd213ps; the Windows
; run expects two vector loads and a vfmadd213ps with a memory addend.
158 define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
159 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
160 ; CHECK-FMA: # %bb.0:
161 ; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
162 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
163 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
165 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps:
166 ; CHECK-AVX512VL: # %bb.0:
167 ; CHECK-AVX512VL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
168 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
169 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
171 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps:
172 ; CHECK-FMA-WIN: # %bb.0:
173 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
174 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
175 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
176 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
177 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
178 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
; Packed <2 x double> fused multiply-add: @llvm.fma.v2f64(%a0, %a1, %a2).
; The non-Windows runs expect a single register-form vfmadd213pd; the Windows
; run expects two vector loads and a vfmadd213pd with a memory addend.
182 define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
183 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
184 ; CHECK-FMA: # %bb.0:
185 ; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
186 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
187 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
189 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd:
190 ; CHECK-AVX512VL: # %bb.0:
191 ; CHECK-AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
192 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
193 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
195 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd:
196 ; CHECK-FMA-WIN: # %bb.0:
197 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
198 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
199 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
200 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
201 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
202 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
; Packed <8 x float> (256-bit) fused multiply-add:
; @llvm.fma.v8f32(%a0, %a1, %a2). The non-Windows runs expect a single
; ymm-register vfmadd213ps; the Windows run expects two vector loads and a
; vfmadd213ps with a memory addend.
206 define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
207 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
208 ; CHECK-FMA: # %bb.0:
209 ; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
210 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
211 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
213 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps_256:
214 ; CHECK-AVX512VL: # %bb.0:
215 ; CHECK-AVX512VL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
216 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
217 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
219 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps_256:
220 ; CHECK-FMA-WIN: # %bb.0:
221 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
222 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
223 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
224 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
225 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
226 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
; Packed <4 x double> (256-bit) fused multiply-add:
; @llvm.fma.v4f64(%a0, %a1, %a2). The non-Windows runs expect a single
; ymm-register vfmadd213pd; the Windows run expects two vector loads and a
; vfmadd213pd with a memory addend.
230 define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
231 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
232 ; CHECK-FMA: # %bb.0:
233 ; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
234 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
235 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
237 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd_256:
238 ; CHECK-AVX512VL: # %bb.0:
239 ; CHECK-AVX512VL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
240 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
241 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
243 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd_256:
244 ; CHECK-FMA-WIN: # %bb.0:
245 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
246 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
247 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
248 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
249 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
250 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
; Scalar f32 fused multiply-subtract on lane 0. The addend %a2[0] is negated
; with "fsub -0.0, x" and fed to @llvm.fma.f32 together with %a0[0] and
; %a1[0]; the result is inserted into lane 0 of %a0. All runs expect the
; negation to be absorbed into a vfmsub instruction.
255 define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
256 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
257 ; CHECK-FMA: # %bb.0:
258 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
259 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
260 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
262 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
263 ; CHECK-AVX512VL: # %bb.0:
264 ; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
265 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
266 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
268 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
269 ; CHECK-FMA-WIN: # %bb.0:
270 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
271 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
272 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
273 ; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02]
274 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
275 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
276 %1 = extractelement <4 x float> %a0, i64 0
277 %2 = extractelement <4 x float> %a1, i64 0
278 %3 = extractelement <4 x float> %a2, i64 0
279 %4 = fsub float -0.000000e+00, %3
280 %5 = call float @llvm.fma.f32(float %1, float %2, float %4)
281 %6 = insertelement <4 x float> %a0, float %5, i64 0
; Scalar f32 fused multiply-subtract with the first two operands swapped
; ("bac"): fma(%a1[0], %a0[0], -%a2[0]), where the negation is written as
; "fsub -0.0, x"; the result is inserted into lane 0 of %a1. The non-Windows
; runs expect vfmsub213ss writing xmm1 followed by a copy into xmm0.
285 define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
286 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_ss:
287 ; CHECK-FMA: # %bb.0:
288 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca]
289 ; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
290 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
291 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
293 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
294 ; CHECK-AVX512VL: # %bb.0:
295 ; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca]
296 ; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
297 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
298 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
300 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_ss:
301 ; CHECK-FMA-WIN: # %bb.0:
302 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
303 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
304 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
305 ; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x01]
306 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
307 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
308 %1 = extractelement <4 x float> %a1, i64 0
309 %2 = extractelement <4 x float> %a0, i64 0
310 %3 = extractelement <4 x float> %a2, i64 0
311 %4 = fsub float -0.000000e+00, %3
312 %5 = call float @llvm.fma.f32(float %1, float %2, float %4)
313 %6 = insertelement <4 x float> %a1, float %5, i64 0
; Scalar f64 fused multiply-subtract on lane 0. The addend %a2[0] is negated
; with "fsub -0.0, x" and fed to @llvm.fma.f64 together with %a0[0] and
; %a1[0]; the result is inserted into lane 0 of %a0. All runs expect the
; negation to be absorbed into a vfmsub instruction.
317 define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
318 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
319 ; CHECK-FMA: # %bb.0:
320 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
321 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
322 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
324 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
325 ; CHECK-AVX512VL: # %bb.0:
326 ; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
327 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
328 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
330 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
331 ; CHECK-FMA-WIN: # %bb.0:
332 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
333 ; CHECK-FMA-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
334 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
335 ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02]
336 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
337 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
338 %1 = extractelement <2 x double> %a0, i64 0
339 %2 = extractelement <2 x double> %a1, i64 0
340 %3 = extractelement <2 x double> %a2, i64 0
341 %4 = fsub double -0.000000e+00, %3
342 %5 = call double @llvm.fma.f64(double %1, double %2, double %4)
343 %6 = insertelement <2 x double> %a0, double %5, i64 0
; Scalar f64 fused multiply-subtract with the first two operands swapped
; ("bac"): fma(%a1[0], %a0[0], -%a2[0]), where the negation is written as
; "fsub -0.0, x"; the result is inserted into lane 0 of %a1. The non-Windows
; runs expect vfmsub213sd writing xmm1 followed by a copy into xmm0.
347 define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
348 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_sd:
349 ; CHECK-FMA: # %bb.0:
350 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca]
351 ; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
352 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
353 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
355 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
356 ; CHECK-AVX512VL: # %bb.0:
357 ; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca]
358 ; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
359 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
360 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
362 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_sd:
363 ; CHECK-FMA-WIN: # %bb.0:
364 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
365 ; CHECK-FMA-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
366 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
367 ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x01]
368 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
369 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
370 %1 = extractelement <2 x double> %a1, i64 0
371 %2 = extractelement <2 x double> %a0, i64 0
372 %3 = extractelement <2 x double> %a2, i64 0
373 %4 = fsub double -0.000000e+00, %3
374 %5 = call double @llvm.fma.f64(double %1, double %2, double %4)
375 %6 = insertelement <2 x double> %a1, double %5, i64 0
; Packed <4 x float> fused multiply-subtract: %a2 is negated with a vector
; "fsub -0.0, x" and passed as the addend of @llvm.fma.v4f32(%a0, %a1, ...).
; All runs expect the negation to be absorbed into vfmsub213ps.
379 define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
380 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
381 ; CHECK-FMA: # %bb.0:
382 ; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
383 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
384 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
386 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps:
387 ; CHECK-AVX512VL: # %bb.0:
388 ; CHECK-AVX512VL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
389 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
390 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
392 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps:
393 ; CHECK-FMA-WIN: # %bb.0:
394 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
395 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
396 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
397 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
398 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
399 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
400 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
; Packed <2 x double> fused multiply-subtract: %a2 is negated with a vector
; "fsub -0.0, x" and passed as the addend of @llvm.fma.v2f64(%a0, %a1, ...).
; All runs expect the negation to be absorbed into vfmsub213pd.
404 define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
405 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
406 ; CHECK-FMA: # %bb.0:
407 ; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
408 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
409 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
411 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd:
412 ; CHECK-AVX512VL: # %bb.0:
413 ; CHECK-AVX512VL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
414 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
415 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
417 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd:
418 ; CHECK-FMA-WIN: # %bb.0:
419 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
420 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
421 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
422 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
423 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
424 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
425 %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
; Packed <8 x float> (256-bit) fused multiply-subtract: %a2 is negated with a
; vector "fsub -0.0, x" and passed as the addend of
; @llvm.fma.v8f32(%a0, %a1, ...). All runs expect the negation to be absorbed
; into a ymm vfmsub213ps.
429 define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
430 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
431 ; CHECK-FMA: # %bb.0:
432 ; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
433 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
434 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
436 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps_256:
437 ; CHECK-AVX512VL: # %bb.0:
438 ; CHECK-AVX512VL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
439 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
440 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
442 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps_256:
443 ; CHECK-FMA-WIN: # %bb.0:
444 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
445 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
446 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
447 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
448 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
449 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
450 %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
; Packed <4 x double> (256-bit) fused multiply-subtract: %a2 is negated with a
; vector "fsub -0.0, x" and passed as the addend of
; @llvm.fma.v4f64(%a0, %a1, ...). All runs expect the negation to be absorbed
; into a ymm vfmsub213pd.
454 define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
455 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
456 ; CHECK-FMA: # %bb.0:
457 ; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
458 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
459 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
461 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd_256:
462 ; CHECK-AVX512VL: # %bb.0:
463 ; CHECK-AVX512VL-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
464 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
465 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
467 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd_256:
468 ; CHECK-FMA-WIN: # %bb.0:
469 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
470 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
471 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
472 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
473 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
474 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
475 %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
; Scalar f32 negated fused multiply-add on lane 0. The second multiplicand
; %a1[0] is negated with "fsub -0.0, x" before @llvm.fma.f32, and the result
; is inserted into lane 0 of %a0. All runs expect the negation to be absorbed
; into a vfnmadd instruction.
480 define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
481 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
482 ; CHECK-FMA: # %bb.0:
483 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
484 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
485 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
487 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
488 ; CHECK-AVX512VL: # %bb.0:
489 ; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
490 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
491 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
493 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
494 ; CHECK-FMA-WIN: # %bb.0:
495 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
496 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
497 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
498 ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02]
499 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
500 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
501 %1 = extractelement <4 x float> %a0, i64 0
502 %2 = extractelement <4 x float> %a1, i64 0
503 %3 = extractelement <4 x float> %a2, i64 0
504 %4 = fsub float -0.000000e+00, %2
505 %5 = call float @llvm.fma.f32(float %1, float %4, float %3)
506 %6 = insertelement <4 x float> %a0, float %5, i64 0
; Scalar f32 negated fused multiply-add with the first two operands swapped
; ("bac"): fma(%a1[0], -%a0[0], %a2[0]), where the negation is written as
; "fsub -0.0, x"; the result is inserted into lane 0 of %a1. The non-Windows
; runs expect vfnmadd213ss writing xmm1 followed by a copy into xmm0.
510 define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
511 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_ss:
512 ; CHECK-FMA: # %bb.0:
513 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca]
514 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
515 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
516 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
518 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
519 ; CHECK-AVX512VL: # %bb.0:
520 ; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca]
521 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
522 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
523 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
525 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_ss:
526 ; CHECK-FMA-WIN: # %bb.0:
527 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
528 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
529 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
530 ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x01]
531 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
532 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
533 %1 = extractelement <4 x float> %a1, i64 0
534 %2 = extractelement <4 x float> %a0, i64 0
535 %3 = extractelement <4 x float> %a2, i64 0
536 %4 = fsub float -0.000000e+00, %2
537 %5 = call float @llvm.fma.f32(float %1, float %4, float %3)
538 %6 = insertelement <4 x float> %a1, float %5, i64 0
; Scalar f64 negated fused multiply-add on lane 0. The second multiplicand
; %a1[0] is negated with "fsub -0.0, x" before @llvm.fma.f64, and the result
; is inserted into lane 0 of %a0. All runs expect the negation to be absorbed
; into a vfnmadd instruction.
542 define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
543 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
544 ; CHECK-FMA: # %bb.0:
545 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
546 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
547 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
549 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
550 ; CHECK-AVX512VL: # %bb.0:
551 ; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
552 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
553 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
555 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
556 ; CHECK-FMA-WIN: # %bb.0:
557 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
558 ; CHECK-FMA-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
559 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
560 ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02]
561 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
562 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
563 %1 = extractelement <2 x double> %a0, i64 0
564 %2 = extractelement <2 x double> %a1, i64 0
565 %3 = extractelement <2 x double> %a2, i64 0
566 %4 = fsub double -0.000000e+00, %2
567 %5 = call double @llvm.fma.f64(double %1, double %4, double %3)
568 %6 = insertelement <2 x double> %a0, double %5, i64 0
; Scalar f64 negated fused multiply-add with the first two operands swapped
; ("bac"): fma(%a1[0], -%a0[0], %a2[0]), where the negation is written as
; "fsub -0.0, x"; the result is inserted into lane 0 of %a1. The non-Windows
; runs expect vfnmadd213sd writing xmm1 followed by a copy into xmm0.
572 define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
573 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_sd:
574 ; CHECK-FMA: # %bb.0:
575 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca]
576 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
577 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
578 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
580 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
581 ; CHECK-AVX512VL: # %bb.0:
582 ; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca]
583 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
584 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
585 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
587 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_sd:
588 ; CHECK-FMA-WIN: # %bb.0:
589 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
590 ; CHECK-FMA-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
591 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
592 ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x01]
593 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
594 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
595 %1 = extractelement <2 x double> %a1, i64 0
596 %2 = extractelement <2 x double> %a0, i64 0
597 %3 = extractelement <2 x double> %a2, i64 0
598 %4 = fsub double -0.000000e+00, %2
599 %5 = call double @llvm.fma.f64(double %1, double %4, double %3)
600 %6 = insertelement <2 x double> %a1, double %5, i64 0
; Packed <4 x float> negated fused multiply-add: the first multiplicand %a0 is
; negated with a vector "fsub -0.0, x" and passed to
; @llvm.fma.v4f32(..., %a1, %a2). All runs expect the negation to be absorbed
; into vfnmadd213ps.
604 define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
605 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
606 ; CHECK-FMA: # %bb.0:
607 ; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xac,0xc2]
608 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
609 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
611 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps:
612 ; CHECK-AVX512VL: # %bb.0:
613 ; CHECK-AVX512VL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
614 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
615 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
617 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps:
618 ; CHECK-FMA-WIN: # %bb.0:
619 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
620 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
621 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
622 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
623 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
624 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
625 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
; Packed 128-bit double-precision negated multiply-add: fma(-%a0, %a1, %a2).
; All three run lines expect a single vfnmadd213pd; the Windows run also
; expects the operands to be read from memory through %rcx, %rdx and %r8.
629 define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
630 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
631 ; CHECK-FMA: # %bb.0:
632 ; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
633 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
634 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
636 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd:
637 ; CHECK-AVX512VL: # %bb.0:
638 ; CHECK-AVX512VL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
639 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
640 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
642 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd:
643 ; CHECK-FMA-WIN: # %bb.0:
644 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
645 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
646 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
647 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
648 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
649 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0 ; %1 = -0.0 - %a0, i.e. the negated multiplicand
650 %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2) ; %2 = (%1 * %a1) + %a2
; Packed 256-bit single-precision negated multiply-add: fma(-%a0, %a1, %a2).
; All three run lines expect a single vfnmadd213ps on ymm registers; the
; Windows run also expects the operands to be read from memory through
; %rcx, %rdx and %r8.
654 define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
655 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
656 ; CHECK-FMA: # %bb.0:
657 ; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xac,0xc2]
658 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
659 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
661 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps_256:
662 ; CHECK-AVX512VL: # %bb.0:
663 ; CHECK-AVX512VL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
664 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
665 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
667 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps_256:
668 ; CHECK-FMA-WIN: # %bb.0:
669 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
670 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
671 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
672 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
673 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
674 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 ; %1 = -0.0 - %a0, i.e. the negated multiplicand
675 %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2) ; %2 = (%1 * %a1) + %a2
; Packed 256-bit double-precision negated multiply-add: fma(-%a0, %a1, %a2).
; All three run lines expect a single vfnmadd213pd on ymm registers; the
; Windows run also expects the operands to be read from memory through
; %rcx, %rdx and %r8.
679 define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
680 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
681 ; CHECK-FMA: # %bb.0:
682 ; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
683 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
684 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
686 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd_256:
687 ; CHECK-AVX512VL: # %bb.0:
688 ; CHECK-AVX512VL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
689 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
690 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
692 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd_256:
693 ; CHECK-FMA-WIN: # %bb.0:
694 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
695 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
696 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
697 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
698 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
699 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0 ; %1 = -0.0 - %a0, i.e. the negated multiplicand
700 %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2) ; %2 = (%1 * %a1) + %a2
; Scalar single-precision negated multiply-subtract on element 0:
; fma(%a0[0], -%a1[0], -%a2[0]), with the result inserted into element 0 of %a0.
; The two non-Windows runs expect a register-only vfnmsub213ss; the Windows
; run expects the 132 form with one multiplicand taken from memory.
705 define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
706 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss:
707 ; CHECK-FMA: # %bb.0:
708 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
709 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
710 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
712 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss:
713 ; CHECK-AVX512VL: # %bb.0:
714 ; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
715 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
716 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
718 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss:
719 ; CHECK-FMA-WIN: # %bb.0:
720 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
721 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
722 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
723 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02]
724 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
725 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
726 %1 = extractelement <4 x float> %a0, i64 0
727 %2 = extractelement <4 x float> %a1, i64 0
728 %3 = extractelement <4 x float> %a2, i64 0
729 %4 = fsub float -0.000000e+00, %2 ; %4 = -%a1[0] (negated multiplicand)
730 %5 = fsub float -0.000000e+00, %3 ; %5 = -%a2[0] (negated addend)
731 %6 = call float @llvm.fma.f32(float %1, float %4, float %5) ; %6 = (%1 * %4) + %5
732 %7 = insertelement <4 x float> %a0, float %6, i64 0 ; upper elements come from %a0
; Variant of the scalar single-precision negated multiply-subtract test with
; the roles of %a0 and %a1 swapped: fma(%a1[0], -%a0[0], -%a2[0]), with the
; result inserted into element 0 of %a1.
; The two non-Windows runs expect vfnmsub213ss writing xmm1 followed by a
; vmovaps copy into xmm0; the Windows run expects the 132 form with one
; multiplicand taken from memory and no extra copy.
736 define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
737 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_ss:
738 ; CHECK-FMA: # %bb.0:
739 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca]
740 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2
741 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
742 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
744 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss:
745 ; CHECK-AVX512VL: # %bb.0:
746 ; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca]
747 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2
748 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
749 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
751 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_ss:
752 ; CHECK-FMA-WIN: # %bb.0:
753 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
754 ; CHECK-FMA-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
755 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
756 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x01]
757 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
758 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
759 %1 = extractelement <4 x float> %a1, i64 0
760 %2 = extractelement <4 x float> %a0, i64 0
761 %3 = extractelement <4 x float> %a2, i64 0
762 %4 = fsub float -0.000000e+00, %2 ; %4 = -%a0[0] (negated multiplicand)
763 %5 = fsub float -0.000000e+00, %3 ; %5 = -%a2[0] (negated addend)
764 %6 = call float @llvm.fma.f32(float %1, float %4, float %5) ; %6 = (%1 * %4) + %5
765 %7 = insertelement <4 x float> %a1, float %6, i64 0 ; upper elements come from %a1
; Scalar double-precision negated multiply-subtract on element 0:
; fma(%a0[0], -%a1[0], -%a2[0]), with the result inserted into element 0 of %a0.
; The two non-Windows runs expect a register-only vfnmsub213sd; the Windows
; run expects the 132 form with one multiplicand taken from memory.
769 define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
770 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
771 ; CHECK-FMA: # %bb.0:
772 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
773 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
774 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
776 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd:
777 ; CHECK-AVX512VL: # %bb.0:
778 ; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
779 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
780 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
782 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd:
783 ; CHECK-FMA-WIN: # %bb.0:
784 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
785 ; CHECK-FMA-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
786 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
787 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x02]
788 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
789 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
790 %1 = extractelement <2 x double> %a0, i64 0
791 %2 = extractelement <2 x double> %a1, i64 0
792 %3 = extractelement <2 x double> %a2, i64 0
793 %4 = fsub double -0.000000e+00, %2 ; %4 = -%a1[0] (negated multiplicand)
794 %5 = fsub double -0.000000e+00, %3 ; %5 = -%a2[0] (negated addend)
795 %6 = call double @llvm.fma.f64(double %1, double %4, double %5) ; %6 = (%1 * %4) + %5
796 %7 = insertelement <2 x double> %a0, double %6, i64 0 ; upper element comes from %a0
; Variant of the scalar double-precision negated multiply-subtract test with
; the roles of %a0 and %a1 swapped: fma(%a1[0], -%a0[0], -%a2[0]), with the
; result inserted into element 0 of %a1.
; The two non-Windows runs expect vfnmsub213sd writing xmm1 followed by a
; vmovapd copy into xmm0; the Windows run expects the 132 form with one
; multiplicand taken from memory and no extra copy.
800 define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
801 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_sd:
802 ; CHECK-FMA: # %bb.0:
803 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
804 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2
805 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
806 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
808 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd:
809 ; CHECK-AVX512VL: # %bb.0:
810 ; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
811 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2
812 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
813 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
815 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_sd:
816 ; CHECK-FMA-WIN: # %bb.0:
817 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
818 ; CHECK-FMA-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
819 ; CHECK-FMA-WIN-NEXT: # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
820 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x01]
821 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
822 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
823 %1 = extractelement <2 x double> %a1, i64 0
824 %2 = extractelement <2 x double> %a0, i64 0
825 %3 = extractelement <2 x double> %a2, i64 0
826 %4 = fsub double -0.000000e+00, %2 ; %4 = -%a0[0] (negated multiplicand)
827 %5 = fsub double -0.000000e+00, %3 ; %5 = -%a2[0] (negated addend)
828 %6 = call double @llvm.fma.f64(double %1, double %4, double %5) ; %6 = (%1 * %4) + %5
829 %7 = insertelement <2 x double> %a1, double %6, i64 0 ; upper element comes from %a1
; Packed 128-bit single-precision negated multiply-subtract:
; fma(-%a0, %a1, -%a2).
; All three run lines expect a single vfnmsub213ps; the Windows run also
; expects the operands to be read from memory through %rcx, %rdx and %r8.
833 define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
834 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps:
835 ; CHECK-FMA: # %bb.0:
836 ; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xae,0xc2]
837 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
838 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
840 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps:
841 ; CHECK-AVX512VL: # %bb.0:
842 ; CHECK-AVX512VL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
843 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
844 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
846 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps:
847 ; CHECK-FMA-WIN: # %bb.0:
848 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
849 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
850 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00]
851 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
852 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
853 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 ; %1 = -%a0 (negated multiplicand)
854 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; %2 = -%a2 (negated addend)
855 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2) ; %3 = (%1 * %a1) + %2
; Packed 128-bit double-precision negated multiply-subtract:
; fma(-%a0, %a1, -%a2).
; All three run lines expect a single vfnmsub213pd; the Windows run also
; expects the operands to be read from memory through %rcx, %rdx and %r8.
859 define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
860 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd:
861 ; CHECK-FMA: # %bb.0:
862 ; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
863 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
864 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
866 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd:
867 ; CHECK-AVX512VL: # %bb.0:
868 ; CHECK-AVX512VL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
869 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
870 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
872 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd:
873 ; CHECK-FMA-WIN: # %bb.0:
874 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
875 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
876 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00]
877 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
878 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
879 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0 ; %1 = -%a0 (negated multiplicand)
880 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 ; %2 = -%a2 (negated addend)
881 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2) ; %3 = (%1 * %a1) + %2
; Packed 256-bit single-precision negated multiply-subtract:
; fma(-%a0, %a1, -%a2).
; All three run lines expect a single vfnmsub213ps on ymm registers; the
; Windows run also expects the operands to be read from memory through
; %rcx, %rdx and %r8.
885 define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
886 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256:
887 ; CHECK-FMA: # %bb.0:
888 ; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xae,0xc2]
889 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
890 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
892 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps_256:
893 ; CHECK-AVX512VL: # %bb.0:
894 ; CHECK-AVX512VL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
895 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
896 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
898 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps_256:
899 ; CHECK-FMA-WIN: # %bb.0:
900 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
901 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
902 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00]
903 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
904 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
905 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 ; %1 = -%a0 (negated multiplicand)
906 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; %2 = -%a2 (negated addend)
907 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2) ; %3 = (%1 * %a1) + %2
; Packed 256-bit double-precision negated multiply-subtract:
; fma(-%a0, %a1, -%a2).
; All three run lines expect a single vfnmsub213pd on ymm registers; the
; Windows run also expects the operands to be read from memory through
; %rcx, %rdx and %r8.
911 define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
912 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256:
913 ; CHECK-FMA: # %bb.0:
914 ; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
915 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
916 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
918 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd_256:
919 ; CHECK-AVX512VL: # %bb.0:
920 ; CHECK-AVX512VL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
921 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
922 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
924 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd_256:
925 ; CHECK-FMA-WIN: # %bb.0:
926 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
927 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
928 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00]
929 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
930 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
931 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0 ; %1 = -%a0 (negated multiplicand)
932 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 ; %2 = -%a2 (negated addend)
933 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2) ; %3 = (%1 * %a1) + %2
; Packed 128-bit single-precision alternating multiply-add/subtract.
; Two fma calls are made on the same multiplicands, one adding %a2 and one
; adding -%a2, and a shufflevector interleaves them: even elements take the
; subtract result, odd elements take the add result.
; All three run lines expect this to become a single vfmaddsub213ps; the
; Windows run also expects the operands to be read from memory through
; %rcx, %rdx and %r8.
938 define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
939 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps:
940 ; CHECK-FMA: # %bb.0:
941 ; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
942 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
943 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
945 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps:
946 ; CHECK-AVX512VL: # %bb.0:
947 ; CHECK-AVX512VL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
948 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
949 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
951 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps:
952 ; CHECK-FMA-WIN: # %bb.0:
953 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
954 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
955 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00]
956 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
957 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
958 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; %1 = (%a0 * %a1) + %a2
959 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; %2 = -%a2
960 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2) ; %3 = (%a0 * %a1) - %a2
961 %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; elements 0,2 from %3 (subtract), elements 1,3 from %1 (add)
; Packed 128-bit double-precision alternating multiply-add/subtract.
; Two fma calls are made on the same multiplicands, one adding %a2 and one
; adding -%a2, and a shufflevector interleaves them: element 0 takes the
; subtract result, element 1 takes the add result.
; All three run lines expect this to become a single vfmaddsub213pd; the
; Windows run also expects the operands to be read from memory through
; %rcx, %rdx and %r8.
965 define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
966 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd:
967 ; CHECK-FMA: # %bb.0:
968 ; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
969 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
970 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
972 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd:
973 ; CHECK-AVX512VL: # %bb.0:
974 ; CHECK-AVX512VL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
975 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
976 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
978 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd:
979 ; CHECK-FMA-WIN: # %bb.0:
980 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
981 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
982 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00]
983 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
984 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
985 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; %1 = (%a0 * %a1) + %a2
986 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 ; %2 = -%a2
987 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2) ; %3 = (%a0 * %a1) - %a2
988 %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3> ; element 0 from %3 (subtract), element 1 from %1 (add)
; Packed 256-bit single-precision alternating multiply-add/subtract.
; Two fma calls are made on the same multiplicands, one adding %a2 and one
; adding -%a2, and a shufflevector interleaves them: even elements take the
; subtract result, odd elements take the add result.
; All three run lines expect this to become a single vfmaddsub213ps on ymm
; registers; the Windows run also expects the operands to be read from
; memory through %rcx, %rdx and %r8.
992 define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
993 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256:
994 ; CHECK-FMA: # %bb.0:
995 ; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
996 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
997 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
999 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps_256:
1000 ; CHECK-AVX512VL: # %bb.0:
1001 ; CHECK-AVX512VL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
1002 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
1003 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
1005 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps_256:
1006 ; CHECK-FMA-WIN: # %bb.0:
1007 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
1008 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
1009 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00]
1010 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
1011 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
1012 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; %1 = (%a0 * %a1) + %a2
1013 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; %2 = -%a2
1014 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2) ; %3 = (%a0 * %a1) - %a2
1015 %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> ; even elements from %3 (subtract), odd elements from %1 (add)
; Packed 256-bit double-precision alternating multiply-add/subtract.
; Two fma calls are made on the same multiplicands, one adding %a2 and one
; adding -%a2, and a shufflevector interleaves them: even elements take the
; subtract result, odd elements take the add result.
; All three run lines expect this to become a single vfmaddsub213pd on ymm
; registers; the Windows run also expects the operands to be read from
; memory through %rcx, %rdx and %r8.
1019 define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1020 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256:
1021 ; CHECK-FMA: # %bb.0:
1022 ; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
1023 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
1024 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
1026 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd_256:
1027 ; CHECK-AVX512VL: # %bb.0:
1028 ; CHECK-AVX512VL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
1029 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
1030 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
1032 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd_256:
1033 ; CHECK-FMA-WIN: # %bb.0:
1034 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
1035 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
1036 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00]
1037 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
1038 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
1039 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; %1 = (%a0 * %a1) + %a2
1040 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 ; %2 = -%a2
1041 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2) ; %3 = (%a0 * %a1) - %a2
1042 %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; elements 0,2 from %3 (subtract), elements 1,3 from %1 (add)
; Packed 128-bit single-precision alternating multiply-subtract/add.
; Two fma calls are made on the same multiplicands, one adding %a2 and one
; adding -%a2, and a shufflevector interleaves them: even elements take the
; add result, odd elements take the subtract result.
; All three run lines expect this to become a single vfmsubadd213ps; the
; Windows run also expects the operands to be read from memory through
; %rcx, %rdx and %r8.
1047 define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
1048 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps:
1049 ; CHECK-FMA: # %bb.0:
1050 ; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
1051 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
1052 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
1054 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps:
1055 ; CHECK-AVX512VL: # %bb.0:
1056 ; CHECK-AVX512VL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
1057 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
1058 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
1060 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps:
1061 ; CHECK-FMA-WIN: # %bb.0:
1062 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
1063 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
1064 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00]
1065 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
1066 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
1067 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; %1 = (%a0 * %a1) + %a2
1068 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; %2 = -%a2
1069 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2) ; %3 = (%a0 * %a1) - %a2
1070 %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; elements 0,2 from %1 (add), elements 1,3 from %3 (subtract)
; Packed 128-bit double-precision alternating multiply-subtract/add.
; Two fma calls are made on the same multiplicands, one adding %a2 and one
; adding -%a2, and a shufflevector interleaves them: element 0 takes the
; add result, element 1 takes the subtract result.
; All three run lines expect this to become a single vfmsubadd213pd; the
; Windows run also expects the operands to be read from memory through
; %rcx, %rdx and %r8.
1074 define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
1075 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd:
1076 ; CHECK-FMA: # %bb.0:
1077 ; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
1078 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
1079 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
1081 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd:
1082 ; CHECK-AVX512VL: # %bb.0:
1083 ; CHECK-AVX512VL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
1084 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
1085 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
1087 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd:
1088 ; CHECK-FMA-WIN: # %bb.0:
1089 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
1090 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
1091 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00]
1092 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
1093 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
1094 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; %1 = (%a0 * %a1) + %a2
1095 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 ; %2 = -%a2
1096 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2) ; %3 = (%a0 * %a1) - %a2
1097 %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3> ; element 0 from %1 (add), element 1 from %3 (subtract)
; Packed 256-bit single-precision alternating multiply-subtract/add.
; Two fma calls are made on the same multiplicands, one adding %a2 and one
; adding -%a2, and a shufflevector interleaves them: even elements take the
; add result, odd elements take the subtract result.
; All three run lines expect this to become a single vfmsubadd213ps on ymm
; registers; the Windows run also expects the operands to be read from
; memory through %rcx, %rdx and %r8.
1101 define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
1102 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256:
1103 ; CHECK-FMA: # %bb.0:
1104 ; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa7,0xc2]
1105 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
1106 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
1108 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps_256:
1109 ; CHECK-AVX512VL: # %bb.0:
1110 ; CHECK-AVX512VL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa7,0xc2]
1111 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
1112 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
1114 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps_256:
1115 ; CHECK-FMA-WIN: # %bb.0:
1116 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
1117 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
1118 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00]
1119 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
1120 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
1121 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; %1 = (%a0 * %a1) + %a2
1122 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; %2 = -%a2
1123 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2) ; %3 = (%a0 * %a1) - %a2
1124 %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> ; even elements from %1 (add), odd elements from %3 (subtract)
; Packed 256-bit double-precision alternating multiply-subtract/add.
; Two fma calls are made on the same multiplicands, one adding %a2 and one
; adding -%a2, and a shufflevector interleaves them: even elements take the
; add result, odd elements take the subtract result.
; All three run lines expect this to become a single vfmsubadd213pd on ymm
; registers; the Windows run also expects the operands to be read from
; memory through %rcx, %rdx and %r8.
1128 define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1129 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256:
1130 ; CHECK-FMA: # %bb.0:
1131 ; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa7,0xc2]
1132 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
1133 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
1135 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd_256:
1136 ; CHECK-AVX512VL: # %bb.0:
1137 ; CHECK-AVX512VL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa7,0xc2]
1138 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
1139 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
1141 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd_256:
1142 ; CHECK-FMA-WIN: # %bb.0:
1143 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
1144 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
1145 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00]
1146 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
1147 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
1148 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; %1 = (%a0 * %a1) + %a2
1149 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 ; %2 = -%a2
1150 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2) ; %3 = (%a0 * %a1) - %a2
1151 %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; elements 0,2 from %1 (add), elements 1,3 from %3 (subtract)
; Declarations of the llvm.fma intrinsic overloads called by the test
; functions above: scalar float/double plus the 128-bit and 256-bit vector
; forms of each.
1155 declare float @llvm.fma.f32(float, float, float)
1156 declare double @llvm.fma.f64(double, double, double)
1157 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
1158 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
1159 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
1160 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
; Attribute group #0, referenced by the test function definitions above.
1162 attributes #0 = { nounwind }