1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mcpu=core-avx2 -show-mc-encoding | FileCheck %s --check-prefix=AVX2
3 ; RUN: llc < %s -disable-peephole -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=AVX512
5 target triple = "x86_64-unknown-unknown"
7 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
8 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
9 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
10 declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
12 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
13 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
14 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
15 declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
; aab pattern, float: lane 0 computes (a * a) + b with upper lanes zeroed.
; Expects the load of b to fold into the memory addend of vfmadd213ss, with
; only the low scalar element stored back through %a.
17 define void @fmadd_aab_ss(ptr %a, ptr %b) {
18 ; AVX2-LABEL: fmadd_aab_ss:
20 ; AVX2-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
21 ; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
22 ; AVX2-NEXT: vfmadd213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xa9,0x06]
23 ; AVX2-NEXT: # xmm0 = (xmm0 * xmm0) + mem
24 ; AVX2-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
25 ; AVX2-NEXT: retq # encoding: [0xc3]
27 ; AVX512-LABEL: fmadd_aab_ss:
29 ; AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
30 ; AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
31 ; AVX512-NEXT: vfmadd213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0x06]
32 ; AVX512-NEXT: # xmm0 = (xmm0 * xmm0) + mem
33 ; AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
34 ; AVX512-NEXT: retq # encoding: [0xc3]
35 %a.val = load float, ptr %a
36 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
37 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
38 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
39 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
41 %b.val = load float, ptr %b
42 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
43 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
44 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
45 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Intrinsic operands (av, av, bv): the repeated multiplicand forces the folded
; load to be the addend (213 form).
47 %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
49 %sr = extractelement <4 x float> %vr, i32 0
50 store float %sr, ptr %a
; aba pattern, float: lane 0 computes (a * b) + a.
; Expects the load of b to fold as the multiplicand of vfmadd231ss, with only
; the low scalar element stored back through %a.
54 define void @fmadd_aba_ss(ptr %a, ptr %b) {
55 ; AVX2-LABEL: fmadd_aba_ss:
57 ; AVX2-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
58 ; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
59 ; AVX2-NEXT: vfmadd231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xb9,0x06]
60 ; AVX2-NEXT: # xmm0 = (xmm0 * mem) + xmm0
61 ; AVX2-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
62 ; AVX2-NEXT: retq # encoding: [0xc3]
64 ; AVX512-LABEL: fmadd_aba_ss:
66 ; AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
67 ; AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
68 ; AVX512-NEXT: vfmadd231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
69 ; AVX512-NEXT: # xmm0 = (xmm0 * mem) + xmm0
70 ; AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
71 ; AVX512-NEXT: retq # encoding: [0xc3]
72 %a.val = load float, ptr %a
73 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
74 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
75 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
76 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
78 %b.val = load float, ptr %b
79 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
80 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
81 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
82 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Intrinsic operands (av, bv, av): b appears as the second multiplicand, so
; folding its load selects the 231 form.
84 %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
86 %sr = extractelement <4 x float> %vr, i32 0
87 store float %sr, ptr %a
; aab pattern, float: lane 0 computes (a * a) - b.
; Expects the load of b to fold into the memory subtrahend of vfmsub213ss,
; with only the low scalar element stored back through %a.
91 define void @fmsub_aab_ss(ptr %a, ptr %b) {
92 ; AVX2-LABEL: fmsub_aab_ss:
94 ; AVX2-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
95 ; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
96 ; AVX2-NEXT: vfmsub213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xab,0x06]
97 ; AVX2-NEXT: # xmm0 = (xmm0 * xmm0) - mem
98 ; AVX2-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
99 ; AVX2-NEXT: retq # encoding: [0xc3]
101 ; AVX512-LABEL: fmsub_aab_ss:
103 ; AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
104 ; AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
105 ; AVX512-NEXT: vfmsub213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0x06]
106 ; AVX512-NEXT: # xmm0 = (xmm0 * xmm0) - mem
107 ; AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
108 ; AVX512-NEXT: retq # encoding: [0xc3]
109 %a.val = load float, ptr %a
110 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
111 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
112 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
113 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
115 %b.val = load float, ptr %b
116 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
117 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
118 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
119 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Intrinsic operands (av, av, bv): repeated multiplicand, b is the subtrahend.
121 %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
123 %sr = extractelement <4 x float> %vr, i32 0
124 store float %sr, ptr %a
; aba pattern, float: lane 0 computes (a * b) - a.
; Expects the load of b to fold as the multiplicand of vfmsub231ss, with only
; the low scalar element stored back through %a.
128 define void @fmsub_aba_ss(ptr %a, ptr %b) {
129 ; AVX2-LABEL: fmsub_aba_ss:
131 ; AVX2-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
132 ; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
133 ; AVX2-NEXT: vfmsub231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbb,0x06]
134 ; AVX2-NEXT: # xmm0 = (xmm0 * mem) - xmm0
135 ; AVX2-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
136 ; AVX2-NEXT: retq # encoding: [0xc3]
138 ; AVX512-LABEL: fmsub_aba_ss:
140 ; AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
141 ; AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
142 ; AVX512-NEXT: vfmsub231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0x06]
143 ; AVX512-NEXT: # xmm0 = (xmm0 * mem) - xmm0
144 ; AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
145 ; AVX512-NEXT: retq # encoding: [0xc3]
146 %a.val = load float, ptr %a
147 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
148 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
149 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
150 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
152 %b.val = load float, ptr %b
153 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
154 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
155 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
156 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Intrinsic operands (av, bv, av): folding b's load selects the 231 form.
158 %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
160 %sr = extractelement <4 x float> %vr, i32 0
161 store float %sr, ptr %a
; aab pattern, float: lane 0 computes -(a * a) + b.
; Expects the load of b to fold into the memory addend of vfnmadd213ss, with
; only the low scalar element stored back through %a.
165 define void @fnmadd_aab_ss(ptr %a, ptr %b) {
166 ; AVX2-LABEL: fnmadd_aab_ss:
168 ; AVX2-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
169 ; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
170 ; AVX2-NEXT: vfnmadd213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xad,0x06]
171 ; AVX2-NEXT: # xmm0 = -(xmm0 * xmm0) + mem
172 ; AVX2-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
173 ; AVX2-NEXT: retq # encoding: [0xc3]
175 ; AVX512-LABEL: fnmadd_aab_ss:
177 ; AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
178 ; AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
179 ; AVX512-NEXT: vfnmadd213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0x06]
180 ; AVX512-NEXT: # xmm0 = -(xmm0 * xmm0) + mem
181 ; AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
182 ; AVX512-NEXT: retq # encoding: [0xc3]
183 %a.val = load float, ptr %a
184 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
185 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
186 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
187 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
189 %b.val = load float, ptr %b
190 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
191 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
192 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
193 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Intrinsic operands (av, av, bv): repeated multiplicand, b is the addend.
195 %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
197 %sr = extractelement <4 x float> %vr, i32 0
198 store float %sr, ptr %a
; aba pattern, float: lane 0 computes -(a * b) + a.
; Expects the load of b to fold as the multiplicand of vfnmadd231ss, with
; only the low scalar element stored back through %a.
202 define void @fnmadd_aba_ss(ptr %a, ptr %b) {
203 ; AVX2-LABEL: fnmadd_aba_ss:
205 ; AVX2-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
206 ; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
207 ; AVX2-NEXT: vfnmadd231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbd,0x06]
208 ; AVX2-NEXT: # xmm0 = -(xmm0 * mem) + xmm0
209 ; AVX2-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
210 ; AVX2-NEXT: retq # encoding: [0xc3]
212 ; AVX512-LABEL: fnmadd_aba_ss:
214 ; AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
215 ; AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
216 ; AVX512-NEXT: vfnmadd231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbd,0x06]
217 ; AVX512-NEXT: # xmm0 = -(xmm0 * mem) + xmm0
218 ; AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
219 ; AVX512-NEXT: retq # encoding: [0xc3]
220 %a.val = load float, ptr %a
221 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
222 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
223 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
224 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
226 %b.val = load float, ptr %b
227 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
228 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
229 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
230 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Intrinsic operands (av, bv, av): folding b's load selects the 231 form.
232 %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
234 %sr = extractelement <4 x float> %vr, i32 0
235 store float %sr, ptr %a
; aab pattern, float: lane 0 computes -(a * a) - b.
; Expects the load of b to fold into the memory subtrahend of vfnmsub213ss,
; with only the low scalar element stored back through %a.
239 define void @fnmsub_aab_ss(ptr %a, ptr %b) {
240 ; AVX2-LABEL: fnmsub_aab_ss:
242 ; AVX2-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
243 ; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
244 ; AVX2-NEXT: vfnmsub213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xaf,0x06]
245 ; AVX2-NEXT: # xmm0 = -(xmm0 * xmm0) - mem
246 ; AVX2-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
247 ; AVX2-NEXT: retq # encoding: [0xc3]
249 ; AVX512-LABEL: fnmsub_aab_ss:
251 ; AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
252 ; AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
253 ; AVX512-NEXT: vfnmsub213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0x06]
254 ; AVX512-NEXT: # xmm0 = -(xmm0 * xmm0) - mem
255 ; AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
256 ; AVX512-NEXT: retq # encoding: [0xc3]
257 %a.val = load float, ptr %a
258 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
259 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
260 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
261 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
263 %b.val = load float, ptr %b
264 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
265 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
266 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
267 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Intrinsic operands (av, av, bv): repeated multiplicand, b is the subtrahend.
269 %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
271 %sr = extractelement <4 x float> %vr, i32 0
272 store float %sr, ptr %a
; aba pattern, float: lane 0 computes -(a * b) - a.
; Expects the load of b to fold as the multiplicand of vfnmsub231ss, with
; only the low scalar element stored back through %a.
276 define void @fnmsub_aba_ss(ptr %a, ptr %b) {
277 ; AVX2-LABEL: fnmsub_aba_ss:
279 ; AVX2-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
280 ; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
281 ; AVX2-NEXT: vfnmsub231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbf,0x06]
282 ; AVX2-NEXT: # xmm0 = -(xmm0 * mem) - xmm0
283 ; AVX2-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
284 ; AVX2-NEXT: retq # encoding: [0xc3]
286 ; AVX512-LABEL: fnmsub_aba_ss:
288 ; AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
289 ; AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
290 ; AVX512-NEXT: vfnmsub231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0x06]
291 ; AVX512-NEXT: # xmm0 = -(xmm0 * mem) - xmm0
292 ; AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
293 ; AVX512-NEXT: retq # encoding: [0xc3]
294 %a.val = load float, ptr %a
295 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
296 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
297 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
298 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
300 %b.val = load float, ptr %b
301 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
302 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
303 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
304 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Intrinsic operands (av, bv, av): folding b's load selects the 231 form.
306 %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
308 %sr = extractelement <4 x float> %vr, i32 0
309 store float %sr, ptr %a
; aab pattern, double: lane 0 computes (a * a) + b with the upper lane zeroed.
; Expects the load of b to fold into the memory addend of vfmadd213sd, with
; only the low scalar element stored back through %a.
313 define void @fmadd_aab_sd(ptr %a, ptr %b) {
314 ; AVX2-LABEL: fmadd_aab_sd:
316 ; AVX2-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
317 ; AVX2-NEXT: # xmm0 = mem[0],zero
318 ; AVX2-NEXT: vfmadd213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xa9,0x06]
319 ; AVX2-NEXT: # xmm0 = (xmm0 * xmm0) + mem
320 ; AVX2-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
321 ; AVX2-NEXT: retq # encoding: [0xc3]
323 ; AVX512-LABEL: fmadd_aab_sd:
325 ; AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
326 ; AVX512-NEXT: # xmm0 = mem[0],zero
327 ; AVX512-NEXT: vfmadd213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0x06]
328 ; AVX512-NEXT: # xmm0 = (xmm0 * xmm0) + mem
329 ; AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
330 ; AVX512-NEXT: retq # encoding: [0xc3]
331 %a.val = load double, ptr %a
332 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
333 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
335 %b.val = load double, ptr %b
336 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
337 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Intrinsic operands (av, av, bv): repeated multiplicand forces the 213 form.
339 %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
341 %sr = extractelement <2 x double> %vr, i32 0
342 store double %sr, ptr %a
; aba pattern, double: lane 0 computes (a * b) + a.
; Expects the load of b to fold as the multiplicand of vfmadd231sd, with only
; the low scalar element stored back through %a.
346 define void @fmadd_aba_sd(ptr %a, ptr %b) {
347 ; AVX2-LABEL: fmadd_aba_sd:
349 ; AVX2-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
350 ; AVX2-NEXT: # xmm0 = mem[0],zero
351 ; AVX2-NEXT: vfmadd231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
352 ; AVX2-NEXT: # xmm0 = (xmm0 * mem) + xmm0
353 ; AVX2-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
354 ; AVX2-NEXT: retq # encoding: [0xc3]
356 ; AVX512-LABEL: fmadd_aba_sd:
358 ; AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
359 ; AVX512-NEXT: # xmm0 = mem[0],zero
360 ; AVX512-NEXT: vfmadd231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
361 ; AVX512-NEXT: # xmm0 = (xmm0 * mem) + xmm0
362 ; AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
363 ; AVX512-NEXT: retq # encoding: [0xc3]
364 %a.val = load double, ptr %a
365 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
366 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
368 %b.val = load double, ptr %b
369 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
370 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Intrinsic operands (av, bv, av): folding b's load selects the 231 form.
372 %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
374 %sr = extractelement <2 x double> %vr, i32 0
375 store double %sr, ptr %a
; aab pattern, double: lane 0 computes (a * a) - b.
; Expects the load of b to fold into the memory subtrahend of vfmsub213sd,
; with only the low scalar element stored back through %a.
379 define void @fmsub_aab_sd(ptr %a, ptr %b) {
380 ; AVX2-LABEL: fmsub_aab_sd:
382 ; AVX2-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
383 ; AVX2-NEXT: # xmm0 = mem[0],zero
384 ; AVX2-NEXT: vfmsub213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xab,0x06]
385 ; AVX2-NEXT: # xmm0 = (xmm0 * xmm0) - mem
386 ; AVX2-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
387 ; AVX2-NEXT: retq # encoding: [0xc3]
389 ; AVX512-LABEL: fmsub_aab_sd:
391 ; AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
392 ; AVX512-NEXT: # xmm0 = mem[0],zero
393 ; AVX512-NEXT: vfmsub213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0x06]
394 ; AVX512-NEXT: # xmm0 = (xmm0 * xmm0) - mem
395 ; AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
396 ; AVX512-NEXT: retq # encoding: [0xc3]
397 %a.val = load double, ptr %a
398 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
399 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
401 %b.val = load double, ptr %b
402 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
403 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Intrinsic operands (av, av, bv): repeated multiplicand, b is the subtrahend.
405 %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
407 %sr = extractelement <2 x double> %vr, i32 0
408 store double %sr, ptr %a
; aba pattern, double: lane 0 computes (a * b) - a.
; Expects the load of b to fold as the multiplicand of vfmsub231sd, with only
; the low scalar element stored back through %a.
412 define void @fmsub_aba_sd(ptr %a, ptr %b) {
413 ; AVX2-LABEL: fmsub_aba_sd:
415 ; AVX2-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
416 ; AVX2-NEXT: # xmm0 = mem[0],zero
417 ; AVX2-NEXT: vfmsub231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbb,0x06]
418 ; AVX2-NEXT: # xmm0 = (xmm0 * mem) - xmm0
419 ; AVX2-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
420 ; AVX2-NEXT: retq # encoding: [0xc3]
422 ; AVX512-LABEL: fmsub_aba_sd:
424 ; AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
425 ; AVX512-NEXT: # xmm0 = mem[0],zero
426 ; AVX512-NEXT: vfmsub231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0x06]
427 ; AVX512-NEXT: # xmm0 = (xmm0 * mem) - xmm0
428 ; AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
429 ; AVX512-NEXT: retq # encoding: [0xc3]
430 %a.val = load double, ptr %a
431 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
432 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
434 %b.val = load double, ptr %b
435 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
436 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Intrinsic operands (av, bv, av): folding b's load selects the 231 form.
438 %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
440 %sr = extractelement <2 x double> %vr, i32 0
441 store double %sr, ptr %a
; aab pattern, double: lane 0 computes -(a * a) + b.
; Expects the load of b to fold into the memory addend of vfnmadd213sd, with
; only the low scalar element stored back through %a.
445 define void @fnmadd_aab_sd(ptr %a, ptr %b) {
446 ; AVX2-LABEL: fnmadd_aab_sd:
448 ; AVX2-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
449 ; AVX2-NEXT: # xmm0 = mem[0],zero
450 ; AVX2-NEXT: vfnmadd213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xad,0x06]
451 ; AVX2-NEXT: # xmm0 = -(xmm0 * xmm0) + mem
452 ; AVX2-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
453 ; AVX2-NEXT: retq # encoding: [0xc3]
455 ; AVX512-LABEL: fnmadd_aab_sd:
457 ; AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
458 ; AVX512-NEXT: # xmm0 = mem[0],zero
459 ; AVX512-NEXT: vfnmadd213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0x06]
460 ; AVX512-NEXT: # xmm0 = -(xmm0 * xmm0) + mem
461 ; AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
462 ; AVX512-NEXT: retq # encoding: [0xc3]
463 %a.val = load double, ptr %a
464 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
465 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
467 %b.val = load double, ptr %b
468 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
469 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Intrinsic operands (av, av, bv): repeated multiplicand, b is the addend.
471 %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
473 %sr = extractelement <2 x double> %vr, i32 0
474 store double %sr, ptr %a
; aba pattern, double: lane 0 computes -(a * b) + a.
; Expects the load of b to fold as the multiplicand of vfnmadd231sd, with
; only the low scalar element stored back through %a.
478 define void @fnmadd_aba_sd(ptr %a, ptr %b) {
479 ; AVX2-LABEL: fnmadd_aba_sd:
481 ; AVX2-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
482 ; AVX2-NEXT: # xmm0 = mem[0],zero
483 ; AVX2-NEXT: vfnmadd231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbd,0x06]
484 ; AVX2-NEXT: # xmm0 = -(xmm0 * mem) + xmm0
485 ; AVX2-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
486 ; AVX2-NEXT: retq # encoding: [0xc3]
488 ; AVX512-LABEL: fnmadd_aba_sd:
490 ; AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
491 ; AVX512-NEXT: # xmm0 = mem[0],zero
492 ; AVX512-NEXT: vfnmadd231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbd,0x06]
493 ; AVX512-NEXT: # xmm0 = -(xmm0 * mem) + xmm0
494 ; AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
495 ; AVX512-NEXT: retq # encoding: [0xc3]
496 %a.val = load double, ptr %a
497 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
498 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
500 %b.val = load double, ptr %b
501 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
502 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Intrinsic operands (av, bv, av): folding b's load selects the 231 form.
504 %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
506 %sr = extractelement <2 x double> %vr, i32 0
507 store double %sr, ptr %a
; aab pattern, double: lane 0 computes -(a * a) - b.
; Expects the load of b to fold into the memory subtrahend of vfnmsub213sd,
; with only the low scalar element stored back through %a.
511 define void @fnmsub_aab_sd(ptr %a, ptr %b) {
512 ; AVX2-LABEL: fnmsub_aab_sd:
514 ; AVX2-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
515 ; AVX2-NEXT: # xmm0 = mem[0],zero
516 ; AVX2-NEXT: vfnmsub213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xaf,0x06]
517 ; AVX2-NEXT: # xmm0 = -(xmm0 * xmm0) - mem
518 ; AVX2-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
519 ; AVX2-NEXT: retq # encoding: [0xc3]
521 ; AVX512-LABEL: fnmsub_aab_sd:
523 ; AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
524 ; AVX512-NEXT: # xmm0 = mem[0],zero
525 ; AVX512-NEXT: vfnmsub213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0x06]
526 ; AVX512-NEXT: # xmm0 = -(xmm0 * xmm0) - mem
527 ; AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
528 ; AVX512-NEXT: retq # encoding: [0xc3]
529 %a.val = load double, ptr %a
530 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
531 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
533 %b.val = load double, ptr %b
534 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
535 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Intrinsic operands (av, av, bv): repeated multiplicand, b is the subtrahend.
537 %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
539 %sr = extractelement <2 x double> %vr, i32 0
540 store double %sr, ptr %a
; aba pattern, double: lane 0 computes -(a * b) - a.
; Expects the load of b to fold as the multiplicand of vfnmsub231sd, with
; only the low scalar element stored back through %a.
544 define void @fnmsub_aba_sd(ptr %a, ptr %b) {
545 ; AVX2-LABEL: fnmsub_aba_sd:
547 ; AVX2-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
548 ; AVX2-NEXT: # xmm0 = mem[0],zero
549 ; AVX2-NEXT: vfnmsub231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbf,0x06]
550 ; AVX2-NEXT: # xmm0 = -(xmm0 * mem) - xmm0
551 ; AVX2-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
552 ; AVX2-NEXT: retq # encoding: [0xc3]
554 ; AVX512-LABEL: fnmsub_aba_sd:
556 ; AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
557 ; AVX512-NEXT: # xmm0 = mem[0],zero
558 ; AVX512-NEXT: vfnmsub231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0x06]
559 ; AVX512-NEXT: # xmm0 = -(xmm0 * mem) - xmm0
560 ; AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
561 ; AVX512-NEXT: retq # encoding: [0xc3]
562 %a.val = load double, ptr %a
563 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
564 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
566 %b.val = load double, ptr %b
567 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
568 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Intrinsic operands (av, bv, av): folding b's load selects the 231 form.
570 %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
572 %sr = extractelement <2 x double> %vr, i32 0
573 store double %sr, ptr %a