1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma -show-mc-encoding | FileCheck %s
; Masked scalar f32 multiply-add. On the <4 x float> views of the arguments,
; element 0 of (%b * %a) + %c (fast-math fmul/fadd) is selected when element 0
; of %k (bitcast to <8 x i1>) is set; otherwise element 0 of %a is kept. The
; scalar is inserted back into element 0 of %a's float view.
; The assertions expect a single vfmadd213ss predicated on {%k1}.
4 define <2 x double> @combine_scalar_mask_fmadd_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
5 ; CHECK-LABEL: combine_scalar_mask_fmadd_f32:
6 ; CHECK: # %bb.0: # %entry
7 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
9 ; CHECK-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + xmm2
10 ; CHECK-NEXT: retq # encoding: [0xc3]
12 %0 = bitcast <2 x double> %a to <4 x float>
13 %1 = bitcast <2 x double> %b to <4 x float>
14 %2 = bitcast <2 x double> %c to <4 x float>
15 %3 = extractelement <4 x float> %0, i64 0
16 %4 = extractelement <4 x float> %1, i64 0
17 %5 = extractelement <4 x float> %2, i64 0
18 %6 = fmul fast float %4, %3
19 %7 = fadd fast float %6, %5
20 %8 = bitcast i8 %k to <8 x i1>
21 %9 = extractelement <8 x i1> %8, i64 0
22 %10 = select i1 %9, float %7, float %3
23 %11 = insertelement <4 x float> %0, float %10, i64 0
24 %12 = bitcast <4 x float> %11 to <2 x double>
; Masked scalar f64 multiply-add. Element 0 of (%b * %a) + %c (fast-math
; fmul/fadd) is selected when element 0 of %k (bitcast to <8 x i1>) is set;
; otherwise element 0 of %a is kept. The scalar is inserted back into
; element 0 of %a.
; The assertions expect a single vfmadd213sd predicated on {%k1}.
28 define <2 x double> @combine_scalar_mask_fmadd_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
29 ; CHECK-LABEL: combine_scalar_mask_fmadd_f64:
30 ; CHECK: # %bb.0: # %entry
31 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
32 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
33 ; CHECK-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + xmm2
34 ; CHECK-NEXT: retq # encoding: [0xc3]
36 %0 = extractelement <2 x double> %a, i64 0
37 %1 = extractelement <2 x double> %b, i64 0
38 %2 = extractelement <2 x double> %c, i64 0
39 %3 = fmul fast double %1, %0
40 %4 = fadd fast double %3, %2
41 %5 = bitcast i8 %k to <8 x i1>
42 %6 = extractelement <8 x i1> %5, i64 0
43 %7 = select i1 %6, double %4, double %0
44 %8 = insertelement <2 x double> %a, double %7, i64 0
; Zero-masked scalar f32 multiply-add. On the <4 x float> views of the
; arguments, element 0 of (%b * %a) + %c (fast-math fmul/fadd) is selected when
; element 0 of %k (bitcast to <8 x i1>) is set; otherwise 0.0 is used. The
; scalar is inserted into element 0 of %a's float view.
; The assertions expect a single vfmadd213ss with {%k1} {z}.
48 define <2 x double> @combine_scalar_maskz_fmadd_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
49 ; CHECK-LABEL: combine_scalar_maskz_fmadd_32:
50 ; CHECK: # %bb.0: # %entry
51 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
52 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
53 ; CHECK-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
54 ; CHECK-NEXT: retq # encoding: [0xc3]
56 %0 = bitcast <2 x double> %a to <4 x float>
57 %1 = bitcast <2 x double> %b to <4 x float>
58 %2 = bitcast <2 x double> %c to <4 x float>
59 %3 = extractelement <4 x float> %0, i64 0
60 %4 = extractelement <4 x float> %1, i64 0
61 %5 = extractelement <4 x float> %2, i64 0
62 %6 = fmul fast float %4, %3
63 %7 = fadd fast float %6, %5
64 %8 = bitcast i8 %k to <8 x i1>
65 %9 = extractelement <8 x i1> %8, i64 0
66 %10 = select i1 %9, float %7, float 0.000000e+00
67 %11 = insertelement <4 x float> %0, float %10, i64 0
68 %12 = bitcast <4 x float> %11 to <2 x double>
; Zero-masked scalar f64 multiply-add. Element 0 of (%b * %a) + %c (fast-math
; fmul/fadd) is selected when element 0 of %k (bitcast to <8 x i1>) is set;
; otherwise 0.0 is used. The scalar is inserted into element 0 of %a.
; The assertions expect a single vfmadd213sd with {%k1} {z}.
72 define <2 x double> @combine_scalar_maskz_fmadd_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
73 ; CHECK-LABEL: combine_scalar_maskz_fmadd_64:
74 ; CHECK: # %bb.0: # %entry
75 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
76 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
77 ; CHECK-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
78 ; CHECK-NEXT: retq # encoding: [0xc3]
80 %0 = extractelement <2 x double> %a, i64 0
81 %1 = extractelement <2 x double> %b, i64 0
82 %2 = extractelement <2 x double> %c, i64 0
83 %3 = fmul fast double %1, %0
84 %4 = fadd fast double %3, %2
85 %5 = bitcast i8 %k to <8 x i1>
86 %6 = extractelement <8 x i1> %5, i64 0
87 %7 = select i1 %6, double %4, double 0.000000e+00
88 %8 = insertelement <2 x double> %a, double %7, i64 0
; "mask3" scalar f32 multiply-add: the pass-through operand is the addend %c.
; On the <4 x float> views, element 0 of (%b * %a) + %c (fast-math fmul/fadd)
; is selected when element 0 of %k (bitcast to <8 x i1>) is set; otherwise
; element 0 of %c is kept. The scalar is inserted into element 0 of %c's view.
; The assertions expect vfmadd231ss into xmm2 under {%k1}, then a vmovaps of
; xmm2 into xmm0.
92 define <2 x double> @combine_scalar_mask3_fmadd_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
93 ; CHECK-LABEL: combine_scalar_mask3_fmadd_32:
94 ; CHECK: # %bb.0: # %entry
95 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
96 ; CHECK-NEXT: vfmadd231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xb9,0xd0]
97 ; CHECK-NEXT: # xmm2 {%k1} = (xmm1 * xmm0) + xmm2
98 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
99 ; CHECK-NEXT: retq # encoding: [0xc3]
101 %0 = bitcast <2 x double> %a to <4 x float>
102 %1 = bitcast <2 x double> %b to <4 x float>
103 %2 = bitcast <2 x double> %c to <4 x float>
104 %3 = extractelement <4 x float> %0, i64 0
105 %4 = extractelement <4 x float> %1, i64 0
106 %5 = extractelement <4 x float> %2, i64 0
107 %6 = fmul fast float %4, %3
108 %7 = fadd fast float %6, %5
109 %8 = bitcast i8 %k to <8 x i1>
110 %9 = extractelement <8 x i1> %8, i64 0
111 %10 = select i1 %9, float %7, float %5
112 %11 = insertelement <4 x float> %2, float %10, i64 0
113 %12 = bitcast <4 x float> %11 to <2 x double>
; "mask3" scalar f64 multiply-add: the pass-through operand is the addend %c.
; Element 0 of (%b * %a) + %c (fast-math fmul/fadd) is selected when element 0
; of %k (bitcast to <8 x i1>) is set; otherwise element 0 of %c is kept. The
; scalar is inserted into element 0 of %c.
; The assertions expect vfmadd231sd into xmm2 under {%k1}, then a vmovapd of
; xmm2 into xmm0.
117 define <2 x double> @combine_scalar_mask3_fmadd_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
118 ; CHECK-LABEL: combine_scalar_mask3_fmadd_64:
119 ; CHECK: # %bb.0: # %entry
120 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
121 ; CHECK-NEXT: vfmadd231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb9,0xd0]
122 ; CHECK-NEXT: # xmm2 {%k1} = (xmm1 * xmm0) + xmm2
123 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
124 ; CHECK-NEXT: retq # encoding: [0xc3]
126 %0 = extractelement <2 x double> %a, i64 0
127 %1 = extractelement <2 x double> %b, i64 0
128 %2 = extractelement <2 x double> %c, i64 0
129 %3 = fmul fast double %1, %0
130 %4 = fadd fast double %3, %2
131 %5 = bitcast i8 %k to <8 x i1>
132 %6 = extractelement <8 x i1> %5, i64 0
133 %7 = select i1 %6, double %4, double %2
134 %8 = insertelement <2 x double> %c, double %7, i64 0
; Masked scalar f32 multiply-subtract. On the <4 x float> views of the
; arguments, element 0 of (%b * %a) - %c (fast-math fmul/fsub) is selected when
; element 0 of %k (bitcast to <8 x i1>) is set; otherwise element 0 of %a is
; kept. The scalar is inserted back into element 0 of %a's float view.
; The assertions expect a single vfmsub213ss predicated on {%k1}.
138 define <2 x double> @combine_scalar_mask_fmsub_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
139 ; CHECK-LABEL: combine_scalar_mask_fmsub_f32:
140 ; CHECK: # %bb.0: # %entry
141 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
142 ; CHECK-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
143 ; CHECK-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) - xmm2
144 ; CHECK-NEXT: retq # encoding: [0xc3]
146 %0 = bitcast <2 x double> %a to <4 x float>
147 %1 = bitcast <2 x double> %b to <4 x float>
148 %2 = bitcast <2 x double> %c to <4 x float>
149 %3 = extractelement <4 x float> %0, i64 0
150 %4 = extractelement <4 x float> %1, i64 0
151 %5 = extractelement <4 x float> %2, i64 0
152 %6 = fmul fast float %4, %3
153 %7 = fsub fast float %6, %5
154 %8 = bitcast i8 %k to <8 x i1>
155 %9 = extractelement <8 x i1> %8, i64 0
156 %10 = select i1 %9, float %7, float %3
157 %11 = insertelement <4 x float> %0, float %10, i64 0
158 %12 = bitcast <4 x float> %11 to <2 x double>
; Masked scalar f64 multiply-subtract. Element 0 of (%b * %a) - %c (fast-math
; fmul/fsub) is selected when element 0 of %k (bitcast to <8 x i1>) is set;
; otherwise element 0 of %a is kept. The scalar is inserted back into
; element 0 of %a.
; The assertions expect a single vfmsub213sd predicated on {%k1}.
162 define <2 x double> @combine_scalar_mask_fmsub_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
163 ; CHECK-LABEL: combine_scalar_mask_fmsub_f64:
164 ; CHECK: # %bb.0: # %entry
165 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
166 ; CHECK-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
167 ; CHECK-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) - xmm2
168 ; CHECK-NEXT: retq # encoding: [0xc3]
170 %0 = extractelement <2 x double> %a, i64 0
171 %1 = extractelement <2 x double> %b, i64 0
172 %2 = extractelement <2 x double> %c, i64 0
173 %3 = fmul fast double %1, %0
174 %4 = fsub fast double %3, %2
175 %5 = bitcast i8 %k to <8 x i1>
176 %6 = extractelement <8 x i1> %5, i64 0
177 %7 = select i1 %6, double %4, double %0
178 %8 = insertelement <2 x double> %a, double %7, i64 0
; Zero-masked scalar f32 multiply-subtract. On the <4 x float> views of the
; arguments, element 0 of (%b * %a) - %c (fast-math fmul/fsub) is selected when
; element 0 of %k (bitcast to <8 x i1>) is set; otherwise 0.0 is used. The
; scalar is inserted into element 0 of %a's float view.
; The assertions expect a single vfmsub213ss with {%k1} {z}.
182 define <2 x double> @combine_scalar_maskz_fmsub_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
183 ; CHECK-LABEL: combine_scalar_maskz_fmsub_32:
184 ; CHECK: # %bb.0: # %entry
185 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
186 ; CHECK-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
187 ; CHECK-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
188 ; CHECK-NEXT: retq # encoding: [0xc3]
190 %0 = bitcast <2 x double> %a to <4 x float>
191 %1 = bitcast <2 x double> %b to <4 x float>
192 %2 = bitcast <2 x double> %c to <4 x float>
193 %3 = extractelement <4 x float> %0, i64 0
194 %4 = extractelement <4 x float> %1, i64 0
195 %5 = extractelement <4 x float> %2, i64 0
196 %6 = fmul fast float %4, %3
197 %7 = fsub fast float %6, %5
198 %8 = bitcast i8 %k to <8 x i1>
199 %9 = extractelement <8 x i1> %8, i64 0
200 %10 = select i1 %9, float %7, float 0.000000e+00
201 %11 = insertelement <4 x float> %0, float %10, i64 0
202 %12 = bitcast <4 x float> %11 to <2 x double>
; Zero-masked scalar f64 multiply-subtract. Element 0 of (%b * %a) - %c
; (fast-math fmul/fsub) is selected when element 0 of %k (bitcast to <8 x i1>)
; is set; otherwise 0.0 is used. The scalar is inserted into element 0 of %a.
; The assertions expect a single vfmsub213sd with {%k1} {z}.
206 define <2 x double> @combine_scalar_maskz_fmsub_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
207 ; CHECK-LABEL: combine_scalar_maskz_fmsub_64:
208 ; CHECK: # %bb.0: # %entry
209 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
210 ; CHECK-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
211 ; CHECK-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
212 ; CHECK-NEXT: retq # encoding: [0xc3]
214 %0 = extractelement <2 x double> %a, i64 0
215 %1 = extractelement <2 x double> %b, i64 0
216 %2 = extractelement <2 x double> %c, i64 0
217 %3 = fmul fast double %1, %0
218 %4 = fsub fast double %3, %2
219 %5 = bitcast i8 %k to <8 x i1>
220 %6 = extractelement <8 x i1> %5, i64 0
221 %7 = select i1 %6, double %4, double 0.000000e+00
222 %8 = insertelement <2 x double> %a, double %7, i64 0
; "mask3" scalar f32 multiply-subtract: the pass-through operand is the
; subtrahend %c. On the <4 x float> views, element 0 of (%b * %a) - %c
; (fast-math fmul/fsub) is selected when element 0 of %k (bitcast to <8 x i1>)
; is set; otherwise element 0 of %c is kept. The scalar is inserted into
; element 0 of %c's view.
; The assertions expect vfmsub231ss into xmm2 under {%k1}, then a vmovaps of
; xmm2 into xmm0.
226 define <2 x double> @combine_scalar_mask3_fmsub_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
227 ; CHECK-LABEL: combine_scalar_mask3_fmsub_32:
228 ; CHECK: # %bb.0: # %entry
229 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
230 ; CHECK-NEXT: vfmsub231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbb,0xd0]
231 ; CHECK-NEXT: # xmm2 {%k1} = (xmm1 * xmm0) - xmm2
232 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
233 ; CHECK-NEXT: retq # encoding: [0xc3]
235 %0 = bitcast <2 x double> %a to <4 x float>
236 %1 = bitcast <2 x double> %b to <4 x float>
237 %2 = bitcast <2 x double> %c to <4 x float>
238 %3 = extractelement <4 x float> %0, i64 0
239 %4 = extractelement <4 x float> %1, i64 0
240 %5 = extractelement <4 x float> %2, i64 0
241 %6 = fmul fast float %4, %3
242 %7 = fsub fast float %6, %5
243 %8 = bitcast i8 %k to <8 x i1>
244 %9 = extractelement <8 x i1> %8, i64 0
245 %10 = select i1 %9, float %7, float %5
246 %11 = insertelement <4 x float> %2, float %10, i64 0
247 %12 = bitcast <4 x float> %11 to <2 x double>
; "mask3" scalar f64 multiply-subtract: the pass-through operand is the
; subtrahend %c. Element 0 of (%b * %a) - %c (fast-math fmul/fsub) is selected
; when element 0 of %k (bitcast to <8 x i1>) is set; otherwise element 0 of %c
; is kept. The scalar is inserted into element 0 of %c.
; The assertions expect vfmsub231sd into xmm2 under {%k1}, then a vmovapd of
; xmm2 into xmm0.
251 define <2 x double> @combine_scalar_mask3_fmsub_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
252 ; CHECK-LABEL: combine_scalar_mask3_fmsub_64:
253 ; CHECK: # %bb.0: # %entry
254 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
255 ; CHECK-NEXT: vfmsub231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbb,0xd0]
256 ; CHECK-NEXT: # xmm2 {%k1} = (xmm1 * xmm0) - xmm2
257 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
258 ; CHECK-NEXT: retq # encoding: [0xc3]
260 %0 = extractelement <2 x double> %a, i64 0
261 %1 = extractelement <2 x double> %b, i64 0
262 %2 = extractelement <2 x double> %c, i64 0
263 %3 = fmul fast double %1, %0
264 %4 = fsub fast double %3, %2
265 %5 = bitcast i8 %k to <8 x i1>
266 %6 = extractelement <8 x i1> %5, i64 0
267 %7 = select i1 %6, double %4, double %2
268 %8 = insertelement <2 x double> %c, double %7, i64 0
; Masked scalar f32 negated multiply-add. On the <4 x float> views of the
; arguments, element 0 of %c - (%b * %a) (fast-math fmul/fsub) is selected when
; element 0 of %k (bitcast to <8 x i1>) is set; otherwise element 0 of %a is
; kept. The scalar is inserted back into element 0 of %a's float view.
; The assertions expect a single vfnmadd213ss predicated on {%k1}.
272 define <2 x double> @combine_scalar_mask_fnmadd_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
273 ; CHECK-LABEL: combine_scalar_mask_fnmadd_f32:
274 ; CHECK: # %bb.0: # %entry
275 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
276 ; CHECK-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
277 ; CHECK-NEXT: # xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
278 ; CHECK-NEXT: retq # encoding: [0xc3]
280 %0 = bitcast <2 x double> %a to <4 x float>
281 %1 = bitcast <2 x double> %b to <4 x float>
282 %2 = bitcast <2 x double> %c to <4 x float>
283 %3 = extractelement <4 x float> %0, i64 0
284 %4 = extractelement <4 x float> %1, i64 0
285 %5 = extractelement <4 x float> %2, i64 0
286 %6 = fmul fast float %4, %3
287 %7 = fsub fast float %5, %6
288 %8 = bitcast i8 %k to <8 x i1>
289 %9 = extractelement <8 x i1> %8, i64 0
290 %10 = select i1 %9, float %7, float %3
291 %11 = insertelement <4 x float> %0, float %10, i64 0
292 %12 = bitcast <4 x float> %11 to <2 x double>
; Masked scalar f64 negated multiply-add. Element 0 of %c - (%b * %a)
; (fast-math fmul/fsub) is selected when element 0 of %k (bitcast to <8 x i1>)
; is set; otherwise element 0 of %a is kept. The scalar is inserted back into
; element 0 of %a.
; The assertions expect a single vfnmadd213sd predicated on {%k1}.
296 define <2 x double> @combine_scalar_mask_fnmadd_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
297 ; CHECK-LABEL: combine_scalar_mask_fnmadd_f64:
298 ; CHECK: # %bb.0: # %entry
299 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
300 ; CHECK-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
301 ; CHECK-NEXT: # xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
302 ; CHECK-NEXT: retq # encoding: [0xc3]
304 %0 = extractelement <2 x double> %a, i64 0
305 %1 = extractelement <2 x double> %b, i64 0
306 %2 = extractelement <2 x double> %c, i64 0
307 %3 = fmul fast double %1, %0
308 %4 = fsub fast double %2, %3
309 %5 = bitcast i8 %k to <8 x i1>
310 %6 = extractelement <8 x i1> %5, i64 0
311 %7 = select i1 %6, double %4, double %0
312 %8 = insertelement <2 x double> %a, double %7, i64 0
; Zero-masked scalar f32 negated multiply-add. On the <4 x float> views of the
; arguments, element 0 of %c - (%b * %a) (fast-math fmul/fsub) is selected when
; element 0 of %k (bitcast to <8 x i1>) is set; otherwise 0.0 is used. The
; scalar is inserted into element 0 of %a's float view.
; The assertions expect a single vfnmadd213ss with {%k1} {z}.
316 define <2 x double> @combine_scalar_maskz_fnmadd_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
317 ; CHECK-LABEL: combine_scalar_maskz_fnmadd_32:
318 ; CHECK: # %bb.0: # %entry
319 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
320 ; CHECK-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
321 ; CHECK-NEXT: # xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
322 ; CHECK-NEXT: retq # encoding: [0xc3]
324 %0 = bitcast <2 x double> %a to <4 x float>
325 %1 = bitcast <2 x double> %b to <4 x float>
326 %2 = bitcast <2 x double> %c to <4 x float>
327 %3 = extractelement <4 x float> %0, i64 0
328 %4 = extractelement <4 x float> %1, i64 0
329 %5 = extractelement <4 x float> %2, i64 0
330 %6 = fmul fast float %4, %3
331 %7 = fsub fast float %5, %6
332 %8 = bitcast i8 %k to <8 x i1>
333 %9 = extractelement <8 x i1> %8, i64 0
334 %10 = select i1 %9, float %7, float 0.000000e+00
335 %11 = insertelement <4 x float> %0, float %10, i64 0
336 %12 = bitcast <4 x float> %11 to <2 x double>
; Zero-masked scalar f64 negated multiply-add. Element 0 of %c - (%b * %a)
; (fast-math fmul/fsub) is selected when element 0 of %k (bitcast to <8 x i1>)
; is set; otherwise 0.0 is used. The scalar is inserted into element 0 of %a.
; The assertions expect a single vfnmadd213sd with {%k1} {z}.
340 define <2 x double> @combine_scalar_maskz_fnmadd_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
341 ; CHECK-LABEL: combine_scalar_maskz_fnmadd_64:
342 ; CHECK: # %bb.0: # %entry
343 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
344 ; CHECK-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
345 ; CHECK-NEXT: # xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
346 ; CHECK-NEXT: retq # encoding: [0xc3]
348 %0 = extractelement <2 x double> %a, i64 0
349 %1 = extractelement <2 x double> %b, i64 0
350 %2 = extractelement <2 x double> %c, i64 0
351 %3 = fmul fast double %1, %0
352 %4 = fsub fast double %2, %3
353 %5 = bitcast i8 %k to <8 x i1>
354 %6 = extractelement <8 x i1> %5, i64 0
355 %7 = select i1 %6, double %4, double 0.000000e+00
356 %8 = insertelement <2 x double> %a, double %7, i64 0
; "mask3" scalar f32 negated multiply-add: the pass-through operand is %c.
; On the <4 x float> views, element 0 of %c - (%b * %a) (fast-math fmul/fsub)
; is selected when element 0 of %k (bitcast to <8 x i1>) is set; otherwise
; element 0 of %c is kept. The scalar is inserted into element 0 of %c's view.
; The assertions expect vfnmadd231ss into xmm2 under {%k1}, then a vmovaps of
; xmm2 into xmm0.
360 define <2 x double> @combine_scalar_mask3_fnmadd_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
361 ; CHECK-LABEL: combine_scalar_mask3_fnmadd_32:
362 ; CHECK: # %bb.0: # %entry
363 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
364 ; CHECK-NEXT: vfnmadd231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbd,0xd0]
365 ; CHECK-NEXT: # xmm2 {%k1} = -(xmm1 * xmm0) + xmm2
366 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
367 ; CHECK-NEXT: retq # encoding: [0xc3]
369 %0 = bitcast <2 x double> %a to <4 x float>
370 %1 = bitcast <2 x double> %b to <4 x float>
371 %2 = bitcast <2 x double> %c to <4 x float>
372 %3 = extractelement <4 x float> %0, i64 0
373 %4 = extractelement <4 x float> %1, i64 0
374 %5 = extractelement <4 x float> %2, i64 0
375 %6 = fmul fast float %4, %3
376 %7 = fsub fast float %5, %6
377 %8 = bitcast i8 %k to <8 x i1>
378 %9 = extractelement <8 x i1> %8, i64 0
379 %10 = select i1 %9, float %7, float %5
380 %11 = insertelement <4 x float> %2, float %10, i64 0
381 %12 = bitcast <4 x float> %11 to <2 x double>
; "mask3" scalar f64 negated multiply-add: the pass-through operand is %c.
; Element 0 of %c - (%b * %a) (fast-math fmul/fsub) is selected when element 0
; of %k (bitcast to <8 x i1>) is set; otherwise element 0 of %c is kept. The
; scalar is inserted into element 0 of %c.
; The assertions expect vfnmadd231sd into xmm2 under {%k1}, then a vmovapd of
; xmm2 into xmm0.
385 define <2 x double> @combine_scalar_mask3_fnmadd_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
386 ; CHECK-LABEL: combine_scalar_mask3_fnmadd_64:
387 ; CHECK: # %bb.0: # %entry
388 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
389 ; CHECK-NEXT: vfnmadd231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbd,0xd0]
390 ; CHECK-NEXT: # xmm2 {%k1} = -(xmm1 * xmm0) + xmm2
391 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
392 ; CHECK-NEXT: retq # encoding: [0xc3]
394 %0 = extractelement <2 x double> %a, i64 0
395 %1 = extractelement <2 x double> %b, i64 0
396 %2 = extractelement <2 x double> %c, i64 0
397 %3 = fmul fast double %1, %0
398 %4 = fsub fast double %2, %3
399 %5 = bitcast i8 %k to <8 x i1>
400 %6 = extractelement <8 x i1> %5, i64 0
401 %7 = select i1 %6, double %4, double %2
402 %8 = insertelement <2 x double> %c, double %7, i64 0
; Masked scalar f32 negated multiply-subtract. On the <4 x float> views of the
; arguments, %sub negates element 0 of %c (as -0.0 - %c), and element 0 of
; %sub - (%b * %a) (all fast-math) is selected when element 0 of %k (bitcast
; to <8 x i1>) is set; otherwise element 0 of %a is kept. The scalar is
; inserted back into element 0 of %a's float view.
; The assertions expect a single vfnmsub213ss predicated on {%k1}.
406 define <2 x double> @combine_scalar_mask_fnmsub_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
407 ; CHECK-LABEL: combine_scalar_mask_fnmsub_f32:
408 ; CHECK: # %bb.0: # %entry
409 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
410 ; CHECK-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
411 ; CHECK-NEXT: # xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
412 ; CHECK-NEXT: retq # encoding: [0xc3]
414 %0 = bitcast <2 x double> %a to <4 x float>
415 %1 = bitcast <2 x double> %b to <4 x float>
416 %2 = bitcast <2 x double> %c to <4 x float>
417 %3 = extractelement <4 x float> %0, i64 0
418 %4 = extractelement <4 x float> %1, i64 0
419 %5 = extractelement <4 x float> %2, i64 0
420 %sub = fsub fast float -0.000000e+00, %5
421 %6 = fmul fast float %4, %3
422 %7 = fsub fast float %sub, %6
423 %8 = bitcast i8 %k to <8 x i1>
424 %9 = extractelement <8 x i1> %8, i64 0
425 %10 = select i1 %9, float %7, float %3
426 %11 = insertelement <4 x float> %0, float %10, i64 0
427 %12 = bitcast <4 x float> %11 to <2 x double>
; Masked scalar f64 negated multiply-subtract. %sub negates element 0 of %c
; (as -0.0 - %c), and element 0 of %sub - (%b * %a) (all fast-math) is selected
; when element 0 of %k (bitcast to <8 x i1>) is set; otherwise element 0 of %a
; is kept. The scalar is inserted back into element 0 of %a.
; The assertions expect a single vfnmsub213sd predicated on {%k1}.
431 define <2 x double> @combine_scalar_mask_fnmsub_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
432 ; CHECK-LABEL: combine_scalar_mask_fnmsub_f64:
433 ; CHECK: # %bb.0: # %entry
434 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
435 ; CHECK-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
436 ; CHECK-NEXT: # xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
437 ; CHECK-NEXT: retq # encoding: [0xc3]
439 %0 = extractelement <2 x double> %a, i64 0
440 %1 = extractelement <2 x double> %b, i64 0
441 %2 = extractelement <2 x double> %c, i64 0
442 %sub = fsub fast double -0.000000e+00, %2
443 %3 = fmul fast double %1, %0
444 %4 = fsub fast double %sub, %3
445 %5 = bitcast i8 %k to <8 x i1>
446 %6 = extractelement <8 x i1> %5, i64 0
447 %7 = select i1 %6, double %4, double %0
448 %8 = insertelement <2 x double> %a, double %7, i64 0
; Zero-masked scalar f32 negated multiply-subtract. On the <4 x float> views
; of the arguments, %sub negates element 0 of %c (as -0.0 - %c), and element 0
; of %sub - (%b * %a) (all fast-math) is selected when element 0 of %k (bitcast
; to <8 x i1>) is set; otherwise 0.0 is used. The scalar is inserted into
; element 0 of %a's float view.
; The assertions expect a single vfnmsub213ss with {%k1} {z}.
452 define <2 x double> @combine_scalar_maskz_fnmsub_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
453 ; CHECK-LABEL: combine_scalar_maskz_fnmsub_32:
454 ; CHECK: # %bb.0: # %entry
455 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
456 ; CHECK-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
457 ; CHECK-NEXT: # xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
458 ; CHECK-NEXT: retq # encoding: [0xc3]
460 %0 = bitcast <2 x double> %a to <4 x float>
461 %1 = bitcast <2 x double> %b to <4 x float>
462 %2 = bitcast <2 x double> %c to <4 x float>
463 %3 = extractelement <4 x float> %0, i64 0
464 %4 = extractelement <4 x float> %1, i64 0
465 %5 = extractelement <4 x float> %2, i64 0
466 %sub = fsub fast float -0.000000e+00, %5
467 %6 = fmul fast float %4, %3
468 %7 = fsub fast float %sub, %6
469 %8 = bitcast i8 %k to <8 x i1>
470 %9 = extractelement <8 x i1> %8, i64 0
471 %10 = select i1 %9, float %7, float 0.000000e+00
472 %11 = insertelement <4 x float> %0, float %10, i64 0
473 %12 = bitcast <4 x float> %11 to <2 x double>
; Zero-masked scalar f64 negated multiply-subtract. %sub negates element 0 of
; %c (as -0.0 - %c), and element 0 of %sub - (%b * %a) (all fast-math) is
; selected when element 0 of %k (bitcast to <8 x i1>) is set; otherwise 0.0 is
; used. The scalar is inserted into element 0 of %a.
; The assertions expect a single vfnmsub213sd with {%k1} {z}.
477 define <2 x double> @combine_scalar_maskz_fnmsub_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
478 ; CHECK-LABEL: combine_scalar_maskz_fnmsub_64:
479 ; CHECK: # %bb.0: # %entry
480 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
481 ; CHECK-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
482 ; CHECK-NEXT: # xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
483 ; CHECK-NEXT: retq # encoding: [0xc3]
485 %0 = extractelement <2 x double> %a, i64 0
486 %1 = extractelement <2 x double> %b, i64 0
487 %2 = extractelement <2 x double> %c, i64 0
488 %sub = fsub fast double -0.000000e+00, %2
489 %3 = fmul fast double %1, %0
490 %4 = fsub fast double %sub, %3
491 %5 = bitcast i8 %k to <8 x i1>
492 %6 = extractelement <8 x i1> %5, i64 0
493 %7 = select i1 %6, double %4, double 0.000000e+00
494 %8 = insertelement <2 x double> %a, double %7, i64 0
; "mask3" scalar f32 negated multiply-subtract: the pass-through operand is
; %c. On the <4 x float> views, %sub negates element 0 of %c (as -0.0 - %c),
; and element 0 of %sub - (%b * %a) (all fast-math) is selected when element 0
; of %k (bitcast to <8 x i1>) is set; otherwise the un-negated element 0 of %c
; is kept. The scalar is inserted into element 0 of %c's view.
; The assertions expect vfnmsub231ss into xmm2 under {%k1}, then a vmovaps of
; xmm2 into xmm0.
498 define <2 x double> @combine_scalar_mask3_fnmsub_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
499 ; CHECK-LABEL: combine_scalar_mask3_fnmsub_32:
500 ; CHECK: # %bb.0: # %entry
501 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
502 ; CHECK-NEXT: vfnmsub231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbf,0xd0]
503 ; CHECK-NEXT: # xmm2 {%k1} = -(xmm1 * xmm0) - xmm2
504 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
505 ; CHECK-NEXT: retq # encoding: [0xc3]
507 %0 = bitcast <2 x double> %a to <4 x float>
508 %1 = bitcast <2 x double> %b to <4 x float>
509 %2 = bitcast <2 x double> %c to <4 x float>
510 %3 = extractelement <4 x float> %0, i64 0
511 %4 = extractelement <4 x float> %1, i64 0
512 %5 = extractelement <4 x float> %2, i64 0
513 %sub = fsub fast float -0.000000e+00, %5
514 %6 = fmul fast float %4, %3
515 %7 = fsub fast float %sub, %6
516 %8 = bitcast i8 %k to <8 x i1>
517 %9 = extractelement <8 x i1> %8, i64 0
518 %10 = select i1 %9, float %7, float %5
519 %11 = insertelement <4 x float> %2, float %10, i64 0
520 %12 = bitcast <4 x float> %11 to <2 x double>
; "mask3" scalar f64 negated multiply-subtract: the pass-through operand is
; %c. %sub negates element 0 of %c (as -0.0 - %c), and element 0 of
; %sub - (%b * %a) (all fast-math) is selected when element 0 of %k (bitcast
; to <8 x i1>) is set; otherwise the un-negated element 0 of %c is kept. The
; scalar is inserted into element 0 of %c.
; The assertions expect vfnmsub231sd into xmm2 under {%k1}, then a vmovapd of
; xmm2 into xmm0.
524 define <2 x double> @combine_scalar_mask3_fnmsub_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
525 ; CHECK-LABEL: combine_scalar_mask3_fnmsub_64:
526 ; CHECK: # %bb.0: # %entry
527 ; CHECK-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
528 ; CHECK-NEXT: vfnmsub231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbf,0xd0]
529 ; CHECK-NEXT: # xmm2 {%k1} = -(xmm1 * xmm0) - xmm2
530 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
531 ; CHECK-NEXT: retq # encoding: [0xc3]
533 %0 = extractelement <2 x double> %a, i64 0
534 %1 = extractelement <2 x double> %b, i64 0
535 %2 = extractelement <2 x double> %c, i64 0
536 %sub = fsub fast double -0.000000e+00, %2
537 %3 = fmul fast double %1, %0
538 %4 = fsub fast double %sub, %3
539 %5 = bitcast i8 %k to <8 x i1>
540 %6 = extractelement <8 x i1> %5, i64 0
541 %7 = select i1 %6, double %4, double %2
542 %8 = insertelement <2 x double> %c, double %7, i64 0
546 ; Don't fold (fadd (fmul x, c1), (fmul x, c2)) into (fmul x, c1+c2) if reassoc is not set
; Computes (%x * 10.0) + (%x * 11.0) where every operation carries only the
; `contract` fast-math flag. The assertions expect a vmulss by a constant-pool
; value followed by a vfmadd132ss with a constant-pool operand, i.e. the two
; multiplies are not merged into a single multiply.
547 define float @fma_const_fmul(float %x) {
548 ; CHECK-LABEL: fma_const_fmul:
550 ; CHECK-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0x0d,A,A,A,A]
551 ; CHECK-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
552 ; CHECK-NEXT: vfmadd132ss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x99,0x05,A,A,A,A]
553 ; CHECK-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
554 ; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
555 ; CHECK-NEXT: retq # encoding: [0xc3]
556 %mul1 = fmul contract float %x, 10.0
557 %mul2 = fmul contract float %x, 11.0
558 %add1 = fadd contract float %mul1, %mul2
562 ; Fold (fmul (fadd x, 1.0), y) -> (fma x, y, y) without FP-specific command-line
; Computes (%y + 1.0) * %x with `ninf` on the fadd and `contract` on the fmul.
; The assertions expect a single vfmadd231ss computing (xmm1 * xmm0) + xmm0,
; with no separate add of the constant 1.0.
564 define float @combine_fmul_distributive(float %x, float %y) {
565 ; CHECK-LABEL: combine_fmul_distributive:
567 ; CHECK-NEXT: vfmadd231ss %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xb9,0xc0]
568 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm0
569 ; CHECK-NEXT: retq # encoding: [0xc3]
570 %fadd = fadd ninf float %y, 1.0
571 %fmul = fmul contract float %fadd, %x