1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2,fma | FileCheck %s
; fneg of a <4 x float> followed by an extract of element 0.
; Expected asm: a vbroadcastss of -0.0 into xmm1, then a vxorps with xmm0.
4 define float @fneg_v4f32(<4 x float> %x) nounwind {
5 ; CHECK-LABEL: fneg_v4f32:
7 ; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
8 ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0
10 %v = fneg <4 x float> %x
11 %r = extractelement <4 x float> %v, i32 0
; fneg of a <4 x double> followed by an extract of element 0.
; Expected asm: the -0.0 constant is broadcast into ymm1, but the vxorps itself
; operates on the xmm registers only; a vzeroupper follows.
15 define double @fneg_v4f64(<4 x double> %x) nounwind {
16 ; CHECK-LABEL: fneg_v4f64:
18 ; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
19 ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0
20 ; CHECK-NEXT: vzeroupper
22 %v = fneg <4 x double> %x
23 %r = extractelement <4 x double> %v, i32 0
; fadd of two <4 x float> values followed by an extract of element 0.
; Expected asm: a single vaddps on xmm registers.
27 define float @fadd_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
28 ; CHECK-LABEL: fadd_v4f32:
30 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
32 %v = fadd <4 x float> %x, %y
33 %r = extractelement <4 x float> %v, i32 0
; fadd of two <4 x double> values followed by an extract of element 0.
; Expected asm: vaddpd on the xmm registers (not ymm), then vzeroupper.
37 define double @fadd_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
38 ; CHECK-LABEL: fadd_v4f64:
40 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
41 ; CHECK-NEXT: vzeroupper
43 %v = fadd <4 x double> %x, %y
44 %r = extractelement <4 x double> %v, i32 0
; fsub of two <4 x float> values followed by an extract of element 0.
; Expected asm: a single vsubps on xmm registers.
48 define float @fsub_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
49 ; CHECK-LABEL: fsub_v4f32:
51 ; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0
53 %v = fsub <4 x float> %x, %y
54 %r = extractelement <4 x float> %v, i32 0
; fsub of two <4 x double> values followed by an extract of element 0.
; Expected asm: vsubpd on the xmm registers (not ymm), then vzeroupper.
58 define double @fsub_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
59 ; CHECK-LABEL: fsub_v4f64:
61 ; CHECK-NEXT: vsubpd %xmm1, %xmm0, %xmm0
62 ; CHECK-NEXT: vzeroupper
64 %v = fsub <4 x double> %x, %y
65 %r = extractelement <4 x double> %v, i32 0
; fmul of two <4 x float> values followed by an extract of element 0.
; Expected asm: a single vmulps on xmm registers.
69 define float @fmul_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
70 ; CHECK-LABEL: fmul_v4f32:
72 ; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0
74 %v = fmul <4 x float> %x, %y
75 %r = extractelement <4 x float> %v, i32 0
; fmul of two <4 x double> values followed by an extract of element 0.
; Expected asm: vmulpd on the xmm registers (not ymm), then vzeroupper.
79 define double @fmul_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
80 ; CHECK-LABEL: fmul_v4f64:
82 ; CHECK-NEXT: vmulpd %xmm1, %xmm0, %xmm0
83 ; CHECK-NEXT: vzeroupper
85 %v = fmul <4 x double> %x, %y
86 %r = extractelement <4 x double> %v, i32 0
; fdiv of two <4 x float> values followed by an extract of element 0.
; Expected asm: a single vdivps on xmm registers.
90 define float @fdiv_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
91 ; CHECK-LABEL: fdiv_v4f32:
93 ; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0
95 %v = fdiv <4 x float> %x, %y
96 %r = extractelement <4 x float> %v, i32 0
; fdiv of two <4 x double> values followed by an extract of element 0.
; Expected asm: vdivpd on the xmm registers (not ymm), then vzeroupper.
100 define double @fdiv_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
101 ; CHECK-LABEL: fdiv_v4f64:
103 ; CHECK-NEXT: vdivpd %xmm1, %xmm0, %xmm0
104 ; CHECK-NEXT: vzeroupper
106 %v = fdiv <4 x double> %x, %y
107 %r = extractelement <4 x double> %v, i32 0
; frem of two <4 x float> values followed by an extract of element 0.
; Expected asm: a tail-call jump to fmodf, with no vector instructions asserted.
111 define float @frem_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
112 ; CHECK-LABEL: frem_v4f32:
114 ; CHECK-NEXT: jmp fmodf # TAILCALL
115 %v = frem <4 x float> %x, %y
116 %r = extractelement <4 x float> %v, i32 0
; frem of two <4 x double> values followed by an extract of element 0.
; Expected asm: kill annotations narrowing ymm0/ymm1 to xmm0/xmm1, a vzeroupper,
; then a tail-call jump to fmod.
120 define double @frem_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
121 ; CHECK-LABEL: frem_v4f64:
123 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
124 ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 killed $ymm1
125 ; CHECK-NEXT: vzeroupper
126 ; CHECK-NEXT: jmp fmod # TAILCALL
127 %v = frem <4 x double> %x, %y
128 %r = extractelement <4 x double> %v, i32 0
; llvm.sqrt on a <4 x float> followed by an extract of element 0.
; Expected asm: a packed vsqrtps on xmm0.
132 define float @fsqrt_v4f32(<4 x float> %x) nounwind {
133 ; CHECK-LABEL: fsqrt_v4f32:
135 ; CHECK-NEXT: vsqrtps %xmm0, %xmm0
137 %v = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
138 %r = extractelement <4 x float> %v, i32 0
; llvm.sqrt on a <4 x double> followed by an extract of element 0.
; Expected asm: the full-width vsqrtpd on ymm0 is kept, followed by the
; ymm0 -> xmm0 kill annotation and a vzeroupper.
142 define double @fsqrt_v4f64(<4 x double> %x) nounwind {
143 ; CHECK-LABEL: fsqrt_v4f64:
145 ; CHECK-NEXT: vsqrtpd %ymm0, %ymm0
146 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
147 ; CHECK-NEXT: vzeroupper
149 %v = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %x)
150 %r = extractelement <4 x double> %v, i32 0
; llvm.sin on a <4 x float> followed by an extract of element 0.
; Expected asm: a tail-call jump to sinf.
154 define float @fsin_v4f32(<4 x float> %x) nounwind {
155 ; CHECK-LABEL: fsin_v4f32:
157 ; CHECK-NEXT: jmp sinf # TAILCALL
158 %v = call <4 x float> @llvm.sin.v4f32(<4 x float> %x)
159 %r = extractelement <4 x float> %v, i32 0
; llvm.sin on a <4 x double> followed by an extract of element 0.
; Expected asm: the ymm0 -> xmm0 kill annotation, a vzeroupper, then a
; tail-call jump to sin.
163 define double @fsin_v4f64(<4 x double> %x) nounwind {
164 ; CHECK-LABEL: fsin_v4f64:
166 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
167 ; CHECK-NEXT: vzeroupper
168 ; CHECK-NEXT: jmp sin # TAILCALL
169 %v = call <4 x double> @llvm.sin.v4f64(<4 x double> %x)
170 %r = extractelement <4 x double> %v, i32 0
; llvm.fma on three <4 x float> values followed by an extract of element 0.
; Expected asm: a packed vfmadd213ps computing (xmm1 * xmm0) + xmm2.
174 define float @fma_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) nounwind {
175 ; CHECK-LABEL: fma_v4f32:
177 ; CHECK-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
179 %v = call <4 x float> @llvm.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z)
180 %r = extractelement <4 x float> %v, i32 0
; llvm.fma on three <4 x double> values followed by an extract of element 0.
; Expected asm: the full-width vfmadd213pd on ymm registers is kept, followed
; by the ymm0 -> xmm0 kill annotation and a vzeroupper.
184 define double @fma_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z) nounwind {
185 ; CHECK-LABEL: fma_v4f64:
187 ; CHECK-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
188 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
189 ; CHECK-NEXT: vzeroupper
191 %v = call <4 x double> @llvm.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z)
192 %r = extractelement <4 x double> %v, i32 0
; llvm.fabs on a <4 x float> followed by an extract of element 0.
; Expected asm: a broadcast constant (printed as NaN by the asm printer;
; presumably the sign-clearing mask - confirm) ANDed with xmm0 via vandps.
196 define float @fabs_v4f32(<4 x float> %x) nounwind {
197 ; CHECK-LABEL: fabs_v4f32:
199 ; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
200 ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0
202 %v = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
203 %r = extractelement <4 x float> %v, i32 0
; llvm.fabs on a <4 x double> followed by an extract of element 0.
; Expected asm: the constant (printed as NaN) is broadcast into ymm1, but the
; vandps operates on the xmm registers only; a vzeroupper follows.
207 define double @fabs_v4f64(<4 x double> %x) nounwind {
208 ; CHECK-LABEL: fabs_v4f64:
210 ; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
211 ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0
212 ; CHECK-NEXT: vzeroupper
214 %v = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
215 %r = extractelement <4 x double> %v, i32 0
; llvm.maxnum on two <4 x float> values followed by an extract of element 0.
; Expected asm: vmaxps into xmm2, a vcmpunordps of xmm0 against itself, and a
; vblendvps that uses that compare result to pick between xmm1 and xmm2.
219 define float @fmaxnum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
220 ; CHECK-LABEL: fmaxnum_v4f32:
222 ; CHECK-NEXT: vmaxps %xmm0, %xmm1, %xmm2
223 ; CHECK-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
224 ; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
226 %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
227 %r = extractelement <4 x float> %v, i32 0
; llvm.maxnum on two <4 x double> values followed by an extract of element 0.
; Expected asm: the same vmaxpd / vcmpunordpd / vblendvpd sequence as the
; v4f32 variant but on full ymm registers, then the ymm0 -> xmm0 kill
; annotation and a vzeroupper.
231 define double @fmaxnum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
232 ; CHECK-LABEL: fmaxnum_v4f64:
234 ; CHECK-NEXT: vmaxpd %ymm0, %ymm1, %ymm2
235 ; CHECK-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
236 ; CHECK-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
237 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
238 ; CHECK-NEXT: vzeroupper
240 %v = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y)
241 %r = extractelement <4 x double> %v, i32 0
; llvm.minnum on two <4 x float> values followed by an extract of element 0.
; Expected asm: vminps into xmm2, a vcmpunordps of xmm0 against itself, and a
; vblendvps that uses that compare result to pick between xmm1 and xmm2.
245 define float @fminnum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
246 ; CHECK-LABEL: fminnum_v4f32:
248 ; CHECK-NEXT: vminps %xmm0, %xmm1, %xmm2
249 ; CHECK-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
250 ; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
252 %v = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
253 %r = extractelement <4 x float> %v, i32 0
; llvm.minnum on two <4 x double> values followed by an extract of element 0.
; Expected asm: the same vminpd / vcmpunordpd / vblendvpd sequence as the
; v4f32 variant but on full ymm registers, then the ymm0 -> xmm0 kill
; annotation and a vzeroupper.
257 define double @fminnum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
258 ; CHECK-LABEL: fminnum_v4f64:
260 ; CHECK-NEXT: vminpd %ymm0, %ymm1, %ymm2
261 ; CHECK-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
262 ; CHECK-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
263 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
264 ; CHECK-NEXT: vzeroupper
266 %v = call <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y)
267 %r = extractelement <4 x double> %v, i32 0
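; NOTE(review): the fmaximum/fminimum tests that follow are commented out and carry
; no autogenerated assertions; the reason is not recorded here - confirm before re-enabling.
271 ;define float @fmaximum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {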
272 ; %v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
273 ; %r = extractelement <4 x float> %v, i32 0
277 ;define double @fmaximum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
278 ; %v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %x, <4 x double> %y)
279 ; %r = extractelement <4 x double> %v, i32 0
283 ;define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
284 ; %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
285 ; %r = extractelement <4 x float> %v, i32 0
289 ;define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
290 ; %v = call <4 x double> @llvm.minimum.v4f64(<4 x double> %x, <4 x double> %y)
291 ; %r = extractelement <4 x double> %v, i32 0
; llvm.copysign on two <4 x float> values followed by an extract of element 0.
; Expected asm: xmm1 is ANDed with a broadcast -0.0 constant, xmm0 is ANDed
; with a broadcast constant printed as NaN, and the two results are ORed.
295 define float @copysign_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
296 ; CHECK-LABEL: copysign_v4f32:
298 ; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
299 ; CHECK-NEXT: vandps %xmm2, %xmm1, %xmm1
300 ; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
301 ; CHECK-NEXT: vandps %xmm2, %xmm0, %xmm0
302 ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
304 %v = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %y)
305 %r = extractelement <4 x float> %v, i32 0
; llvm.copysign on two <4 x double> values followed by an extract of element 0.
; Expected asm: both constants are broadcast into ymm registers (ymm2, ymm3),
; while the vandps/vandps/vorps sequence operates on the xmm registers only;
; a vzeroupper follows.
309 define double @copysign_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
310 ; CHECK-LABEL: copysign_v4f64:
312 ; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
313 ; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
314 ; CHECK-NEXT: vandps %xmm3, %xmm1, %xmm1
315 ; CHECK-NEXT: vandps %xmm2, %xmm0, %xmm0
316 ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
317 ; CHECK-NEXT: vzeroupper
319 %v = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %y)
320 %r = extractelement <4 x double> %v, i32 0
; llvm.floor on a <4 x float> followed by an extract of element 0.
; Expected asm: vroundps with immediate 9 on xmm0.
324 define float @floor_v4f32(<4 x float> %x) nounwind {
325 ; CHECK-LABEL: floor_v4f32:
327 ; CHECK-NEXT: vroundps $9, %xmm0, %xmm0
329 %v = call <4 x float> @llvm.floor.v4f32(<4 x float> %x)
330 %r = extractelement <4 x float> %v, i32 0
; llvm.floor on a <4 x double> followed by an extract of element 0.
; Expected asm: the full-width vroundpd with immediate 9 on ymm0, then the
; ymm0 -> xmm0 kill annotation and a vzeroupper.
334 define double @floor_v4f64(<4 x double> %x) nounwind {
335 ; CHECK-LABEL: floor_v4f64:
337 ; CHECK-NEXT: vroundpd $9, %ymm0, %ymm0
338 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
339 ; CHECK-NEXT: vzeroupper
341 %v = call <4 x double> @llvm.floor.v4f64(<4 x double> %x)
342 %r = extractelement <4 x double> %v, i32 0
; llvm.ceil on a <4 x float> followed by an extract of element 0.
; Expected asm: vroundps with immediate 10 on xmm0.
346 define float @ceil_v4f32(<4 x float> %x) nounwind {
347 ; CHECK-LABEL: ceil_v4f32:
349 ; CHECK-NEXT: vroundps $10, %xmm0, %xmm0
351 %v = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
352 %r = extractelement <4 x float> %v, i32 0
; llvm.ceil on a <4 x double> followed by an extract of element 0.
; Expected asm: the full-width vroundpd with immediate 10 on ymm0, then the
; ymm0 -> xmm0 kill annotation and a vzeroupper.
356 define double @ceil_v4f64(<4 x double> %x) nounwind {
357 ; CHECK-LABEL: ceil_v4f64:
359 ; CHECK-NEXT: vroundpd $10, %ymm0, %ymm0
360 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
361 ; CHECK-NEXT: vzeroupper
363 %v = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x)
364 %r = extractelement <4 x double> %v, i32 0
; llvm.trunc on a <4 x float> followed by an extract of element 0.
; Expected asm: vroundps with immediate 11 on xmm0.
368 define float @trunc_v4f32(<4 x float> %x) nounwind {
369 ; CHECK-LABEL: trunc_v4f32:
371 ; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
373 %v = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x)
374 %r = extractelement <4 x float> %v, i32 0
; llvm.trunc on a <4 x double> followed by an extract of element 0.
; Expected asm: the full-width vroundpd with immediate 11 on ymm0, then the
; ymm0 -> xmm0 kill annotation and a vzeroupper.
378 define double @trunc_v4f64(<4 x double> %x) nounwind {
379 ; CHECK-LABEL: trunc_v4f64:
381 ; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0
382 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
383 ; CHECK-NEXT: vzeroupper
385 %v = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x)
386 %r = extractelement <4 x double> %v, i32 0
; llvm.rint on a <4 x float> followed by an extract of element 0.
; Expected asm: vroundps with immediate 4 on xmm0.
390 define float @rint_v4f32(<4 x float> %x) nounwind {
391 ; CHECK-LABEL: rint_v4f32:
393 ; CHECK-NEXT: vroundps $4, %xmm0, %xmm0
395 %v = call <4 x float> @llvm.rint.v4f32(<4 x float> %x)
396 %r = extractelement <4 x float> %v, i32 0
; llvm.rint on a <4 x double> followed by an extract of element 0.
; Expected asm: the full-width vroundpd with immediate 4 on ymm0, then the
; ymm0 -> xmm0 kill annotation and a vzeroupper.
400 define double @rint_v4f64(<4 x double> %x) nounwind {
401 ; CHECK-LABEL: rint_v4f64:
403 ; CHECK-NEXT: vroundpd $4, %ymm0, %ymm0
404 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
405 ; CHECK-NEXT: vzeroupper
407 %v = call <4 x double> @llvm.rint.v4f64(<4 x double> %x)
408 %r = extractelement <4 x double> %v, i32 0
; llvm.nearbyint on a <4 x float> followed by an extract of element 0.
; Expected asm: vroundps with immediate 12 on xmm0.
412 define float @nearbyint_v4f32(<4 x float> %x) nounwind {
413 ; CHECK-LABEL: nearbyint_v4f32:
415 ; CHECK-NEXT: vroundps $12, %xmm0, %xmm0
417 %v = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x)
418 %r = extractelement <4 x float> %v, i32 0
; llvm.nearbyint on a <4 x double> followed by an extract of element 0.
; Expected asm: the full-width vroundpd with immediate 12 on ymm0, then the
; ymm0 -> xmm0 kill annotation and a vzeroupper.
422 define double @nearbyint_v4f64(<4 x double> %x) nounwind {
423 ; CHECK-LABEL: nearbyint_v4f64:
425 ; CHECK-NEXT: vroundpd $12, %ymm0, %ymm0
426 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
427 ; CHECK-NEXT: vzeroupper
429 %v = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x)
430 %r = extractelement <4 x double> %v, i32 0
; llvm.round on a <4 x float> followed by an extract of element 0.
; Expected asm: a tail-call jump to roundf (no vround instruction is asserted,
; unlike the floor/ceil/trunc/rint/nearbyint tests in this file).
434 define float @round_v4f32(<4 x float> %x) nounwind {
435 ; CHECK-LABEL: round_v4f32:
437 ; CHECK-NEXT: jmp roundf # TAILCALL
438 %v = call <4 x float> @llvm.round.v4f32(<4 x float> %x)
439 %r = extractelement <4 x float> %v, i32 0
; llvm.round on a <4 x double> followed by an extract of element 0.
; Expected asm: the ymm0 -> xmm0 kill annotation, a vzeroupper, then a
; tail-call jump to round.
443 define double @round_v4f64(<4 x double> %x) nounwind {
444 ; CHECK-LABEL: round_v4f64:
446 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
447 ; CHECK-NEXT: vzeroupper
448 ; CHECK-NEXT: jmp round # TAILCALL
449 %v = call <4 x double> @llvm.round.v4f64(<4 x double> %x)
450 %r = extractelement <4 x double> %v, i32 0
; Declarations of the LLVM intrinsics called by the tests in this file, one
; <4 x float> and one <4 x double> overload per operation.
; NOTE(review): in the visible portion of the file, llvm.maximum.* and
; llvm.minimum.* are referenced only from commented-out tests.
454 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
455 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
456 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
457 declare <4 x double> @llvm.sin.v4f64(<4 x double>)
458 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
459 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
460 declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
461 declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
462 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
463 declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
464 declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
465 declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
466 declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
467 declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
468 declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
469 declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
470 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
471 declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
472 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
473 declare <4 x double> @llvm.floor.v4f64(<4 x double>)
474 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
475 declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
476 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
477 declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
478 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
479 declare <4 x double> @llvm.rint.v4f64(<4 x double>)
480 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
481 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
482 declare <4 x float> @llvm.round.v4f32(<4 x float>)
483 declare <4 x double> @llvm.round.v4f64(<4 x double>)