1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512fp16 | FileCheck %s
; Plain fadd of two <32 x half> register arguments; the expected assembly is a
; single 512-bit register-register vaddph.
4 define <32 x half> @vaddph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
5 ; CHECK-LABEL: vaddph_512_test:
7 ; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0
9 %x = fadd <32 x half> %i, %j
; fadd whose second operand is loaded from %j; the load is expected to be
; folded into vaddph as a (%rdi) memory operand rather than emitted separately.
13 define <32 x half> @vaddph_512_fold_test(<32 x half> %i, ptr %j) nounwind {
14 ; CHECK-LABEL: vaddph_512_fold_test:
16 ; CHECK-NEXT: vaddph (%rdi), %zmm0, %zmm0
18 %tmp = load <32 x half>, ptr %j, align 4
19 %x = fadd <32 x half> %i, %tmp
23 define <32 x half> @vaddph_512_broadc_test(<32 x half> %a) nounwind {
24 ; CHECK-LABEL: vaddph_512_broadc_test:
26 ; CHECK-NEXT: vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm0, %zmm0
28 %b = fadd <32 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
; 256-bit variant of the splat-constant fadd: the half 1.0 splat is expected to
; become a {1to16} embedded-broadcast operand of a ymm vaddph.
32 define <16 x half> @vaddph_256_broadc_test(<16 x half> %a) nounwind {
33 ; CHECK-LABEL: vaddph_256_broadc_test:
35 ; CHECK-NEXT: vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %ymm0, %ymm0
37 %b = fadd <16 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
; 128-bit variant of the splat-constant fadd: the half 1.0 splat is expected to
; become a {1to8} embedded-broadcast operand of an xmm vaddph.
41 define <8 x half> @vaddph_128_broadc_test(<8 x half> %a) nounwind {
42 ; CHECK-LABEL: vaddph_128_broadc_test:
44 ; CHECK-NEXT: vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %xmm0, %xmm0
46 %b = fadd <8 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
; select between the fadd result and %i under a <32 x i1> argument. The mask
; argument is expected to be converted to %k1 (vpsllw $7 + vpmovb2m) and the
; select to be absorbed into a merge-masked vaddph.
50 define <32 x half> @vaddph_512_mask_test1(<32 x half> %i, <32 x half> %j, <32 x i1> %mask) nounwind readnone {
51 ; CHECK-LABEL: vaddph_512_mask_test1:
53 ; CHECK-NEXT: vpsllw $7, %ymm2, %ymm2
54 ; CHECK-NEXT: vpmovb2m %ymm2, %k1
55 ; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0 {%k1}
57 %x = fadd <32 x half> %i, %j
58 %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> %i
; The mask is computed as fcmp one of %mask1 against zero. Expected: the compare
; is emitted as vcmpneq_oqph into %k1 (against a vxorps-zeroed register) and the
; select with %i becomes a merge-masked vaddph.
62 define <32 x half> @vaddph_512_mask_test(<32 x half> %i, <32 x half> %j, <32 x half> %mask1) nounwind readnone {
63 ; CHECK-LABEL: vaddph_512_mask_test:
65 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
66 ; CHECK-NEXT: vcmpneq_oqph %zmm3, %zmm2, %k1
67 ; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0 {%k1}
69 %mask = fcmp one <32 x half> %mask1, zeroinitializer
70 %x = fadd <32 x half> %i, %j
71 %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> %i
; Same compare-derived mask as the merge-masking test, but the select's false
; value is zeroinitializer, so the expected vaddph carries {%k1} {z}
; (zero-masking).
75 define <32 x half> @vaddph_512_maskz_test(<32 x half> %i, <32 x half> %j, <32 x half> %mask1) nounwind readnone {
76 ; CHECK-LABEL: vaddph_512_maskz_test:
78 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
79 ; CHECK-NEXT: vcmpneq_oqph %zmm3, %zmm2, %k1
80 ; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0 {%k1} {z}
82 %mask = fcmp one <32 x half> %mask1, zeroinitializer
83 %x = fadd <32 x half> %i, %j
84 %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> zeroinitializer
; Merge-masked fadd whose second operand is loaded from %j.ptr. Expected: the
; load folds into vaddph as (%rdi) and the compare-derived mask is applied via
; {%k1}.
; The function reads memory through %j.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior. Only
; nounwind is kept, as in vaddph_512_fold_test.
88 define <32 x half> @vaddph_512_mask_fold_test(<32 x half> %i, ptr %j.ptr, <32 x half> %mask1) nounwind {
89 ; CHECK-LABEL: vaddph_512_mask_fold_test:
91 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
92 ; CHECK-NEXT: vcmpneq_oqph %zmm2, %zmm1, %k1
93 ; CHECK-NEXT: vaddph (%rdi), %zmm0, %zmm0 {%k1}
95 %mask = fcmp one <32 x half> %mask1, zeroinitializer
96 %j = load <32 x half>, ptr %j.ptr
97 %x = fadd <32 x half> %i, %j
98 %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> %i
; Zero-masked fadd whose second operand is loaded from %j.ptr. Expected: the
; load folds into vaddph as (%rdi) with {%k1} {z}.
; The function reads memory through %j.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior. Only
; nounwind is kept, as in vaddph_512_fold_test.
102 define <32 x half> @vaddph_512_maskz_fold_test(<32 x half> %i, ptr %j.ptr, <32 x half> %mask1) nounwind {
103 ; CHECK-LABEL: vaddph_512_maskz_fold_test:
105 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
106 ; CHECK-NEXT: vcmpneq_oqph %zmm2, %zmm1, %k1
107 ; CHECK-NEXT: vaddph (%rdi), %zmm0, %zmm0 {%k1} {z}
109 %mask = fcmp one <32 x half> %mask1, zeroinitializer
110 %j = load <32 x half>, ptr %j.ptr
111 %x = fadd <32 x half> %i, %j
112 %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> zeroinitializer
; Same as the zero-masked fold test but with the fadd operands swapped
; (%j, %i): the loaded value is the first IR operand, and the expected assembly
; still folds the load as the (%rdi) memory operand of vaddph.
; The function reads memory through %j.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior. Only
; nounwind is kept, as in vaddph_512_fold_test.
116 define <32 x half> @vaddph_512_maskz_fold_test_2(<32 x half> %i, ptr %j.ptr, <32 x half> %mask1) nounwind {
117 ; CHECK-LABEL: vaddph_512_maskz_fold_test_2:
119 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
120 ; CHECK-NEXT: vcmpneq_oqph %zmm2, %zmm1, %k1
121 ; CHECK-NEXT: vaddph (%rdi), %zmm0, %zmm0 {%k1} {z}
123 %mask = fcmp one <32 x half> %mask1, zeroinitializer
124 %j = load <32 x half>, ptr %j.ptr
125 %x = fadd <32 x half> %j, %i
126 %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> zeroinitializer
; Plain fsub of two <32 x half> register arguments; expected to select a single
; 512-bit vsubph.
130 define <32 x half> @vsubph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
131 ; CHECK-LABEL: vsubph_512_test:
133 ; CHECK-NEXT: vsubph %zmm1, %zmm0, %zmm0
135 %x = fsub <32 x half> %i, %j
; Plain fmul of two <32 x half> register arguments; expected to select a single
; 512-bit vmulph.
139 define <32 x half> @vmulph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
140 ; CHECK-LABEL: vmulph_512_test:
142 ; CHECK-NEXT: vmulph %zmm1, %zmm0, %zmm0
144 %x = fmul <32 x half> %i, %j
; Plain fdiv (no fast-math flags) of two <32 x half> register arguments;
; expected to select a single 512-bit vdivph.
148 define <32 x half> @vdivph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
149 ; CHECK-LABEL: vdivph_512_test:
151 ; CHECK-NEXT: vdivph %zmm1, %zmm0, %zmm0
153 %x = fdiv <32 x half> %i, %j
; fdiv carrying the `fast` flag: instead of vdivph, the expected assembly is a
; reciprocal of the divisor (vrcpph) multiplied by the dividend (vmulph).
157 define <32 x half> @vdivph_512_test_fast(<32 x half> %i, <32 x half> %j) nounwind readnone {
158 ; CHECK-LABEL: vdivph_512_test_fast:
160 ; CHECK-NEXT: vrcpph %zmm1, %zmm1
161 ; CHECK-NEXT: vmulph %zmm0, %zmm1, %zmm0
163 %x = fdiv fast <32 x half> %i, %j
; Scalar half: (%i + %j) added to a value loaded from %x.ptr. fadd is
; commutative, so the load (the first IR operand) is expected to fold into the
; second vaddsh as a (%rdi) memory operand.
; The function reads memory through %x.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior.
167 define half @add_sh(half %i, half %j, ptr %x.ptr) nounwind {
168 ; CHECK-LABEL: add_sh:
170 ; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
171 ; CHECK-NEXT: vaddsh (%rdi), %xmm0, %xmm0
173 %x = load half, ptr %x.ptr
174 %y = fadd half %i, %j
175 %r = fadd half %x, %y
; Scalar half: loaded value minus (%i - %j). The load is the minuend (first
; operand) of a non-commutative fsub, so the expected assembly loads it with a
; separate vmovsh instead of folding it into vsubsh.
; The function reads memory through %x.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior.
179 define half @sub_sh(half %i, half %j, ptr %x.ptr) nounwind {
180 ; CHECK-LABEL: sub_sh:
182 ; CHECK-NEXT: vmovsh (%rdi), %xmm2
183 ; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
184 ; CHECK-NEXT: vsubsh %xmm0, %xmm2, %xmm0
186 %x = load half, ptr %x.ptr
187 %y = fsub half %i, %j
188 %r = fsub half %x, %y
; Scalar half: (%i - %j) minus the loaded value. Here the load is the
; subtrahend (second operand), so it is expected to fold into vsubsh as a
; (%rdi) memory operand.
; The function reads memory through %x.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior.
192 define half @sub_sh_2(half %i, half %j, ptr %x.ptr) nounwind {
193 ; CHECK-LABEL: sub_sh_2:
195 ; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
196 ; CHECK-NEXT: vsubsh (%rdi), %xmm0, %xmm0
198 %x = load half, ptr %x.ptr
199 %y = fsub half %i, %j
200 %r = fsub half %y, %x
; Scalar half: (%i * %j) multiplied by a value loaded from %x.ptr. fmul is
; commutative, so the load (the first IR operand) is expected to fold into the
; second vmulsh as a (%rdi) memory operand.
; The function reads memory through %x.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior.
204 define half @mul_sh(half %i, half %j, ptr %x.ptr) nounwind {
205 ; CHECK-LABEL: mul_sh:
207 ; CHECK-NEXT: vmulsh %xmm1, %xmm0, %xmm0
208 ; CHECK-NEXT: vmulsh (%rdi), %xmm0, %xmm0
210 %x = load half, ptr %x.ptr
211 %y = fmul half %i, %j
212 %r = fmul half %x, %y
; Scalar half: loaded value divided by (%i / %j). The load is the dividend
; (first operand) of a non-commutative fdiv, so the expected assembly loads it
; with a separate vmovsh instead of folding it into vdivsh.
; The function reads memory through %x.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior.
216 define half @div_sh(half %i, half %j, ptr %x.ptr) nounwind {
217 ; CHECK-LABEL: div_sh:
219 ; CHECK-NEXT: vmovsh (%rdi), %xmm2
220 ; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
221 ; CHECK-NEXT: vdivsh %xmm0, %xmm2, %xmm0
223 %x = load half, ptr %x.ptr
224 %y = fdiv half %i, %j
225 %r = fdiv half %x, %y
; Scalar half: (%i / %j) divided by the loaded value. Here the load is the
; divisor (second operand), so it is expected to fold into vdivsh as a (%rdi)
; memory operand.
; The function reads memory through %x.ptr, so it is not marked readnone:
; a readnone function that accesses memory has undefined behavior.
229 define half @div_sh_2(half %i, half %j, ptr %x.ptr) nounwind {
230 ; CHECK-LABEL: div_sh_2:
232 ; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
233 ; CHECK-NEXT: vdivsh (%rdi), %xmm0, %xmm0
235 %x = load half, ptr %x.ptr
236 %y = fdiv half %i, %j
237 %r = fdiv half %y, %x
; Scalar fdiv carrying the `fast` flag: expected to be emitted as a reciprocal
; of the divisor (vrcpsh) multiplied by the dividend (vmulsh), not vdivsh.
241 define half @div_sh_3(half %i, half %j) nounwind readnone {
242 ; CHECK-LABEL: div_sh_3:
244 ; CHECK-NEXT: vrcpsh %xmm1, %xmm1, %xmm1
245 ; CHECK-NEXT: vmulsh %xmm0, %xmm1, %xmm0
247 %r = fdiv fast half %i, %j
; Scalar fcmp une returning i1: expected to be a vcmpneqsh writing mask
; register %k0, which is then moved to %eax with kmovd.
251 define i1 @cmp_une_sh(half %x, half %y) {
252 ; CHECK-LABEL: cmp_une_sh:
253 ; CHECK: ## %bb.0: ## %entry
254 ; CHECK-NEXT: vcmpneqsh %xmm1, %xmm0, %k0
255 ; CHECK-NEXT: kmovd %k0, %eax
256 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
259 %0 = fcmp une half %x, %y
; Scalar fcmp oeq returning i1: expected to be a vcmpeqsh writing mask register
; %k0, which is then moved to %eax with kmovd.
263 define i1 @cmp_oeq_sh(half %x, half %y) {
264 ; CHECK-LABEL: cmp_oeq_sh:
265 ; CHECK: ## %bb.0: ## %entry
266 ; CHECK-NEXT: vcmpeqsh %xmm1, %xmm0, %k0
267 ; CHECK-NEXT: kmovd %k0, %eax
268 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
271 %0 = fcmp oeq half %x, %y
; Scalar fcmp olt returning i1: unlike the une/oeq cases, the expected assembly
; uses a flag-setting vucomish followed by seta rather than a mask-register
; compare.
275 define i1 @cmp_olt_sh(half %x, half %y) {
276 ; CHECK-LABEL: cmp_olt_sh:
277 ; CHECK: ## %bb.0: ## %entry
278 ; CHECK-NEXT: vucomish %xmm0, %xmm1
279 ; CHECK-NEXT: seta %al
282 %0 = fcmp olt half %x, %y
; Vector fcmp une on <32 x half> returning <32 x i1>: expected to be vcmpneqph
; into %k0, with the mask then expanded to a byte vector in %ymm0 by vpmovm2b.
286 define <32 x i1> @cmp_ph(<32 x half> %x, <32 x half> %y) {
287 ; CHECK-LABEL: cmp_ph:
288 ; CHECK: ## %bb.0: ## %entry
289 ; CHECK-NEXT: vcmpneqph %zmm1, %zmm0, %k0
290 ; CHECK-NEXT: vpmovm2b %k0, %ymm0
293 %0 = fcmp une <32 x half> %x, %y
; Scalar half test named fneg; the expected assembly XORs %xmm0 with a
; broadcast -0.0 constant (vpbroadcastw + vpxor).
; NOTE(review): the IR body is not visible in this excerpt - presumably a
; `fneg half %x`; confirm against the full file.
297 define half @fneg(half %x) {
300 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
301 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; Negation written as `fsub -0.0, %x`; the expected assembly is the
; broadcast -0.0 + vpxor sequence, with no subtract instruction.
307 define half @fneg_idiom(half %x) {
308 ; CHECK-LABEL: fneg_idiom:
310 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
311 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
313 %a = fsub half -0.0, %x
; Scalar llvm.fabs.f16: expected to be a vpand of %xmm0 with a broadcast
; constant (which the asm comment prints as NaN elements).
317 define half @fabs(half %x) {
320 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
321 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
323 %a = call half @llvm.fabs.f16(half %x)
326 declare half @llvm.fabs.f16(half)
; Scalar llvm.copysign.f16: expected to be a single vpternlogq (immediate $226)
; combining %xmm0, %xmm1 and a broadcast constant (printed as NaN elements).
328 define half @fcopysign(half %x, half %y) {
329 ; CHECK-LABEL: fcopysign:
331 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
332 ; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
334 %a = call half @llvm.copysign.f16(half %x, half %y)
337 declare half @llvm.copysign.f16(half, half)
; Scalar llvm.round.f16. Expected sequence: a vpternlogq ($248) merges the
; -0.0 and 4.9976E-1 broadcast constants with the input, the result is added
; to the input (vaddsh), and vrndscalesh $11 produces the final value.
339 define half @fround(half %x) {
340 ; CHECK-LABEL: fround:
342 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
343 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
344 ; CHECK-NEXT: vpternlogq $248, %xmm1, %xmm0, %xmm2
345 ; CHECK-NEXT: vaddsh %xmm2, %xmm0, %xmm0
346 ; CHECK-NEXT: vrndscalesh $11, %xmm0, %xmm0, %xmm0
348 %a = call half @llvm.round.f16(half %x)
351 declare half @llvm.round.f16(half)
; fneg on <8 x half>: expected to XOR %xmm0 with a broadcast -0.0 constant
; (vpbroadcastw + vpxor).
353 define <8 x half> @fnegv8f16(<8 x half> %x) {
354 ; CHECK-LABEL: fnegv8f16:
356 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
357 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
359 %a = fneg <8 x half> %x
; <8 x half> negation written as `fsub <-0.0 splat>, %x`; expected to produce
; the broadcast -0.0 + vpxor sequence, with no subtract instruction.
363 define <8 x half> @fneg_idiomv8f16(<8 x half> %x) {
364 ; CHECK-LABEL: fneg_idiomv8f16:
366 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
367 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
369 %a = fsub <8 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
; llvm.fabs on <8 x half>: expected to be a vpand of %xmm0 with a broadcast
; constant (printed as NaN elements in the asm comment).
373 define <8 x half> @fabsv8f16(<8 x half> %x) {
374 ; CHECK-LABEL: fabsv8f16:
376 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
377 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
379 %a = call <8 x half> @llvm.fabs.v8f16(<8 x half> %x)
382 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
; llvm.copysign on <8 x half>: expected to be one vpternlogq (immediate $228)
; whose constant operand is a {1to2} embedded broadcast from the constant pool.
384 define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) {
385 ; CHECK-LABEL: fcopysignv8f16:
387 ; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
389 %a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y)
392 declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
; llvm.round on <8 x half>. Expected sequence: a vpternlogq ($248) merges the
; -0.0 and 4.9976E-1 broadcast constants with the input, the result is added
; to the input (vaddph), and vrndscaleph $11 produces the final value.
394 define <8 x half> @roundv8f16(<8 x half> %x) {
395 ; CHECK-LABEL: roundv8f16:
397 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
398 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
399 ; CHECK-NEXT: vpternlogq $248, %xmm1, %xmm0, %xmm2
400 ; CHECK-NEXT: vaddph %xmm2, %xmm0, %xmm0
401 ; CHECK-NEXT: vrndscaleph $11, %xmm0, %xmm0
403 %a = call <8 x half> @llvm.round.v8f16(<8 x half> %x)
406 declare <8 x half> @llvm.round.v8f16(<8 x half>)
; fneg on <16 x half>: expected to XOR %ymm0 with a broadcast -0.0 constant
; (vpbroadcastw + vpxor).
408 define <16 x half> @fnegv16f16(<16 x half> %x) {
409 ; CHECK-LABEL: fnegv16f16:
411 ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
412 ; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
414 %a = fneg <16 x half> %x
; <16 x half> negation written as `fsub <-0.0 splat>, %x`; expected to produce
; the broadcast -0.0 + vpxor sequence, with no subtract instruction.
418 define <16 x half> @fneg_idiomv16f16(<16 x half> %x) {
419 ; CHECK-LABEL: fneg_idiomv16f16:
421 ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
422 ; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
424 %a = fsub <16 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
; llvm.fabs on <16 x half>: expected to be a vpand of %ymm0 with a broadcast
; constant (printed as NaN elements in the asm comment).
428 define <16 x half> @fabsv16f16(<16 x half> %x) {
429 ; CHECK-LABEL: fabsv16f16:
431 ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
432 ; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
434 %a = call <16 x half> @llvm.fabs.v16f16(<16 x half> %x)
437 declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
; llvm.copysign on <16 x half>: expected to be one vpternlogq (immediate $228)
; whose constant operand is a {1to4} embedded broadcast from the constant pool.
439 define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) {
440 ; CHECK-LABEL: fcopysignv16f16:
442 ; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
444 %a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y)
447 declare <16 x half> @llvm.copysign.v16f16(<16 x half>, <16 x half>)
; llvm.round on <16 x half>. Expected sequence: a vpternlogq ($248) merges the
; -0.0 and 4.9976E-1 broadcast constants with the input, the result is added
; to the input (vaddph), and vrndscaleph $11 produces the final value.
449 define <16 x half> @roundv16f16(<16 x half> %x) {
450 ; CHECK-LABEL: roundv16f16:
452 ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
453 ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
454 ; CHECK-NEXT: vpternlogq $248, %ymm1, %ymm0, %ymm2
455 ; CHECK-NEXT: vaddph %ymm2, %ymm0, %ymm0
456 ; CHECK-NEXT: vrndscaleph $11, %ymm0, %ymm0
458 %a = call <16 x half> @llvm.round.v16f16(<16 x half> %x)
461 declare <16 x half> @llvm.round.v16f16(<16 x half>)
463 define <32 x half> @fnegv32f16(<32 x half> %x) {
464 ; CHECK-LABEL: fnegv32f16:
466 ; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
467 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
469 %a = fneg <32 x half> %x
473 define <32 x half> @fneg_idiomv32f16(<32 x half> %x) {
474 ; CHECK-LABEL: fneg_idiomv32f16:
476 ; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
477 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
479 %a = fsub <32 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
; llvm.fabs on <32 x half>: expected to be a vpandq of %zmm0 with a broadcast
; constant (printed as NaN elements in the asm comment).
483 define <32 x half> @fabsv32f16(<32 x half> %x) {
484 ; CHECK-LABEL: fabsv32f16:
486 ; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
487 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
489 %a = call <32 x half> @llvm.fabs.v32f16(<32 x half> %x)
492 declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
; llvm.copysign on <32 x half>: expected to be one vpternlogq (immediate $228)
; whose constant operand is a {1to8} embedded broadcast from the constant pool.
494 define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) {
495 ; CHECK-LABEL: fcopysignv32f16:
497 ; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
499 %a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y)
502 declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)
504 define <32 x half> @roundv32f16(<32 x half> %x) {
505 ; CHECK-LABEL: roundv32f16:
507 ; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
508 ; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
509 ; CHECK-NEXT: vpternlogq $248, %zmm1, %zmm0, %zmm2
510 ; CHECK-NEXT: vaddph %zmm2, %zmm0, %zmm0
511 ; CHECK-NEXT: vrndscaleph $11, %zmm0, %zmm0
513 %a = call <32 x half> @llvm.round.v32f16(<32 x half> %x)
516 declare <32 x half> @llvm.round.v32f16(<32 x half>)
; Computes both %x - %y and %x + %y and interleaves them with a shufflevector
; (even lanes from the fsub, odd lanes from the fadd). Expected: separate
; vsubph and vaddph combined by a vpblendw.
; NOTE(review): attribute group #0 is defined outside this excerpt.
518 define <8 x half> @regression_test1(<8 x half> %x, <8 x half> %y) #0 {
519 ; CHECK-LABEL: regression_test1:
520 ; CHECK: ## %bb.0: ## %entry
521 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm2
522 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
523 ; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
526 %a = fsub <8 x half> %x, %y
527 %b = fadd <8 x half> %x, %y
528 %c = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
; fptoui <8 x float> to <8 x i16>: expected to convert to 32-bit unsigned
; integers (vcvttps2udq) and then truncate to words (vpmovdw).
; NOTE(review): attribute group #0 is defined outside this excerpt.
532 define <8 x i16> @regression_test2(<8 x float> %x) #0 {
533 ; CHECK-LABEL: regression_test2:
534 ; CHECK: ## %bb.0: ## %entry
535 ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
536 ; CHECK-NEXT: vpmovdw %ymm0, %xmm0
537 ; CHECK-NEXT: vzeroupper
540 %a = fptoui <8 x float> %x to <8 x i16>
; fptosi <8 x float> to <8 x i16>: expected to convert to 32-bit signed
; integers (vcvttps2dq) and then truncate to words (vpmovdw).
; NOTE(review): attribute group #0 is defined outside this excerpt.
544 define <8 x i16> @regression_test3(<8 x float> %x) #0 {
545 ; CHECK-LABEL: regression_test3:
546 ; CHECK: ## %bb.0: ## %entry
547 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
548 ; CHECK-NEXT: vpmovdw %ymm0, %xmm0
549 ; CHECK-NEXT: vzeroupper
552 %a = fptosi <8 x float> %x to <8 x i16>
; fptoui <8 x double> to <8 x i16>: expected to convert to 32-bit unsigned
; integers (vcvttpd2udq, zmm -> ymm) and then truncate to words (vpmovdw).
; NOTE(review): attribute group #0 is defined outside this excerpt.
556 define <8 x i16> @regression_test4(<8 x double> %x) #0 {
557 ; CHECK-LABEL: regression_test4:
558 ; CHECK: ## %bb.0: ## %entry
559 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0
560 ; CHECK-NEXT: vpmovdw %ymm0, %xmm0
561 ; CHECK-NEXT: vzeroupper
564 %a = fptoui <8 x double> %x to <8 x i16>
; fptosi <8 x double> to <8 x i16>: expected to convert to 32-bit signed
; integers (vcvttpd2dq, zmm -> ymm) and then truncate to words (vpmovdw).
; NOTE(review): attribute group #0 is defined outside this excerpt.
568 define <8 x i16> @regression_test5(<8 x double> %x) #0 {
569 ; CHECK-LABEL: regression_test5:
570 ; CHECK: ## %bb.0: ## %entry
571 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0
572 ; CHECK-NEXT: vpmovdw %ymm0, %xmm0
573 ; CHECK-NEXT: vzeroupper
576 %a = fptosi <8 x double> %x to <8 x i16>
; fcmp oeq on <8 x half> returning <8 x i1>: expected to be vcmpeqph into %k0,
; with the mask expanded to a word vector in %xmm0 by vpmovm2w.
580 define <8 x i1> @fcmp_v8f16(<8 x half> %a, <8 x half> %b)
581 ; CHECK-LABEL: fcmp_v8f16:
582 ; CHECK: ## %bb.0: ## %entry
583 ; CHECK-NEXT: vcmpeqph %xmm1, %xmm0, %k0
584 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
588 %0 = fcmp oeq <8 x half> %a, %b
; fcmp oeq on <16 x half> returning <16 x i1>: expected to be vcmpeqph into
; %k0, with the mask expanded to a byte vector in %xmm0 by vpmovm2b.
592 define <16 x i1> @fcmp_v16f16(<16 x half> %a, <16 x half> %b)
593 ; CHECK-LABEL: fcmp_v16f16:
594 ; CHECK: ## %bb.0: ## %entry
595 ; CHECK-NEXT: vcmpeqph %ymm1, %ymm0, %k0
596 ; CHECK-NEXT: vpmovm2b %k0, %xmm0
597 ; CHECK-NEXT: vzeroupper
601 %0 = fcmp oeq <16 x half> %a, %b
; fcmp oeq on <32 x half> returning <32 x i1>: expected to be vcmpeqph into
; %k0, with the mask expanded to a byte vector in %ymm0 by vpmovm2b.
605 define <32 x i1> @fcmp_v32f16(<32 x half> %a, <32 x half> %b)
606 ; CHECK-LABEL: fcmp_v32f16:
607 ; CHECK: ## %bb.0: ## %entry
608 ; CHECK-NEXT: vcmpeqph %zmm1, %zmm0, %k0
609 ; CHECK-NEXT: vpmovm2b %k0, %ymm0
613 %0 = fcmp oeq <32 x half> %a, %b
; fcmp oeq on <8 x half> zero-extended to <8 x i16>: expected to be vcmpeqph
; into %k0, vpmovm2w to expand the mask to words, then vpsrlw $15 to shift
; each word right by 15 bits.
617 define <8 x i16> @zext_fcmp_v8f16(<8 x half> %a, <8 x half> %b)
618 ; CHECK-LABEL: zext_fcmp_v8f16:
619 ; CHECK: ## %bb.0: ## %entry
620 ; CHECK-NEXT: vcmpeqph %xmm1, %xmm0, %k0
621 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
622 ; CHECK-NEXT: vpsrlw $15, %xmm0, %xmm0
626 %0 = fcmp oeq <8 x half> %a, %b
627 %1 = zext <8 x i1> %0 to <8 x i16>
; fcmp oeq on <16 x half> zero-extended to <16 x i16>: expected to be vcmpeqph
; into %k0, vpmovm2w to expand the mask to words, then vpsrlw $15 to shift
; each word right by 15 bits.
631 define <16 x i16> @zext_fcmp_v16f16(<16 x half> %a, <16 x half> %b)
632 ; CHECK-LABEL: zext_fcmp_v16f16:
633 ; CHECK: ## %bb.0: ## %entry
634 ; CHECK-NEXT: vcmpeqph %ymm1, %ymm0, %k0
635 ; CHECK-NEXT: vpmovm2w %k0, %ymm0
636 ; CHECK-NEXT: vpsrlw $15, %ymm0, %ymm0
640 %0 = fcmp oeq <16 x half> %a, %b
641 %1 = zext <16 x i1> %0 to <16 x i16>
; fcmp oeq on <32 x half> zero-extended to <32 x i16>: expected to be vcmpeqph
; into %k0, vpmovm2w to expand the mask to words, then vpsrlw $15 to shift
; each word right by 15 bits.
645 define <32 x i16> @zext_fcmp_v32f16(<32 x half> %a, <32 x half> %b)
646 ; CHECK-LABEL: zext_fcmp_v32f16:
647 ; CHECK: ## %bb.0: ## %entry
648 ; CHECK-NEXT: vcmpeqph %zmm1, %zmm0, %k0
649 ; CHECK-NEXT: vpmovm2w %k0, %zmm0
650 ; CHECK-NEXT: vpsrlw $15, %zmm0, %zmm0
654 %0 = fcmp oeq <32 x half> %a, %b
655 %1 = zext <32 x i1> %0 to <32 x i16>