; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
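
; This file exercises half-precision conversions under AVX512-FP16 + AVX512VL
; on both 64-bit (X64) and 32-bit (X86) targets: fptrunc/fpext between half
; and float/double/x86_fp80/fp128, masked and intrinsic-based conversions,
; extending loads, and sitofp/uitofp/fptosi/fptoui for i8 through i128.
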
define half @f32tof16(float %b) nounwind {
; X64-LABEL: f32tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: f32tof16:
; X86:       # %bb.0:
; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
  %a = fptrunc float %b to half
  ret half %a
}

define half @f64tof16(double %b) nounwind {
; X64-LABEL: f64tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: f64tof16:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
  %a = fptrunc double %b to half
  ret half %a
}

define <16 x half> @f32to16f16(<16 x float> %b) nounwind {
; CHECK-LABEL: f32to16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <16 x float> %b to <16 x half>
  ret <16 x half> %a
}

define <8 x half> @f32to8f16(<8 x float> %b) {
; CHECK-LABEL: f32to8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <8 x float> %b to <8 x half>
  ret <8 x half> %a
}

define <4 x half> @f32to4f16(<4 x float> %b) {
; CHECK-LABEL: f32to4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <4 x float> %b to <4 x half>
  ret <4 x half> %a
}

define <2 x half> @f32to2f16(<2 x float> %b) {
; CHECK-LABEL: f32to2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <2 x float> %b to <2 x half>
  ret <2 x half> %a
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)
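
; The next two tests pass the mask through the target intrinsic rather than a
; select, so merge masking is used: lanes whose bit in the i8 mask is clear
; keep the corresponding element of the passthru operand %b (hence the
; convert into %xmm1 followed by vmovaps back to %xmm0).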
define <8 x half> @f32to4f16_mask(<4 x float> %a, <8 x half> %b, i8 %mask) {
; X64-LABEL: f32to4f16_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: f32to4f16_mask:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %a, <8 x half> %b, i8 %mask)
  ret <8 x half> %res
}

define <8 x half> @f32to8f16_mask(<8 x float> %a, <8 x half> %b, i8 %mask) {
; X64-LABEL: f32to8f16_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; X86-LABEL: f32to8f16_mask:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %a, <8 x half> %b, i8 %mask)
  ret <8 x half> %res
}

define <8 x half> @f32to8f16_mask2(<8 x float> %b, <8 x i1> %mask) {
; CHECK-LABEL: f32to8f16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm1, %xmm1
; CHECK-NEXT:    vpmovw2m %xmm1, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <8 x float> %b to <8 x half>
  %c = select <8 x i1> %mask, <8 x half> %a, <8 x half> zeroinitializer
  ret <8 x half> %c
}

define <16 x half> @f32to16f16_mask(<16 x float> %b, <16 x i1> %mask) {
; CHECK-LABEL: f32to16f16_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm1, %xmm1
; CHECK-NEXT:    vpmovb2m %xmm1, %k1
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <16 x float> %b to <16 x half>
  %c = select <16 x i1> %mask, <16 x half> %a, <16 x half> zeroinitializer
  ret <16 x half> %c
}

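; Scalar half->float/double extension. On i686 the widened result is
; returned on the x87 stack (flds/fldl), as the 32-bit calling convention
; requires for float and double return values.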
define float @f16tof32(half %b) nounwind {
; X64-LABEL: f16tof32:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: f16tof32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
  %a = fpext half %b to float
  ret float %a
}

define double @f16tof64(half %b) nounwind {
; X64-LABEL: f16tof64:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: f16tof64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    vmovsh 8(%ebp), %xmm0
; X86-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovsd %xmm0, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a = fpext half %b to double
  ret double %a
}

define <16 x float> @f16to16f32(<16 x half> %b) nounwind {
; CHECK-LABEL: f16to16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <16 x half> %b to <16 x float>
  ret <16 x float> %a
}

define <8 x float> @f16to8f32(<8 x half> %b) nounwind {
; CHECK-LABEL: f16to8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <8 x half> %b to <8 x float>
  ret <8 x float> %a
}

define <4 x float> @f16to4f32(<4 x half> %b) nounwind {
; CHECK-LABEL: f16to4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <4 x half> %b to <4 x float>
  ret <4 x float> %a
}

define <2 x float> @f16to2f32(<2 x half> %b) nounwind {
; CHECK-LABEL: f16to2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <2 x half> %b to <2 x float>
  ret <2 x float> %a
}

define <16 x float> @f16to16f32_mask(<16 x half> %b, <16 x float> %b1, <16 x float> %a1) {
; CHECK-LABEL: f16to16f32_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpltps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <16 x half> %b to <16 x float>
  %mask = fcmp ogt <16 x float> %a1, %b1
  %c = select <16 x i1> %mask, <16 x float> %a, <16 x float> zeroinitializer
  ret <16 x float> %c
}

define <8 x float> @f16to8f32_mask(<8 x half> %b, <8 x float> %b1, <8 x float> %a1) {
; CHECK-LABEL: f16to8f32_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpltps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <8 x half> %b to <8 x float>
  %mask = fcmp ogt <8 x float> %a1, %b1
  %c = select <8 x i1> %mask, <8 x float> %a, <8 x float> zeroinitializer
  ret <8 x float> %c
}

define <4 x float> @f16to4f32_mask(<4 x half> %b, <4 x float> %b1, <4 x float> %a1) {
; CHECK-LABEL: f16to4f32_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpltps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <4 x half> %b to <4 x float>
  %mask = fcmp ogt <4 x float> %a1, %b1
  %c = select <4 x i1> %mask, <4 x float> %a, <4 x float> zeroinitializer
  ret <4 x float> %c
}

define <2 x float> @f16to2f32_mask(<2 x half> %b, <2 x float> %b1, <2 x float> %a1) {
; CHECK-LABEL: f16to2f32_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpltps %xmm2, %xmm1, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <2 x half> %b to <2 x float>
  %mask = fcmp ogt <2 x float> %a1, %b1
  %c = select <2 x i1> %mask, <2 x float> %a, <2 x float> zeroinitializer
  ret <2 x float> %c
}

define <2 x double> @f16to2f64(<2 x half> %b) nounwind {
; CHECK-LABEL: f16to2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <2 x half> %b to <2 x double>
  ret <2 x double> %a
}

define <2 x double> @f16to2f64_mask(<2 x half> %b, <2 x double> %b1, <2 x double> %a1) {
; CHECK-LABEL: f16to2f64_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpltpd %xmm2, %xmm1, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <2 x half> %b to <2 x double>
  %mask = fcmp ogt <2 x double> %a1, %b1
  %c = select <2 x i1> %mask, <2 x double> %a, <2 x double> zeroinitializer
  ret <2 x double> %c
}

define <4 x double> @f16to4f64(<4 x half> %b) nounwind {
; CHECK-LABEL: f16to4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <4 x half> %b to <4 x double>
  ret <4 x double> %a
}

define <4 x double> @f16to4f64_mask(<4 x half> %b, <4 x double> %b1, <4 x double> %a1) {
; CHECK-LABEL: f16to4f64_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpltpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <4 x half> %b to <4 x double>
  %mask = fcmp ogt <4 x double> %a1, %b1
  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
  ret <4 x double> %c
}

define <8 x double> @f16to8f64(<8 x half> %b) nounwind {
; CHECK-LABEL: f16to8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <8 x half> %b to <8 x double>
  ret <8 x double> %a
}

define <8 x double> @f16to8f64_mask(<8 x half> %b, <8 x double> %b1, <8 x double> %a1) {
; CHECK-LABEL: f16to8f64_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpltpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fpext <8 x half> %b to <8 x double>
  %mask = fcmp ogt <8 x double> %a1, %b1
  %c = select <8 x i1> %mask, <8 x double> %a, <8 x double> zeroinitializer
  ret <8 x double> %c
}

define <2 x half> @f64to2f16(<2 x double> %b) {
; CHECK-LABEL: f64to2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <2 x double> %b to <2 x half>
  ret <2 x half> %a
}

define <4 x half> @f64to4f16(<4 x double> %b) {
; CHECK-LABEL: f64to4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <4 x double> %b to <4 x half>
  ret <4 x half> %a
}

define <8 x half> @f64to8f16(<8 x double> %b) {
; CHECK-LABEL: f64to8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %a = fptrunc <8 x double> %b to <8 x half>
  ret <8 x half> %a
}

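; Extending loads. By default the scalar load stays separate from
; vcvtsh2ss/vcvtsh2sd, presumably to avoid the false dependency a
; memory-operand form would take on the destination register; the optsize
; variants further below fold the load. Vector fpext loads always fold.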
define float @extload_f16_f32(ptr %x) {
; X64-LABEL: extload_f16_f32:
; X64:       # %bb.0:
; X64-NEXT:    vmovsh (%rdi), %xmm0
; X64-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_f16_f32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsh (%eax), %xmm0
; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %a = load half, ptr %x
  %b = fpext half %a to float
  ret float %b
}

define double @extload_f16_f64(ptr %x) {
; X64-LABEL: extload_f16_f64:
; X64:       # %bb.0:
; X64-NEXT:    vmovsh (%rdi), %xmm0
; X64-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_f16_f64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    vmovsh (%eax), %xmm0
; X86-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovsd %xmm0, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
  %a = load half, ptr %x
  %b = fpext half %a to double
  ret double %b
}

define float @extload_f16_f32_optsize(ptr %x) optsize {
; X64-LABEL: extload_f16_f32_optsize:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsh2ss (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_f16_f32_optsize:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsh2ss (%eax), %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %a = load half, ptr %x
  %b = fpext half %a to float
  ret float %b
}

define double @extload_f16_f64_optsize(ptr %x) optsize {
; X64-LABEL: extload_f16_f64_optsize:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsh2sd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_f16_f64_optsize:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    vcvtsh2sd (%eax), %xmm0, %xmm0
; X86-NEXT:    vmovsd %xmm0, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
  %a = load half, ptr %x
  %b = fpext half %a to double
  ret double %b
}

define <16 x float> @extload_v16f16_v16f32(ptr %x) {
; X64-LABEL: extload_v16f16_v16f32:
; X64:       # %bb.0:
; X64-NEXT:    vcvtph2psx (%rdi), %zmm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_v16f16_v16f32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtph2psx (%eax), %zmm0
; X86-NEXT:    retl
  %a = load <16 x half>, ptr %x
  %b = fpext <16 x half> %a to <16 x float>
  ret <16 x float> %b
}

define <8 x float> @extload_v8f16_v8f32(ptr %x) {
; X64-LABEL: extload_v8f16_v8f32:
; X64:       # %bb.0:
; X64-NEXT:    vcvtph2psx (%rdi), %ymm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_v8f16_v8f32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtph2psx (%eax), %ymm0
; X86-NEXT:    retl
  %a = load <8 x half>, ptr %x
  %b = fpext <8 x half> %a to <8 x float>
  ret <8 x float> %b
}

define <4 x float> @extload_v4f16_v4f32(ptr %x) {
; X64-LABEL: extload_v4f16_v4f32:
; X64:       # %bb.0:
; X64-NEXT:    vcvtph2psx (%rdi), %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_v4f16_v4f32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtph2psx (%eax), %xmm0
; X86-NEXT:    retl
  %a = load <4 x half>, ptr %x
  %b = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %b
}

define <8 x double> @extload_v8f16_v8f64(ptr %x) {
; X64-LABEL: extload_v8f16_v8f64:
; X64:       # %bb.0:
; X64-NEXT:    vcvtph2pd (%rdi), %zmm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_v8f16_v8f64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtph2pd (%eax), %zmm0
; X86-NEXT:    retl
  %a = load <8 x half>, ptr %x
  %b = fpext <8 x half> %a to <8 x double>
  ret <8 x double> %b
}

define <4 x double> @extload_v4f16_v4f64(ptr %x) {
; X64-LABEL: extload_v4f16_v4f64:
; X64:       # %bb.0:
; X64-NEXT:    vcvtph2pd (%rdi), %ymm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_v4f16_v4f64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtph2pd (%eax), %ymm0
; X86-NEXT:    retl
  %a = load <4 x half>, ptr %x
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

define <2 x double> @extload_v2f16_v2f64(ptr %x) {
; X64-LABEL: extload_v2f16_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vcvtph2pd (%rdi), %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: extload_v2f16_v2f64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtph2pd (%eax), %xmm0
; X86-NEXT:    retl
  %a = load <2 x half>, ptr %x
  %b = fpext <2 x half> %a to <2 x double>
  ret <2 x double> %b
}

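; Integer to half. i8/i16 sources are first widened to i32 because
; vcvtsi2sh (and vcvtusi2sh) only accept 32- and 64-bit operands; the
; signed and unsigned cases differ only in the sign- vs zero-extension.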
define half @s8_to_half(i8 %x) {
; X64-LABEL: s8_to_half:
; X64:       # %bb.0:
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: s8_to_half:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
  %a = sitofp i8 %x to half
  ret half %a
}

define half @s16_to_half(i16 %x) {
; X64-LABEL: s16_to_half:
; X64:       # %bb.0:
; X64-NEXT:    movswl %di, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: s16_to_half:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
  %a = sitofp i16 %x to half
  ret half %a
}

define half @s32_to_half(i32 %x) {
; X64-LABEL: s32_to_half:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: s32_to_half:
; X86:       # %bb.0:
; X86-NEXT:    vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    retl
  %a = sitofp i32 %x to half
  ret half %a
}

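; In 32-bit mode there is no 64-bit GPR form of vcvtsi2sh, so the i64 input
; is reloaded into an XMM register and converted with the packed vcvtqq2ph
; (vcvtuqq2ph for the unsigned case below).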
define half @s64_to_half(i64 %x) {
; X64-LABEL: s64_to_half:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: s64_to_half:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtqq2ph %xmm0, %xmm0
; X86-NEXT:    retl
  %a = sitofp i64 %x to half
  ret half %a
}

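; i128 has no hardware conversion: it lowers to the compiler-rt/libgcc
; helpers __floattihf/__floatuntihf here, and __fixhfti/__fixunshfti for
; the half-to-i128 direction further below.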
define half @s128_to_half(i128 %x) {
; X64-LABEL: s128_to_half:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    callq __floattihf@PLT
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-LABEL: s128_to_half:
; X86:       # %bb.0:
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    .cfi_def_cfa_offset 20
; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vmovups %xmm0, (%esp)
; X86-NEXT:    calll __floattihf
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %a = sitofp i128 %x to half
  ret half %a
}

define half @u8_to_half(i8 %x) {
; X64-LABEL: u8_to_half:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: u8_to_half:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
  %a = uitofp i8 %x to half
  ret half %a
}

define half @u16_to_half(i16 %x) {
; X64-LABEL: u16_to_half:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: u16_to_half:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
  %a = uitofp i16 %x to half
  ret half %a
}

define half @u32_to_half(i32 %x) {
; X64-LABEL: u32_to_half:
; X64:       # %bb.0:
; X64-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: u32_to_half:
; X86:       # %bb.0:
; X86-NEXT:    vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    retl
  %a = uitofp i32 %x to half
  ret half %a
}

define half @u64_to_half(i64 %x) {
; X64-LABEL: u64_to_half:
; X64:       # %bb.0:
; X64-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: u64_to_half:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtuqq2ph %xmm0, %xmm0
; X86-NEXT:    retl
  %a = uitofp i64 %x to half
  ret half %a
}

define half @u128_to_half(i128 %x) {
; X64-LABEL: u128_to_half:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    callq __floatuntihf@PLT
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-LABEL: u128_to_half:
; X86:       # %bb.0:
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    .cfi_def_cfa_offset 20
; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vmovups %xmm0, (%esp)
; X86-NEXT:    calll __floatuntihf
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %a = uitofp i128 %x to half
  ret half %a
}

define i8 @half_to_s8(half %x) {
; X64-LABEL: half_to_s8:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_s8:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %a = fptosi half %x to i8
  ret i8 %a
}

define i16 @half_to_s16(half %x) {
; X64-LABEL: half_to_s16:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_s16:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %a = fptosi half %x to i16
  ret i16 %a
}

define i32 @half_to_s32(half %x) {
; X64-LABEL: half_to_s32:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_s32:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %a = fptosi half %x to i32
  ret i32 %a
}

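; As with the i64 sitofp case, 32-bit mode lacks a 64-bit scalar form of
; vcvttsh2si/vcvttsh2usi, so the truncating conversion goes through the
; packed vcvttph2qq/vcvttph2uqq and the result is split into the %eax/%edx
; return pair.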
define i64 @half_to_s64(half %x) {
; X64-LABEL: half_to_s64:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %rax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_s64:
; X86:       # %bb.0:
; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vcvttph2qq %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    vpextrd $1, %xmm0, %edx
; X86-NEXT:    retl
  %a = fptosi half %x to i64
  ret i64 %a
}

define i128 @half_to_s128(half %x) {
; X64-LABEL: half_to_s128:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    callq __fixhfti@PLT
; X64-NEXT:    popq %rcx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-LABEL: half_to_s128:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    movl 8(%ebp), %esi
; X86-NEXT:    vmovsh 12(%ebp), %xmm0
; X86-NEXT:    vmovsh %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __fixhfti
; X86-NEXT:    subl $4, %esp
; X86-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vmovaps %xmm0, (%esi)
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    leal -4(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl $4
  %a = fptosi half %x to i128
  ret i128 %a
}

define i8 @half_to_u8(half %x) {
; X64-LABEL: half_to_u8:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_u8:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %a = fptoui half %x to i8
  ret i8 %a
}

define i16 @half_to_u16(half %x) {
; X64-LABEL: half_to_u16:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_u16:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %a = fptoui half %x to i16
  ret i16 %a
}

define i32 @half_to_u32(half %x) {
; X64-LABEL: half_to_u32:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2usi %xmm0, %eax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_u32:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2usi {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %a = fptoui half %x to i32
  ret i32 %a
}

define i64 @half_to_u64(half %x) {
; X64-LABEL: half_to_u64:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2usi %xmm0, %rax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_u64:
; X86:       # %bb.0:
; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vcvttph2uqq %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    vpextrd $1, %xmm0, %edx
; X86-NEXT:    retl
  %a = fptoui half %x to i64
  ret i64 %a
}

define i128 @half_to_u128(half %x) {
; X64-LABEL: half_to_u128:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    callq __fixunshfti@PLT
; X64-NEXT:    popq %rcx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-LABEL: half_to_u128:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    movl 8(%ebp), %esi
; X86-NEXT:    vmovsh 12(%ebp), %xmm0
; X86-NEXT:    vmovsh %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __fixunshfti
; X86-NEXT:    subl $4, %esp
; X86-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vmovaps %xmm0, (%esi)
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    leal -4(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl $4
  %a = fptoui half %x to i128
  ret i128 %a
}

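; x86_fp80 has no FP16 conversion instructions in either direction, so both
; fpext and fptrunc go through the __extendhfxf2/__truncxfhf2 libcalls.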
define x86_fp80 @half_to_f80(half %x) nounwind {
; X64-LABEL: half_to_f80:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    callq __extendhfxf2@PLT
; X64-NEXT:    popq %rax
; X64-NEXT:    retq
;
; X86-LABEL: half_to_f80:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vmovsh %xmm0, (%esp)
; X86-NEXT:    calll __extendhfxf2
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
  %a = fpext half %x to x86_fp80
  ret x86_fp80 %a
}

define half @f80_to_half(x86_fp80 %x) nounwind {
; X64-LABEL: f80_to_half:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NEXT:    fstpt (%rsp)
; X64-NEXT:    callq __truncxfhf2@PLT
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    retq
;
; X86-LABEL: f80_to_half:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    fldt {{[0-9]+}}(%esp)
; X86-NEXT:    fstpt (%esp)
; X86-NEXT:    calll __truncxfhf2
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
  %a = fptrunc x86_fp80 %x to half
  ret half %a
}

; FIXME: We're doing a two step conversion here on 32-bit.
; First from f16->f32 then f32->f128. This is occurring
; due to common code in LegalizeFloatTypes that thinks
; there are no libcalls for f16 to any type but f32.
; Changing this may break other non-x86 targets. The code
; generated here should be functional.
define fp128 @half_to_f128(half %x) nounwind {
; X64-LABEL: half_to_f128:
; X64:       # %bb.0:
; X64-NEXT:    jmp __extendhftf2@PLT # TAILCALL
;
; X86-LABEL: half_to_f128:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movl 8(%ebp), %esi
; X86-NEXT:    vmovsh 12(%ebp), %xmm0
; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendsftf2
; X86-NEXT:    subl $4, %esp
; X86-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vmovaps %xmm0, (%esi)
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    leal -4(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %a = fpext half %x to fp128
  ret fp128 %a
}

define half @f128_to_half(fp128 %x) nounwind {
; X64-LABEL: f128_to_half:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    callq __trunctfhf2@PLT
; X64-NEXT:    popq %rax
; X64-NEXT:    retq
;
; X86-LABEL: f128_to_half:
; X86:       # %bb.0:
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vmovups %xmm0, (%esp)
; X86-NEXT:    calll __trunctfhf2
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
  %a = fptrunc fp128 %x to half
  ret half %a
}

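; With "prefer-vector-width"="256" (attribute #0 below), the <8 x i64>
; source is kept split across two 256-bit registers, so each ymm half is
; converted separately and the two xmm results are merged with vmovlhps.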
define <8 x half> @s64tof16(<8 x i64> %a) #0 {
; CHECK-LABEL: s64tof16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %ymm1, %xmm1
; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = sitofp <8 x i64> %a to <8 x half>
  ret <8 x half> %1
}

define <8 x half> @u64tof16(<8 x i64> %a) #0 {
; CHECK-LABEL: u64tof16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %ymm1, %xmm1
; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = uitofp <8 x i64> %a to <8 x half>
  ret <8 x half> %1
}

attributes #0 = { "min-legal-vector-width"="256" "prefer-vector-width"="256" }