1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX,F16C
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512
5 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64
8 declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
9 declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
10 declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
11 declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
12 declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
13 declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
14 declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
15 declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
16 declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
17 declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
; Strict (fpexcept.strict, round.dynamic) signed i1 -> half.
; SSE2: mask + negb sign-extends the bit, converts via cvtsi2ss, then
; libcalls __truncsfhf2. F16C/AVX512 (AVX prefix): cvtsi2ss + vcvtps2ph.
; AVX512FP16 (X86/X64): direct vcvtsi2sh.
19 define half @sitofp_i1tof16(i1 %x) #0 {
20 ; SSE2-LABEL: sitofp_i1tof16:
22 ; SSE2-NEXT: pushq %rax
23 ; SSE2-NEXT: andb $1, %dil
24 ; SSE2-NEXT: negb %dil
25 ; SSE2-NEXT: movsbl %dil, %eax
26 ; SSE2-NEXT: cvtsi2ss %eax, %xmm0
27 ; SSE2-NEXT: callq __truncsfhf2@PLT
28 ; SSE2-NEXT: popq %rax
31 ; AVX-LABEL: sitofp_i1tof16:
33 ; AVX-NEXT: andb $1, %dil
35 ; AVX-NEXT: movsbl %dil, %eax
36 ; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
37 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
38 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
39 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
40 ; AVX-NEXT: vmovd %xmm0, %eax
41 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
44 ; X86-LABEL: sitofp_i1tof16:
46 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
47 ; X86-NEXT: andb $1, %al
49 ; X86-NEXT: movsbl %al, %eax
50 ; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
53 ; X64-LABEL: sitofp_i1tof16:
55 ; X64-NEXT: andb $1, %dil
57 ; X64-NEXT: movsbl %dil, %eax
58 ; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
60 %result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
61 metadata !"round.dynamic",
62 metadata !"fpexcept.strict") #0
; Strict signed i8 -> half. All targets sign-extend the byte (movsbl) to
; i32 first; SSE2 libcalls __truncsfhf2, AVX uses cvtsi2ss + vcvtps2ph,
; AVX512FP16 converts directly with vcvtsi2sh.
66 define half @sitofp_i8tof16(i8 %x) #0 {
67 ; SSE2-LABEL: sitofp_i8tof16:
69 ; SSE2-NEXT: pushq %rax
70 ; SSE2-NEXT: movsbl %dil, %eax
71 ; SSE2-NEXT: cvtsi2ss %eax, %xmm0
72 ; SSE2-NEXT: callq __truncsfhf2@PLT
73 ; SSE2-NEXT: popq %rax
76 ; AVX-LABEL: sitofp_i8tof16:
78 ; AVX-NEXT: movsbl %dil, %eax
79 ; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
80 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
81 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
82 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
83 ; AVX-NEXT: vmovd %xmm0, %eax
84 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
87 ; X86-LABEL: sitofp_i8tof16:
89 ; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
90 ; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
93 ; X64-LABEL: sitofp_i8tof16:
95 ; X64-NEXT: movsbl %dil, %eax
96 ; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
98 %result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
99 metadata !"round.dynamic",
100 metadata !"fpexcept.strict") #0
; Strict signed i16 -> half. Same pattern as the i8 case but with a
; word sign-extension (movswl) before the scalar convert.
104 define half @sitofp_i16tof16(i16 %x) #0 {
105 ; SSE2-LABEL: sitofp_i16tof16:
107 ; SSE2-NEXT: pushq %rax
108 ; SSE2-NEXT: movswl %di, %eax
109 ; SSE2-NEXT: cvtsi2ss %eax, %xmm0
110 ; SSE2-NEXT: callq __truncsfhf2@PLT
111 ; SSE2-NEXT: popq %rax
114 ; AVX-LABEL: sitofp_i16tof16:
116 ; AVX-NEXT: movswl %di, %eax
117 ; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
118 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
119 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
120 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
121 ; AVX-NEXT: vmovd %xmm0, %eax
122 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
125 ; X86-LABEL: sitofp_i16tof16:
127 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
128 ; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
131 ; X64-LABEL: sitofp_i16tof16:
133 ; X64-NEXT: movswl %di, %eax
134 ; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
136 %result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
137 metadata !"round.dynamic",
138 metadata !"fpexcept.strict") #0
; Strict signed i32 -> half. No widening needed: cvtsi2ss/vcvtsi2sh take
; the 32-bit GPR directly; X86 folds the stack load via the memory-operand
; form vcvtsi2shl.
142 define half @sitofp_i32tof16(i32 %x) #0 {
143 ; SSE2-LABEL: sitofp_i32tof16:
145 ; SSE2-NEXT: pushq %rax
146 ; SSE2-NEXT: cvtsi2ss %edi, %xmm0
147 ; SSE2-NEXT: callq __truncsfhf2@PLT
148 ; SSE2-NEXT: popq %rax
151 ; AVX-LABEL: sitofp_i32tof16:
153 ; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
154 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
155 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
156 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
157 ; AVX-NEXT: vmovd %xmm0, %eax
158 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
161 ; X86-LABEL: sitofp_i32tof16:
163 ; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
166 ; X64-LABEL: sitofp_i32tof16:
168 ; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
170 %result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
171 metadata !"round.dynamic",
172 metadata !"fpexcept.strict") #0
; Strict signed i64 -> half. x86-64 targets use the 64-bit register form
; (cvtsi2ss %rdi / vcvtsi2sh %rdi). On i686 (X86) there is no 64-bit GPR,
; so the value is loaded from the stack with vmovsd and converted with the
; packed vcvtqq2ph.
176 define half @sitofp_i64tof16(i64 %x) #0 {
177 ; SSE2-LABEL: sitofp_i64tof16:
179 ; SSE2-NEXT: pushq %rax
180 ; SSE2-NEXT: cvtsi2ss %rdi, %xmm0
181 ; SSE2-NEXT: callq __truncsfhf2@PLT
182 ; SSE2-NEXT: popq %rax
185 ; AVX-LABEL: sitofp_i64tof16:
187 ; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
188 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
189 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
190 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
191 ; AVX-NEXT: vmovd %xmm0, %eax
192 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
195 ; X86-LABEL: sitofp_i64tof16:
197 ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
198 ; X86-NEXT: vcvtqq2ph %xmm0, %xmm0
201 ; X64-LABEL: sitofp_i64tof16:
203 ; X64-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm0
205 %result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
206 metadata !"round.dynamic",
207 metadata !"fpexcept.strict") #0
; Strict unsigned i1 -> half. The bit is masked with and $1 and then a
; SIGNED convert is used (the masked value 0/1 is non-negative, so signed
; and unsigned conversion agree).
211 define half @uitofp_i1tof16(i1 %x) #0 {
212 ; SSE2-LABEL: uitofp_i1tof16:
214 ; SSE2-NEXT: pushq %rax
215 ; SSE2-NEXT: andl $1, %edi
216 ; SSE2-NEXT: cvtsi2ss %edi, %xmm0
217 ; SSE2-NEXT: callq __truncsfhf2@PLT
218 ; SSE2-NEXT: popq %rax
221 ; AVX-LABEL: uitofp_i1tof16:
223 ; AVX-NEXT: andl $1, %edi
224 ; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
225 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
226 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
227 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
228 ; AVX-NEXT: vmovd %xmm0, %eax
229 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
232 ; X86-LABEL: uitofp_i1tof16:
234 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
235 ; X86-NEXT: andb $1, %al
236 ; X86-NEXT: movzbl %al, %eax
237 ; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
240 ; X64-LABEL: uitofp_i1tof16:
242 ; X64-NEXT: andl $1, %edi
243 ; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
245 %result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
246 metadata !"round.dynamic",
247 metadata !"fpexcept.strict") #0
; Strict unsigned i8 -> half. Zero-extend the byte (movzbl) to i32, then
; use a signed convert — the zero-extended value is always non-negative.
251 define half @uitofp_i8tof16(i8 %x) #0 {
252 ; SSE2-LABEL: uitofp_i8tof16:
254 ; SSE2-NEXT: pushq %rax
255 ; SSE2-NEXT: movzbl %dil, %eax
256 ; SSE2-NEXT: cvtsi2ss %eax, %xmm0
257 ; SSE2-NEXT: callq __truncsfhf2@PLT
258 ; SSE2-NEXT: popq %rax
261 ; AVX-LABEL: uitofp_i8tof16:
263 ; AVX-NEXT: movzbl %dil, %eax
264 ; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
265 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
266 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
267 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
268 ; AVX-NEXT: vmovd %xmm0, %eax
269 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
272 ; X86-LABEL: uitofp_i8tof16:
274 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
275 ; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
278 ; X64-LABEL: uitofp_i8tof16:
280 ; X64-NEXT: movzbl %dil, %eax
281 ; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
283 %result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
284 metadata !"round.dynamic",
285 metadata !"fpexcept.strict") #0
; Strict unsigned i16 -> half. Same approach as the i8 case: zero-extend
; with movzwl and fall through to the signed scalar convert.
289 define half @uitofp_i16tof16(i16 %x) #0 {
290 ; SSE2-LABEL: uitofp_i16tof16:
292 ; SSE2-NEXT: pushq %rax
293 ; SSE2-NEXT: movzwl %di, %eax
294 ; SSE2-NEXT: cvtsi2ss %eax, %xmm0
295 ; SSE2-NEXT: callq __truncsfhf2@PLT
296 ; SSE2-NEXT: popq %rax
299 ; AVX-LABEL: uitofp_i16tof16:
301 ; AVX-NEXT: movzwl %di, %eax
302 ; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
303 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
304 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
305 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
306 ; AVX-NEXT: vmovd %xmm0, %eax
307 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
310 ; X86-LABEL: uitofp_i16tof16:
312 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
313 ; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
316 ; X64-LABEL: uitofp_i16tof16:
318 ; X64-NEXT: movzwl %di, %eax
319 ; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
321 %result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
322 metadata !"round.dynamic",
323 metadata !"fpexcept.strict") #0
; Strict unsigned i32 -> half. SSE2/F16C lack an unsigned convert, so they
; implicitly zero-extend to 64 bits (movl %edi, %eax) and use the 64-bit
; signed convert on %rax. AVX512 has vcvtusi2ss and AVX512FP16 has
; vcvtusi2sh (memory form vcvtusi2shl on i686), so SSE2/AVX check prefixes
; diverge here (F16C vs. AVX512).
327 define half @uitofp_i32tof16(i32 %x) #0 {
328 ; SSE2-LABEL: uitofp_i32tof16:
330 ; SSE2-NEXT: pushq %rax
331 ; SSE2-NEXT: movl %edi, %eax
332 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
333 ; SSE2-NEXT: callq __truncsfhf2@PLT
334 ; SSE2-NEXT: popq %rax
337 ; F16C-LABEL: uitofp_i32tof16:
339 ; F16C-NEXT: movl %edi, %eax
340 ; F16C-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
341 ; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
342 ; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
343 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
344 ; F16C-NEXT: vmovd %xmm0, %eax
345 ; F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
348 ; AVX512-LABEL: uitofp_i32tof16:
350 ; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
351 ; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
352 ; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
353 ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
354 ; AVX512-NEXT: vmovd %xmm0, %eax
355 ; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
358 ; X86-LABEL: uitofp_i32tof16:
360 ; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
363 ; X64-LABEL: uitofp_i32tof16:
365 ; X64-NEXT: vcvtusi2sh %edi, %xmm0, %xmm0
367 %result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
368 metadata !"round.dynamic",
369 metadata !"fpexcept.strict") #0
; Strict unsigned i64 -> half. SSE2/F16C cannot convert an unsigned 64-bit
; value directly: for inputs with the sign bit set they halve the value
; (shrq, with the low bit OR'd back in to keep rounding correct), do a
; signed convert, and double the result (addss / vaddss); non-negative
; inputs take the cmovns path and convert directly. AVX512 uses
; vcvtusi2ss %rdi; on i686 AVX512FP16 loads the value with vmovsd and
; converts with the packed vcvtuqq2ph.
373 define half @uitofp_i64tof16(i64 %x) #0 {
374 ; SSE2-LABEL: uitofp_i64tof16:
376 ; SSE2-NEXT: movq %rdi, %rax
377 ; SSE2-NEXT: shrq %rax
378 ; SSE2-NEXT: movl %edi, %ecx
379 ; SSE2-NEXT: andl $1, %ecx
380 ; SSE2-NEXT: orq %rax, %rcx
381 ; SSE2-NEXT: testq %rdi, %rdi
382 ; SSE2-NEXT: cmovnsq %rdi, %rcx
383 ; SSE2-NEXT: cvtsi2ss %rcx, %xmm1
384 ; SSE2-NEXT: movaps %xmm1, %xmm0
385 ; SSE2-NEXT: addss %xmm1, %xmm0
386 ; SSE2-NEXT: js .LBB9_2
387 ; SSE2-NEXT: # %bb.1:
388 ; SSE2-NEXT: movaps %xmm1, %xmm0
389 ; SSE2-NEXT: .LBB9_2:
390 ; SSE2-NEXT: pushq %rax
391 ; SSE2-NEXT: callq __truncsfhf2@PLT
392 ; SSE2-NEXT: popq %rax
395 ; F16C-LABEL: uitofp_i64tof16:
397 ; F16C-NEXT: movq %rdi, %rax
398 ; F16C-NEXT: shrq %rax
399 ; F16C-NEXT: movl %edi, %ecx
400 ; F16C-NEXT: andl $1, %ecx
401 ; F16C-NEXT: orq %rax, %rcx
402 ; F16C-NEXT: testq %rdi, %rdi
403 ; F16C-NEXT: cmovnsq %rdi, %rcx
404 ; F16C-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
405 ; F16C-NEXT: jns .LBB9_2
406 ; F16C-NEXT: # %bb.1:
407 ; F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
408 ; F16C-NEXT: .LBB9_2:
409 ; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
410 ; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
411 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
412 ; F16C-NEXT: vmovd %xmm0, %eax
413 ; F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
416 ; AVX512-LABEL: uitofp_i64tof16:
418 ; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
419 ; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
420 ; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
421 ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
422 ; AVX512-NEXT: vmovd %xmm0, %eax
423 ; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
426 ; X86-LABEL: uitofp_i64tof16:
428 ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
429 ; X86-NEXT: vcvtuqq2ph %xmm0, %xmm0
432 ; X64-LABEL: uitofp_i64tof16:
434 ; X64-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm0
436 %result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
437 metadata !"round.dynamic",
438 metadata !"fpexcept.strict") #0
442 attributes #0 = { strictfp nounwind }