1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX,F16C
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512
5 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64
; Constrained (strict-FP) half -> integer conversion intrinsics exercised by
; the functions in this file: signed (fptosi) and unsigned (fptoui) variants
; for i1, i8, i16, i32 and i64 results. Each takes the half operand plus an
; exception-behavior metadata argument.
8 declare i1 @llvm.experimental.constrained.fptosi.i1.f16(half, metadata)
9 declare i8 @llvm.experimental.constrained.fptosi.i8.f16(half, metadata)
10 declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata)
11 declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
12 declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
13 declare i1 @llvm.experimental.constrained.fptoui.i1.f16(half, metadata)
14 declare i8 @llvm.experimental.constrained.fptoui.i8.f16(half, metadata)
15 declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata)
16 declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
17 declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)
; Strict signed conversion half -> i1 (fpexcept.strict).
; Checked lowering per run configuration:
;   - sse2 run: libcall to __extendhfsf2, then cvttss2si into %eax.
;   - f16c / avx512f runs (shared AVX checks): vcvtph2ps, then vcvttss2si.
;   - avx512fp16 runs (i686 and x86_64): a single vcvttsh2si.
; In every configuration the result is taken from $al of the 32-bit register.
19 define i1 @fptosi_f16toi1(half %x) #0 {
20 ; SSE2-LABEL: fptosi_f16toi1:
22 ; SSE2-NEXT: pushq %rax
23 ; SSE2-NEXT: callq __extendhfsf2@PLT
24 ; SSE2-NEXT: cvttss2si %xmm0, %eax
25 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
26 ; SSE2-NEXT: popq %rcx
29 ; AVX-LABEL: fptosi_f16toi1:
31 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
32 ; AVX-NEXT: movzwl %ax, %eax
33 ; AVX-NEXT: vmovd %eax, %xmm0
34 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
35 ; AVX-NEXT: vcvttss2si %xmm0, %eax
36 ; AVX-NEXT: # kill: def $al killed $al killed $eax
39 ; X86-LABEL: fptosi_f16toi1:
41 ; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
42 ; X86-NEXT: # kill: def $al killed $al killed $eax
45 ; X64-LABEL: fptosi_f16toi1:
47 ; X64-NEXT: vcvttsh2si %xmm0, %eax
48 ; X64-NEXT: # kill: def $al killed $al killed $eax
50 %result = call i1 @llvm.experimental.constrained.fptosi.i1.f16(half %x,
51 metadata !"fpexcept.strict") #0
; Strict signed conversion half -> i8 (fpexcept.strict).
; The checked instruction sequences match those of fptosi_f16toi1: a 32-bit
; truncating convert (cvttss2si / vcvttss2si after widening to float, or
; vcvttsh2si directly with avx512fp16) whose low byte $al is the result.
55 define i8 @fptosi_f16toi8(half %x) #0 {
56 ; SSE2-LABEL: fptosi_f16toi8:
58 ; SSE2-NEXT: pushq %rax
59 ; SSE2-NEXT: callq __extendhfsf2@PLT
60 ; SSE2-NEXT: cvttss2si %xmm0, %eax
61 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
62 ; SSE2-NEXT: popq %rcx
65 ; AVX-LABEL: fptosi_f16toi8:
67 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
68 ; AVX-NEXT: movzwl %ax, %eax
69 ; AVX-NEXT: vmovd %eax, %xmm0
70 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
71 ; AVX-NEXT: vcvttss2si %xmm0, %eax
72 ; AVX-NEXT: # kill: def $al killed $al killed $eax
75 ; X86-LABEL: fptosi_f16toi8:
77 ; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
78 ; X86-NEXT: # kill: def $al killed $al killed $eax
81 ; X64-LABEL: fptosi_f16toi8:
83 ; X64-NEXT: vcvttsh2si %xmm0, %eax
84 ; X64-NEXT: # kill: def $al killed $al killed $eax
86 %result = call i8 @llvm.experimental.constrained.fptosi.i8.f16(half %x,
87 metadata !"fpexcept.strict") #0
; Strict signed conversion half -> i16 (fpexcept.strict).
; Same checked sequences as the i1/i8 signed cases, except that the result is
; taken from the 16-bit sub-register $ax of the 32-bit convert destination.
91 define i16 @fptosi_f16toi16(half %x) #0 {
92 ; SSE2-LABEL: fptosi_f16toi16:
94 ; SSE2-NEXT: pushq %rax
95 ; SSE2-NEXT: callq __extendhfsf2@PLT
96 ; SSE2-NEXT: cvttss2si %xmm0, %eax
97 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
98 ; SSE2-NEXT: popq %rcx
101 ; AVX-LABEL: fptosi_f16toi16:
103 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
104 ; AVX-NEXT: movzwl %ax, %eax
105 ; AVX-NEXT: vmovd %eax, %xmm0
106 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
107 ; AVX-NEXT: vcvttss2si %xmm0, %eax
108 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
111 ; X86-LABEL: fptosi_f16toi16:
113 ; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
114 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
117 ; X64-LABEL: fptosi_f16toi16:
119 ; X64-NEXT: vcvttsh2si %xmm0, %eax
120 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
122 %result = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %x,
123 metadata !"fpexcept.strict") #0
; Strict signed conversion half -> i32 (fpexcept.strict).
; The 32-bit truncating convert writes %eax directly, so unlike the narrower
; signed cases no sub-register "kill" annotation is checked.
127 define i32 @fptosi_f16toi32(half %x) #0 {
128 ; SSE2-LABEL: fptosi_f16toi32:
130 ; SSE2-NEXT: pushq %rax
131 ; SSE2-NEXT: callq __extendhfsf2@PLT
132 ; SSE2-NEXT: cvttss2si %xmm0, %eax
133 ; SSE2-NEXT: popq %rcx
136 ; AVX-LABEL: fptosi_f16toi32:
138 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
139 ; AVX-NEXT: movzwl %ax, %eax
140 ; AVX-NEXT: vmovd %eax, %xmm0
141 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
142 ; AVX-NEXT: vcvttss2si %xmm0, %eax
145 ; X86-LABEL: fptosi_f16toi32:
147 ; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
150 ; X64-LABEL: fptosi_f16toi32:
152 ; X64-NEXT: vcvttsh2si %xmm0, %eax
154 %result = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x,
155 metadata !"fpexcept.strict") #0
; Strict signed conversion half -> i64 (fpexcept.strict).
; On the x86_64 runs the truncating convert targets the 64-bit register %rax.
; On the i686 avx512fp16 run the checks instead expect vmovsh + vcvttph2qq,
; with the 64-bit result read out of %xmm0 into the %eax/%edx pair
; (vmovd for the low dword, vpextrd $1 for the high dword).
159 define i64 @fptosi_f16toi64(half %x) #0 {
160 ; SSE2-LABEL: fptosi_f16toi64:
162 ; SSE2-NEXT: pushq %rax
163 ; SSE2-NEXT: callq __extendhfsf2@PLT
164 ; SSE2-NEXT: cvttss2si %xmm0, %rax
165 ; SSE2-NEXT: popq %rcx
168 ; AVX-LABEL: fptosi_f16toi64:
170 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
171 ; AVX-NEXT: movzwl %ax, %eax
172 ; AVX-NEXT: vmovd %eax, %xmm0
173 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
174 ; AVX-NEXT: vcvttss2si %xmm0, %rax
177 ; X86-LABEL: fptosi_f16toi64:
179 ; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
180 ; X86-NEXT: vcvttph2qq %xmm0, %xmm0
181 ; X86-NEXT: vmovd %xmm0, %eax
182 ; X86-NEXT: vpextrd $1, %xmm0, %edx
185 ; X64-LABEL: fptosi_f16toi64:
187 ; X64-NEXT: vcvttsh2si %xmm0, %rax
189 %result = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x,
190 metadata !"fpexcept.strict") #0
; Strict unsigned conversion half -> i1 (fpexcept.strict).
; The checks expect the same signed truncating converts as fptosi_f16toi1
; (cvttss2si / vcvttss2si / vcvttsh2si into %eax), with the result taken
; from $al.
194 define i1 @fptoui_f16toi1(half %x) #0 {
195 ; SSE2-LABEL: fptoui_f16toi1:
197 ; SSE2-NEXT: pushq %rax
198 ; SSE2-NEXT: callq __extendhfsf2@PLT
199 ; SSE2-NEXT: cvttss2si %xmm0, %eax
200 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
201 ; SSE2-NEXT: popq %rcx
204 ; AVX-LABEL: fptoui_f16toi1:
206 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
207 ; AVX-NEXT: movzwl %ax, %eax
208 ; AVX-NEXT: vmovd %eax, %xmm0
209 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
210 ; AVX-NEXT: vcvttss2si %xmm0, %eax
211 ; AVX-NEXT: # kill: def $al killed $al killed $eax
214 ; X86-LABEL: fptoui_f16toi1:
216 ; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
217 ; X86-NEXT: # kill: def $al killed $al killed $eax
220 ; X64-LABEL: fptoui_f16toi1:
222 ; X64-NEXT: vcvttsh2si %xmm0, %eax
223 ; X64-NEXT: # kill: def $al killed $al killed $eax
225 %result = call i1 @llvm.experimental.constrained.fptoui.i1.f16(half %x,
226 metadata !"fpexcept.strict") #0
; Strict unsigned conversion half -> i8 (fpexcept.strict).
; As with the unsigned i1 case, the checks expect a signed 32-bit truncating
; convert in every configuration, with the result taken from $al.
230 define i8 @fptoui_f16toi8(half %x) #0 {
231 ; SSE2-LABEL: fptoui_f16toi8:
233 ; SSE2-NEXT: pushq %rax
234 ; SSE2-NEXT: callq __extendhfsf2@PLT
235 ; SSE2-NEXT: cvttss2si %xmm0, %eax
236 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
237 ; SSE2-NEXT: popq %rcx
240 ; AVX-LABEL: fptoui_f16toi8:
242 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
243 ; AVX-NEXT: movzwl %ax, %eax
244 ; AVX-NEXT: vmovd %eax, %xmm0
245 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
246 ; AVX-NEXT: vcvttss2si %xmm0, %eax
247 ; AVX-NEXT: # kill: def $al killed $al killed $eax
250 ; X86-LABEL: fptoui_f16toi8:
252 ; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
253 ; X86-NEXT: # kill: def $al killed $al killed $eax
256 ; X64-LABEL: fptoui_f16toi8:
258 ; X64-NEXT: vcvttsh2si %xmm0, %eax
259 ; X64-NEXT: # kill: def $al killed $al killed $eax
261 %result = call i8 @llvm.experimental.constrained.fptoui.i8.f16(half %x,
262 metadata !"fpexcept.strict") #0
; Strict unsigned conversion half -> i16 (fpexcept.strict).
; The checks expect a signed 32-bit truncating convert in every configuration,
; with the result taken from the 16-bit sub-register $ax.
266 define i16 @fptoui_f16toi16(half %x) #0 {
267 ; SSE2-LABEL: fptoui_f16toi16:
269 ; SSE2-NEXT: pushq %rax
270 ; SSE2-NEXT: callq __extendhfsf2@PLT
271 ; SSE2-NEXT: cvttss2si %xmm0, %eax
272 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
273 ; SSE2-NEXT: popq %rcx
276 ; AVX-LABEL: fptoui_f16toi16:
278 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
279 ; AVX-NEXT: movzwl %ax, %eax
280 ; AVX-NEXT: vmovd %eax, %xmm0
281 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
282 ; AVX-NEXT: vcvttss2si %xmm0, %eax
283 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
286 ; X86-LABEL: fptoui_f16toi16:
288 ; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
289 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
292 ; X64-LABEL: fptoui_f16toi16:
294 ; X64-NEXT: vcvttsh2si %xmm0, %eax
295 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
297 %result = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %x,
298 metadata !"fpexcept.strict") #0
; Strict unsigned conversion half -> i32 (fpexcept.strict).
; The f16c and avx512f runs diverge here, so they are checked under their own
; prefixes rather than the shared AVX prefix:
;   - sse2 and f16c runs: signed convert into the 64-bit %rax, of which only
;     $eax is used as the result.
;   - avx512f run: the unsigned convert vcvttss2usi into %eax.
;   - avx512fp16 runs: vcvttsh2usi directly from the half operand.
302 define i32 @fptoui_f16toi32(half %x) #0 {
303 ; SSE2-LABEL: fptoui_f16toi32:
305 ; SSE2-NEXT: pushq %rax
306 ; SSE2-NEXT: callq __extendhfsf2@PLT
307 ; SSE2-NEXT: cvttss2si %xmm0, %rax
308 ; SSE2-NEXT: # kill: def $eax killed $eax killed $rax
309 ; SSE2-NEXT: popq %rcx
312 ; F16C-LABEL: fptoui_f16toi32:
314 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
315 ; F16C-NEXT: movzwl %ax, %eax
316 ; F16C-NEXT: vmovd %eax, %xmm0
317 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
318 ; F16C-NEXT: vcvttss2si %xmm0, %rax
319 ; F16C-NEXT: # kill: def $eax killed $eax killed $rax
322 ; AVX512-LABEL: fptoui_f16toi32:
324 ; AVX512-NEXT: vpextrw $0, %xmm0, %eax
325 ; AVX512-NEXT: movzwl %ax, %eax
326 ; AVX512-NEXT: vmovd %eax, %xmm0
327 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
328 ; AVX512-NEXT: vcvttss2usi %xmm0, %eax
331 ; X86-LABEL: fptoui_f16toi32:
333 ; X86-NEXT: vcvttsh2usi {{[0-9]+}}(%esp), %eax
336 ; X64-LABEL: fptoui_f16toi32:
338 ; X64-NEXT: vcvttsh2usi %xmm0, %eax
340 %result = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x,
341 metadata !"fpexcept.strict") #0
; Strict unsigned conversion half -> i64 (fpexcept.strict).
; Checked lowering per run configuration:
;   - sse2 and f16c runs: no unsigned 64-bit convert is used. The widened
;     float is compared (comiss/vcomiss) against a constant loaded from
;     memory; either zero or that constant is subtracted (branch on jb), the
;     difference goes through the signed cvttss2si, and a setae / shlq $63 /
;     xorq sequence adjusts the top bit of the result.
;     NOTE(review): the constant is presumably 2^63 - not visible in the
;     checks, which match the load with a regex.
;   - avx512f run: vcvttss2usi into %rax.
;   - i686 avx512fp16 run: vmovsh + vcvttph2uqq, result read into %eax/%edx.
;   - x86_64 avx512fp16 run: vcvttsh2usi into %rax.
345 define i64 @fptoui_f16toi64(half %x) #0 {
346 ; SSE2-LABEL: fptoui_f16toi64:
348 ; SSE2-NEXT: pushq %rax
349 ; SSE2-NEXT: callq __extendhfsf2@PLT
350 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
351 ; SSE2-NEXT: comiss %xmm2, %xmm0
352 ; SSE2-NEXT: xorps %xmm1, %xmm1
353 ; SSE2-NEXT: jb .LBB9_2
354 ; SSE2-NEXT: # %bb.1:
355 ; SSE2-NEXT: movaps %xmm2, %xmm1
356 ; SSE2-NEXT: .LBB9_2:
357 ; SSE2-NEXT: subss %xmm1, %xmm0
358 ; SSE2-NEXT: cvttss2si %xmm0, %rcx
359 ; SSE2-NEXT: setae %al
360 ; SSE2-NEXT: movzbl %al, %eax
361 ; SSE2-NEXT: shlq $63, %rax
362 ; SSE2-NEXT: xorq %rcx, %rax
363 ; SSE2-NEXT: popq %rcx
366 ; F16C-LABEL: fptoui_f16toi64:
368 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
369 ; F16C-NEXT: movzwl %ax, %eax
370 ; F16C-NEXT: vmovd %eax, %xmm0
371 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
372 ; F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
373 ; F16C-NEXT: vcomiss %xmm1, %xmm0
374 ; F16C-NEXT: vxorps %xmm2, %xmm2, %xmm2
375 ; F16C-NEXT: jb .LBB9_2
376 ; F16C-NEXT: # %bb.1:
377 ; F16C-NEXT: vmovaps %xmm1, %xmm2
378 ; F16C-NEXT: .LBB9_2:
379 ; F16C-NEXT: vsubss %xmm2, %xmm0, %xmm0
380 ; F16C-NEXT: vcvttss2si %xmm0, %rcx
381 ; F16C-NEXT: setae %al
382 ; F16C-NEXT: movzbl %al, %eax
383 ; F16C-NEXT: shlq $63, %rax
384 ; F16C-NEXT: xorq %rcx, %rax
387 ; AVX512-LABEL: fptoui_f16toi64:
389 ; AVX512-NEXT: vpextrw $0, %xmm0, %eax
390 ; AVX512-NEXT: movzwl %ax, %eax
391 ; AVX512-NEXT: vmovd %eax, %xmm0
392 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
393 ; AVX512-NEXT: vcvttss2usi %xmm0, %rax
396 ; X86-LABEL: fptoui_f16toi64:
398 ; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
399 ; X86-NEXT: vcvttph2uqq %xmm0, %xmm0
400 ; X86-NEXT: vmovd %xmm0, %eax
401 ; X86-NEXT: vpextrd $1, %xmm0, %edx
404 ; X64-LABEL: fptoui_f16toi64:
406 ; X64-NEXT: vcvttsh2usi %xmm0, %rax
408 %result = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x,
409 metadata !"fpexcept.strict") #0
; Attribute group #0 is attached to every test function and every constrained
; intrinsic call site shown in this file: strictfp plus nounwind.
413 attributes #0 = { strictfp nounwind }