1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX
5 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -O3 | FileCheck %s --check-prefixes=X86
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -O3 | FileCheck %s --check-prefixes=X64
8 declare half @llvm.experimental.constrained.ceil.f16(half, metadata)
9 declare half @llvm.experimental.constrained.floor.f16(half, metadata)
10 declare half @llvm.experimental.constrained.trunc.f16(half, metadata)
11 declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata)
12 declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata)
13 declare half @llvm.experimental.constrained.roundeven.f16(half, metadata)
14 declare half @llvm.experimental.constrained.round.f16(half, metadata)
; Strict-FP ceil on half. The SSE2 run has no f16 hardware support, so it
; extends to float, libcalls ceilf, and truncates back. F16C/AVX512F (AVX
; prefix) convert half->float and use vroundss; AVX512FP16 (X86/X64) rounds
; the half directly with vrndscalesh. Rounding immediate 10 = 0b1010:
; round toward +inf (2) | suppress precision exceptions (8), matching the
; exception-safe lowering required by !"fpexcept.strict".
16 define half @fceil32(half %f) #0 {
17 ; SSE2-LABEL: fceil32:
19 ; SSE2-NEXT: pushq %rax
20 ; SSE2-NEXT: callq __extendhfsf2@PLT
21 ; SSE2-NEXT: callq ceilf@PLT
22 ; SSE2-NEXT: callq __truncsfhf2@PLT
23 ; SSE2-NEXT: popq %rax
28 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
29 ; AVX-NEXT: movzwl %ax, %eax
30 ; AVX-NEXT: vmovd %eax, %xmm0
31 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
32 ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
33 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
34 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
35 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
36 ; AVX-NEXT: vmovd %xmm0, %eax
37 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
42 ; X86-NEXT: vrndscalesh $10, {{[0-9]+}}(%esp), %xmm0, %xmm0
47 ; X64-NEXT: vrndscalesh $10, %xmm0, %xmm0, %xmm0
49 %res = call half @llvm.experimental.constrained.ceil.f16(
50 half %f, metadata !"fpexcept.strict") #0
; Strict-FP floor on half. Same per-target lowering pattern as the ceil test
; above it in the original file: libcall via float without f16 support,
; vroundss on the widened value with F16C/AVX512F, direct vrndscalesh with
; AVX512FP16. Rounding immediate 9 = 0b1001: round toward -inf (1) |
; suppress precision exceptions (8).
54 define half @ffloor32(half %f) #0 {
55 ; SSE2-LABEL: ffloor32:
57 ; SSE2-NEXT: pushq %rax
58 ; SSE2-NEXT: callq __extendhfsf2@PLT
59 ; SSE2-NEXT: callq floorf@PLT
60 ; SSE2-NEXT: callq __truncsfhf2@PLT
61 ; SSE2-NEXT: popq %rax
64 ; AVX-LABEL: ffloor32:
66 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
67 ; AVX-NEXT: movzwl %ax, %eax
68 ; AVX-NEXT: vmovd %eax, %xmm0
69 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
70 ; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
71 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
72 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
73 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
74 ; AVX-NEXT: vmovd %xmm0, %eax
75 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
78 ; X86-LABEL: ffloor32:
80 ; X86-NEXT: vrndscalesh $9, {{[0-9]+}}(%esp), %xmm0, %xmm0
83 ; X64-LABEL: ffloor32:
85 ; X64-NEXT: vrndscalesh $9, %xmm0, %xmm0, %xmm0
87 %res = call half @llvm.experimental.constrained.floor.f16(
88 half %f, metadata !"fpexcept.strict") #0
; Strict-FP trunc (round toward zero) on half. Libcall to truncf when no f16
; support; otherwise the single-instruction rounding forms. Rounding
; immediate 11 = 0b1011: round toward zero (3) | suppress precision
; exceptions (8).
92 define half @ftrunc32(half %f) #0 {
93 ; SSE2-LABEL: ftrunc32:
95 ; SSE2-NEXT: pushq %rax
96 ; SSE2-NEXT: callq __extendhfsf2@PLT
97 ; SSE2-NEXT: callq truncf@PLT
98 ; SSE2-NEXT: callq __truncsfhf2@PLT
99 ; SSE2-NEXT: popq %rax
102 ; AVX-LABEL: ftrunc32:
104 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
105 ; AVX-NEXT: movzwl %ax, %eax
106 ; AVX-NEXT: vmovd %eax, %xmm0
107 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
108 ; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
109 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
110 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
111 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
112 ; AVX-NEXT: vmovd %xmm0, %eax
113 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
116 ; X86-LABEL: ftrunc32:
118 ; X86-NEXT: vrndscalesh $11, {{[0-9]+}}(%esp), %xmm0, %xmm0
121 ; X64-LABEL: ftrunc32:
123 ; X64-NEXT: vrndscalesh $11, %xmm0, %xmm0, %xmm0
125 %res = call half @llvm.experimental.constrained.trunc.f16(
126 half %f, metadata !"fpexcept.strict") #0
; Strict-FP rint on half (round using the dynamic MXCSR mode; may raise the
; inexact exception, which is why rint differs from nearbyint below).
; Rounding immediate 4 = 0b0100: use MXCSR rounding mode, and — unlike the
; other tests in this file — the suppress-exceptions bit (8) is NOT set, so
; inexact is reported as rint semantics require.
130 define half @frint32(half %f) #0 {
131 ; SSE2-LABEL: frint32:
133 ; SSE2-NEXT: pushq %rax
134 ; SSE2-NEXT: callq __extendhfsf2@PLT
135 ; SSE2-NEXT: callq rintf@PLT
136 ; SSE2-NEXT: callq __truncsfhf2@PLT
137 ; SSE2-NEXT: popq %rax
140 ; AVX-LABEL: frint32:
142 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
143 ; AVX-NEXT: movzwl %ax, %eax
144 ; AVX-NEXT: vmovd %eax, %xmm0
145 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
146 ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
147 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
148 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
149 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
150 ; AVX-NEXT: vmovd %xmm0, %eax
151 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
154 ; X86-LABEL: frint32:
156 ; X86-NEXT: vrndscalesh $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
159 ; X64-LABEL: frint32:
161 ; X64-NEXT: vrndscalesh $4, %xmm0, %xmm0, %xmm0
163 %res = call half @llvm.experimental.constrained.rint.f16(
165 metadata !"round.dynamic", metadata !"fpexcept.strict") #0
; Strict-FP nearbyint on half (round using the dynamic MXCSR mode WITHOUT
; raising inexact). Rounding immediate 12 = 0b1100: use MXCSR rounding
; mode (4) | suppress precision exceptions (8) — the no-exc bit is the only
; difference from the rint test above.
169 define half @fnearbyint32(half %f) #0 {
170 ; SSE2-LABEL: fnearbyint32:
172 ; SSE2-NEXT: pushq %rax
173 ; SSE2-NEXT: callq __extendhfsf2@PLT
174 ; SSE2-NEXT: callq nearbyintf@PLT
175 ; SSE2-NEXT: callq __truncsfhf2@PLT
176 ; SSE2-NEXT: popq %rax
179 ; AVX-LABEL: fnearbyint32:
181 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
182 ; AVX-NEXT: movzwl %ax, %eax
183 ; AVX-NEXT: vmovd %eax, %xmm0
184 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
185 ; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
186 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
187 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
188 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
189 ; AVX-NEXT: vmovd %xmm0, %eax
190 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
193 ; X86-LABEL: fnearbyint32:
195 ; X86-NEXT: vrndscalesh $12, {{[0-9]+}}(%esp), %xmm0, %xmm0
198 ; X64-LABEL: fnearbyint32:
200 ; X64-NEXT: vrndscalesh $12, %xmm0, %xmm0, %xmm0
202 %res = call half @llvm.experimental.constrained.nearbyint.f16(
204 metadata !"round.dynamic", metadata !"fpexcept.strict") #0
; Strict-FP roundeven on half (round to nearest, ties to even). Libcall to
; roundevenf without f16 support; single-instruction forms otherwise.
; Rounding immediate 8 = 0b1000: round to nearest-even (0) | suppress
; precision exceptions (8).
208 define half @froundeven16(half %f) #0 {
209 ; SSE2-LABEL: froundeven16:
211 ; SSE2-NEXT: pushq %rax
212 ; SSE2-NEXT: callq __extendhfsf2@PLT
213 ; SSE2-NEXT: callq roundevenf@PLT
214 ; SSE2-NEXT: callq __truncsfhf2@PLT
215 ; SSE2-NEXT: popq %rax
218 ; AVX-LABEL: froundeven16:
220 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
221 ; AVX-NEXT: movzwl %ax, %eax
222 ; AVX-NEXT: vmovd %eax, %xmm0
223 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
224 ; AVX-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0
225 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
226 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
227 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
228 ; AVX-NEXT: vmovd %xmm0, %eax
229 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
232 ; X86-LABEL: froundeven16:
234 ; X86-NEXT: vrndscalesh $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
237 ; X64-LABEL: froundeven16:
239 ; X64-NEXT: vrndscalesh $8, %xmm0, %xmm0, %xmm0
242 %res = call half @llvm.experimental.constrained.roundeven.f16(
243 half %f, metadata !"fpexcept.strict") #0
; Strict-FP round on half (round half away from zero). Unlike the other
; operations in this file, this rounding mode has no roundss/rndscale
; immediate encoding, so every configuration — including AVX512FP16 —
; widens to float and libcalls roundf, as the checks below show. The X86
; (i686) run returns the float from roundf on the x87 stack (fstps) before
; converting back to half.
247 define half @fround16(half %f) #0 {
248 ; SSE2-LABEL: fround16:
250 ; SSE2-NEXT: pushq %rax
251 ; SSE2-NEXT: callq __extendhfsf2@PLT
252 ; SSE2-NEXT: callq roundf@PLT
253 ; SSE2-NEXT: callq __truncsfhf2@PLT
254 ; SSE2-NEXT: popq %rax
257 ; AVX-LABEL: fround16:
259 ; AVX-NEXT: pushq %rax
260 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
261 ; AVX-NEXT: movzwl %ax, %eax
262 ; AVX-NEXT: vmovd %eax, %xmm0
263 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
264 ; AVX-NEXT: callq roundf@PLT
265 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
266 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
267 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
268 ; AVX-NEXT: vmovd %xmm0, %eax
269 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
270 ; AVX-NEXT: popq %rax
273 ; X86-LABEL: fround16:
275 ; X86-NEXT: subl $8, %esp
276 ; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
277 ; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
278 ; X86-NEXT: vmovss %xmm0, (%esp)
279 ; X86-NEXT: calll roundf
280 ; X86-NEXT: fstps {{[0-9]+}}(%esp)
282 ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
283 ; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
284 ; X86-NEXT: addl $8, %esp
287 ; X64-LABEL: fround16:
289 ; X64-NEXT: pushq %rax
290 ; X64-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
291 ; X64-NEXT: callq roundf@PLT
292 ; X64-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
293 ; X64-NEXT: popq %rax
296 %res = call half @llvm.experimental.constrained.round.f16(
297 half %f, metadata !"fpexcept.strict") #0
301 attributes #0 = { strictfp nounwind }