; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefix=SSE41-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefix=SSE41-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefix=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefix=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefix=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefix=AVX-X64

declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.round.f32(float, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)
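
; The ROUND* immediates checked below select the rounding behavior: bits 1:0
; pick the mode (00 = nearest-even, 01 = down, 10 = up, 11 = truncate), bit 2
; selects the dynamic mode from MXCSR.RC instead, and bit 3 suppresses the
; precision (inexact) exception. Hence $9 = floor, $10 = ceil, $11 = trunc,
; $8 = roundeven, $4 = rint (dynamic mode, inexact reported), and
; $12 = nearbyint (dynamic mode, inexact suppressed).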
define float @fceil32(float %f) #0 {
; SSE41-X86-LABEL: fceil32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $10, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fceil32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $10, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fceil32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fceil32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.ceil.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

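; On i686 a float/double return value travels in st(0), so the 32-bit
; sequences store the SSE result to the stack and reload it with flds/fldl;
; the wait after the load flushes any pending x87 exception, which the
; strictfp lowering must preserve.
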
define double @fceilf64(double %f) #0 {
; SSE41-X86-LABEL: fceilf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $10, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fceilf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $10, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fceilf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fceilf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.ceil.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

define float @ffloor32(float %f) #0 {
; SSE41-X86-LABEL: ffloor32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $9, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: ffloor32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $9, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: ffloor32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: ffloor32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.floor.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @ffloorf64(double %f) #0 {
; SSE41-X86-LABEL: ffloorf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $9, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: ffloorf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $9, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: ffloorf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: ffloorf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.floor.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

define float @ftrunc32(float %f) #0 {
; SSE41-X86-LABEL: ftrunc32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: ftrunc32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: ftrunc32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: ftrunc32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.trunc.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @ftruncf64(double %f) #0 {
; SSE41-X86-LABEL: ftruncf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: ftruncf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: ftruncf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: ftruncf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.trunc.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

define float @frint32(float %f) #0 {
; SSE41-X86-LABEL: frint32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $4, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: frint32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $4, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: frint32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: frint32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.rint.f32(
                        float %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %res
}

define double @frintf64(double %f) #0 {
; SSE41-X86-LABEL: frintf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $4, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: frintf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $4, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: frintf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: frintf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.rint.f64(
                        double %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %res
}

define float @fnearbyint32(float %f) #0 {
; SSE41-X86-LABEL: fnearbyint32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $12, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fnearbyint32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $12, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fnearbyint32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fnearbyint32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.nearbyint.f32(
                        float %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %res
}

define double @fnearbyintf64(double %f) #0 {
; SSE41-X86-LABEL: fnearbyintf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $12, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fnearbyintf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $12, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fnearbyintf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fnearbyintf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.nearbyint.f64(
                        double %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %res
}

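; SSE4.1 ROUND* has no encoding for round-half-away-from-zero, so the
; constrained round intrinsic cannot be selected to a single instruction and
; lowers to the roundf/round libcalls instead.
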
define float @fround32(float %f) #0 {
; SSE41-X86-LABEL: fround32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    calll roundf
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fround32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    pushq %rax
; SSE41-X64-NEXT:    .cfi_def_cfa_offset 16
; SSE41-X64-NEXT:    callq roundf@PLT
; SSE41-X64-NEXT:    popq %rax
; SSE41-X64-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fround32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    calll roundf
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fround32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    pushq %rax
; AVX-X64-NEXT:    .cfi_def_cfa_offset 16
; AVX-X64-NEXT:    callq roundf@PLT
; AVX-X64-NEXT:    popq %rax
; AVX-X64-NEXT:    .cfi_def_cfa_offset 8
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.round.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @froundf64(double %f) #0 {
; SSE41-X86-LABEL: froundf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 12
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    calll round
; SSE41-X86-NEXT:    addl $8, %esp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: froundf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    pushq %rax
; SSE41-X64-NEXT:    .cfi_def_cfa_offset 16
; SSE41-X64-NEXT:    callq round@PLT
; SSE41-X64-NEXT:    popq %rax
; SSE41-X64-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: froundf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 12
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    calll round
; AVX-X86-NEXT:    addl $8, %esp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: froundf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    pushq %rax
; AVX-X64-NEXT:    .cfi_def_cfa_offset 16
; AVX-X64-NEXT:    callq round@PLT
; AVX-X64-NEXT:    popq %rax
; AVX-X64-NEXT:    .cfi_def_cfa_offset 8
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.round.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

define float @froundeven32(float %f) #0 {
; SSE41-X86-LABEL: froundeven32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $8, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: froundeven32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $8, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: froundeven32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $8, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: froundeven32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $8, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.roundeven.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @froundevenf64(double %f) #0 {
; SSE41-X86-LABEL: froundevenf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $8, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: froundevenf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $8, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: froundevenf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $8, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: froundevenf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $8, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.roundeven.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

attributes #0 = { strictfp }