; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=X64_AVX1
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=X32_AVX1
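
; The assertions in this file come from the script named in the NOTE line; if
; the RUN lines or codegen change, they are normally regenerated rather than
; edited by hand, e.g. (invocation assumed, run from an LLVM source checkout):
;   utils/update_llc_test_checks.py <path-to-this-test>
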
declare i32 @llvm.fptoui.sat.i32.f32(float)
declare i64 @llvm.fptosi.sat.i64.f64(double)

define float @trunc_unsigned_f32(float %x) #0 {
; SSE2-LABEL: trunc_unsigned_f32:
; SSE2-NEXT: cvttss2si %xmm0, %rax
; SSE2-NEXT: movl %eax, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-LABEL: trunc_unsigned_f32:
; SSE41-NEXT: roundss $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_unsigned_f32:
; X64_AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-LABEL: trunc_unsigned_f32:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
  %i = fptoui float %x to i32
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_unsigned_f64(double %x) #0 {
; SSE2-LABEL: trunc_unsigned_f64:
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: cvttsd2si %xmm0, %rdx
; SSE2-NEXT: andq %rcx, %rdx
; SSE2-NEXT: orq %rax, %rdx
; SSE2-NEXT: movq %rdx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addsd %xmm1, %xmm0
; SSE41-LABEL: trunc_unsigned_f64:
; SSE41-NEXT: roundsd $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_unsigned_f64:
; X64_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-LABEL: trunc_unsigned_f64:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
  %i = fptoui double %x to i64
  %r = uitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f32:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE41-LABEL: trunc_unsigned_v4f32:
; SSE41-NEXT: roundps $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_unsigned_v4f32:
; X64_AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_unsigned_v4f32:
; X32_AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; X32_AVX1-NEXT: retl
  %i = fptoui <4 x float> %x to <4 x i32>
  %r = uitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v2f64:
; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: subsd %xmm2, %xmm1
; SSE2-NEXT: cvttsd2si %xmm1, %rax
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rax, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: movq %rdx, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: subsd %xmm2, %xmm0
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rcx, %rdx
; SSE2-NEXT: orq %rax, %rdx
; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE41-LABEL: trunc_unsigned_v2f64:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_unsigned_v2f64:
; X64_AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_unsigned_v2f64:
; X32_AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; X32_AVX1-NEXT: retl
  %i = fptoui <2 x double> %x to <2 x i64>
  %r = uitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f64:
; SSE2-NEXT: movapd %xmm1, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
; SSE2-NEXT: subsd %xmm3, %xmm1
; SSE2-NEXT: cvttsd2si %xmm1, %rax
; SSE2-NEXT: cvttsd2si %xmm2, %rcx
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rax, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: movq %rdx, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE2-NEXT: cvttsd2si %xmm2, %rax
; SSE2-NEXT: subsd %xmm3, %xmm2
; SSE2-NEXT: cvttsd2si %xmm2, %rcx
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rcx, %rdx
; SSE2-NEXT: orq %rax, %rdx
; SSE2-NEXT: movq %rdx, %xmm2
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: movapd %xmm0, %xmm2
; SSE2-NEXT: subsd %xmm3, %xmm2
; SSE2-NEXT: cvttsd2si %xmm2, %rax
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rax, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: movq %rdx, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: subsd %xmm3, %xmm0
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rcx, %rdx
; SSE2-NEXT: orq %rax, %rdx
; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pand %xmm0, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: psrlq $32, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT: por %xmm5, %xmm2
; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT: subpd %xmm6, %xmm2
; SSE2-NEXT: addpd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: subpd %xmm6, %xmm1
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE41-LABEL: trunc_unsigned_v4f64:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: roundpd $11, %xmm1, %xmm1
; X64_AVX1-LABEL: trunc_unsigned_v4f64:
; X64_AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_unsigned_v4f64:
; X32_AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; X32_AVX1-NEXT: retl
  %i = fptoui <4 x double> %x to <4 x i64>
  %r = uitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

define float @trunc_signed_f32_no_fast_math(float %x) {
; SSE-LABEL: trunc_signed_f32_no_fast_math:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_signed_f32_no_fast_math:
; X64_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed_f32_no_fast_math:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: .cfi_def_cfa_offset 4
; X32_AVX1-NEXT: retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

; Without -0.0, it is ok to use roundss if it is available.
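; For example (illustrative only, not a checked case): with an input of -0.5,
;   %i = fptosi float %x to i32    ; %i = 0
;   %r = sitofp i32 %i to float    ; %r = +0.0
; while a truncating round (roundss/vroundss with immediate 11) would produce
; -0.0, so the single-instruction form is only used for functions marked with
; attribute #0 ("no-signed-zeros-fp-math"="true") below.
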
define float @trunc_signed_f32_nsz(float %x) #0 {
; SSE2-LABEL: trunc_signed_f32_nsz:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-LABEL: trunc_signed_f32_nsz:
; SSE41-NEXT: roundss $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_signed_f32_nsz:
; X64_AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed_f32_nsz:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed32_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; X64_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: .cfi_offset %ebp, -8
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT: vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_signed32_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed32_f64_nsz:
; SSE2-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-LABEL: trunc_signed32_f64_nsz:
; SSE41-NEXT: roundsd $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_signed32_f64_nsz:
; X64_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed32_f64_nsz:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_no_fast_math(float %x) {
; SSE-LABEL: trunc_f32_signed32_f64_no_fast_math:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; X64_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: .cfi_offset %ebp, -8
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT: vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT: retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_nsz(float %x) #0 {
; SSE-LABEL: trunc_f32_signed32_f64_nsz:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; X64_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT: vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define float @trunc_f64_signed32_f32_no_fast_math(double %x) {
; SSE-LABEL: trunc_f64_signed32_f32_no_fast_math:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; X64_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: .cfi_def_cfa_offset 4
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define float @trunc_f64_signed32_f32_nsz(double %x) #0 {
; SSE-LABEL: trunc_f64_signed32_f32_nsz:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; X64_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed_f64_no_fast_math:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; X64_AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X64_AVX1-NEXT: vcvttsd2si %xmm0, %rax
; X64_AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: .cfi_offset %ebp, -8
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $24, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: fisttpll (%esp)
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fildll {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fstpl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fldl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define double @trunc_signed_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed_f64_nsz:
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE41-LABEL: trunc_signed_f64_nsz:
; SSE41-NEXT: roundsd $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_signed_f64_nsz:
; X64_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed_f64_nsz:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_signed_v4f32_nsz(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f32_nsz:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-LABEL: trunc_signed_v4f32_nsz:
; SSE41-NEXT: roundps $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_signed_v4f32_nsz:
; X64_AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed_v4f32_nsz:
; X32_AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; X32_AVX1-NEXT: retl
  %i = fptosi <4 x float> %x to <4 x i32>
  %r = sitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_signed_v2f64_nsz(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v2f64_nsz:
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: cvtsi2sd %rcx, %xmm1
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-LABEL: trunc_signed_v2f64_nsz:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; X64_AVX1-LABEL: trunc_signed_v2f64_nsz:
; X64_AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed_v2f64_nsz:
; X32_AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; X32_AVX1-NEXT: retl
  %i = fptosi <2 x double> %x to <2 x i64>
  %r = sitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_signed_v4f64_nsz(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f64_nsz:
; SSE2-NEXT: cvttsd2si %xmm1, %rax
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE2-NEXT: cvttsd2si %xmm1, %rcx
; SSE2-NEXT: cvttsd2si %xmm0, %rdx
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rsi
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rdx, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rsi, %xmm1
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
; SSE2-NEXT: cvtsi2sd %rcx, %xmm2
; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE41-LABEL: trunc_signed_v4f64_nsz:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: roundpd $11, %xmm1, %xmm1
; X64_AVX1-LABEL: trunc_signed_v4f64_nsz:
; X64_AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed_v4f64_nsz:
; X32_AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; X32_AVX1-NEXT: retl
  %i = fptosi <4 x double> %x to <4 x i64>
  %r = sitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

; The FTRUNC ("round**" x86 asm) fold relies on UB in the case of overflow.
; This used to be guarded with an attribute check. That allowed existing
; code to continue working based on its assumptions that float->int
; overflow had saturating behavior.
;
; Now, we expect a front-end to use IR intrinsics if it wants to avoid this
; transform.
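;
; A minimal sketch of the distinction (kept in comments so it is not part of
; the checked test; the functions below are the real cases):
;
;   %i = fptoui float %x to i32                        ; poison on overflow,
;   %r = uitofp i32 %i to float                        ; so it may fold to a round/trunc
;
;   %i = call i32 @llvm.fptoui.sat.i32.f32(float %x)   ; saturates on overflow,
;   %r = uitofp i32 %i to float                        ; so the fold is not applied
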
define float @trunc_unsigned_f32_disable_via_intrinsic(float %x) #0 {
; SSE-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovael %eax, %ecx
; SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movl $-1, %eax
; SSE-NEXT: cmovbel %ecx, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; X64_AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; X64_AVX1-NEXT: vcvttss2si %xmm0, %rax
; X64_AVX1-NEXT: xorl %ecx, %ecx
; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX1-NEXT: vucomiss %xmm1, %xmm0
; X64_AVX1-NEXT: cmovael %eax, %ecx
; X64_AVX1-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64_AVX1-NEXT: movl $-1, %eax
; X64_AVX1-NEXT: cmovbel %ecx, %eax
; X64_AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vcvttss2si %xmm0, %eax
; X32_AVX1-NEXT: movl %eax, %ecx
; X32_AVX1-NEXT: sarl $31, %ecx
; X32_AVX1-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
; X32_AVX1-NEXT: vcvttss2si %xmm1, %edx
; X32_AVX1-NEXT: andl %ecx, %edx
; X32_AVX1-NEXT: orl %eax, %edx
; X32_AVX1-NEXT: xorl %eax, %eax
; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX1-NEXT: vucomiss %xmm1, %xmm0
; X32_AVX1-NEXT: cmovael %edx, %eax
; X32_AVX1-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32_AVX1-NEXT: movl $-1, %ecx
; X32_AVX1-NEXT: cmovbel %eax, %ecx
; X32_AVX1-NEXT: vmovd %ecx, %xmm0
; X32_AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32_AVX1-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: retl
  %i = call i32 @llvm.fptoui.sat.i32.f32(float %x)
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_disable_via_intrinsic(double %x) #0 {
; SSE-LABEL: trunc_signed_f64_disable_via_intrinsic:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT: cmovbeq %rax, %rcx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomisd %xmm0, %xmm0
; SSE-NEXT: cmovnpq %rcx, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; X64_AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:
; X64_AVX1-NEXT: vcvttsd2si %xmm0, %rax
; X64_AVX1-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64_AVX1-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; X64_AVX1-NEXT: cmovbeq %rax, %rcx
; X64_AVX1-NEXT: xorl %eax, %eax
; X64_AVX1-NEXT: vucomisd %xmm0, %xmm0
; X64_AVX1-NEXT: cmovnpq %rcx, %rax
; X64_AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; X64_AVX1-NEXT: retq
; X32_AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: pushl %esi
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $32, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: fisttpll (%esp)
; X32_AVX1-NEXT: xorl %eax, %eax
; X32_AVX1-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32_AVX1-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X32_AVX1-NEXT: movl $0, %edx
; X32_AVX1-NEXT: jb .LBB19_2
; X32_AVX1-NEXT: # %bb.1:
; X32_AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32_AVX1-NEXT: movl (%esp), %edx
; X32_AVX1-NEXT: .LBB19_2:
; X32_AVX1-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32_AVX1-NEXT: movl $-1, %esi
; X32_AVX1-NEXT: cmovbel %edx, %esi
; X32_AVX1-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X32_AVX1-NEXT: cmovbel %ecx, %edx
; X32_AVX1-NEXT: vucomisd %xmm0, %xmm0
; X32_AVX1-NEXT: cmovpl %eax, %edx
; X32_AVX1-NEXT: cmovpl %eax, %esi
; X32_AVX1-NEXT: vmovd %esi, %xmm0
; X32_AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fildll {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fstpl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fldl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: leal -4(%ebp), %esp
; X32_AVX1-NEXT: popl %esi
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: retl
  %i = call i64 @llvm.fptosi.sat.i64.f64(double %x)
  %r = sitofp i64 %i to double
  ret double %r
}

attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }