1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.1 | FileCheck %s --check-prefixes=SSE
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
4 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
6 ; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
7 ; We can't combine into 'round' instructions because the behavior is different for out-of-range values.
; Declarations of the x86 conversion intrinsics called by the test functions
; in this file. The two packed variants (cvttps2dq, cvttpd2dq) return
; <4 x i32>; the four scalar variants take a whole vector argument and return
; a single i32 or i64.
9 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
10 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
11 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
12 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
13 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
14 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
; Memory-operand variant, float -> i32 -> float.
; Loads a <4 x float> from %p (align 16), passes the whole vector to
; @llvm.x86.sse.cvttss2si to obtain an i32, then converts that i32 back to
; float with sitofp.
; The generated checks expect the load to be folded into cvttss2si as a
; (%rdi) memory operand, followed by cvtsi2ssl (v-prefixed forms in the avx
; runs). Per the PR37751 note near the top of this file, the pair is not
; expected to be combined into a 'round' instruction.
16 define float @float_to_int_to_float_mem_f32_i32(<4 x float>* %p) #0 {
17 ; SSE-LABEL: float_to_int_to_float_mem_f32_i32:
19 ; SSE-NEXT: cvttss2si (%rdi), %eax
20 ; SSE-NEXT: cvtsi2ssl %eax, %xmm0
23 ; AVX-LABEL: float_to_int_to_float_mem_f32_i32:
25 ; AVX-NEXT: vcvttss2si (%rdi), %eax
26 ; AVX-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
28 %x = load <4 x float>, <4 x float>* %p, align 16
29 %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
30 %sitofp = sitofp i32 %fptosi to float
; Register-operand variant, float -> i32 -> float.
; Takes the <4 x float> directly as an argument, passes it to
; @llvm.x86.sse.cvttss2si to obtain an i32, then converts that i32 back to
; float with sitofp.
; The generated checks expect cvttss2si reading %xmm0 followed by cvtsi2ssl.
; The sse4.1 run additionally expects an xorps that zeroes %xmm0 before the
; int-to-float conversion, while the avx runs use %xmm1 as the extra source
; operand instead.
; NOTE(review): the xorps is presumably there to break a dependency on the
; destination register - this file does not state the reason; confirm.
34 define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) #0 {
35 ; SSE-LABEL: float_to_int_to_float_reg_f32_i32:
37 ; SSE-NEXT: cvttss2si %xmm0, %eax
38 ; SSE-NEXT: xorps %xmm0, %xmm0
39 ; SSE-NEXT: cvtsi2ssl %eax, %xmm0
42 ; AVX-LABEL: float_to_int_to_float_reg_f32_i32:
44 ; AVX-NEXT: vcvttss2si %xmm0, %eax
45 ; AVX-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
47 %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
48 %sitofp = sitofp i32 %fptosi to float
; Memory-operand variant, float -> i64 -> float.
; Loads a <4 x float> from %p (align 16), passes the whole vector to
; @llvm.x86.sse.cvttss2si64 to obtain an i64, then converts that i64 back to
; float with sitofp.
; The generated checks expect the load to be folded into cvttss2si as a
; (%rdi) memory operand with a 64-bit destination (%rax), followed by
; cvtsi2ssq (v-prefixed forms in the avx runs).
52 define float @float_to_int_to_float_mem_f32_i64(<4 x float>* %p) #0 {
53 ; SSE-LABEL: float_to_int_to_float_mem_f32_i64:
55 ; SSE-NEXT: cvttss2si (%rdi), %rax
56 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
59 ; AVX-LABEL: float_to_int_to_float_mem_f32_i64:
61 ; AVX-NEXT: vcvttss2si (%rdi), %rax
62 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
64 %x = load <4 x float>, <4 x float>* %p, align 16
65 %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
66 %sitofp = sitofp i64 %fptosi to float
; Register-operand variant, float -> i64 -> float.
; Takes the <4 x float> directly as an argument, passes it to
; @llvm.x86.sse.cvttss2si64 to obtain an i64, then converts that i64 back to
; float with sitofp.
; The generated checks expect cvttss2si from %xmm0 into %rax followed by
; cvtsi2ssq. The sse4.1 run additionally expects an xorps that zeroes %xmm0
; before the int-to-float conversion; the avx runs use %xmm1 as the extra
; source operand instead.
70 define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) #0 {
71 ; SSE-LABEL: float_to_int_to_float_reg_f32_i64:
73 ; SSE-NEXT: cvttss2si %xmm0, %rax
74 ; SSE-NEXT: xorps %xmm0, %xmm0
75 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
78 ; AVX-LABEL: float_to_int_to_float_reg_f32_i64:
80 ; AVX-NEXT: vcvttss2si %xmm0, %rax
81 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm0
83 %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
84 %sitofp = sitofp i64 %fptosi to float
; Memory-operand variant, double -> i32 -> double.
; Loads a <2 x double> from %p (align 16), passes the whole vector to
; @llvm.x86.sse2.cvttsd2si to obtain an i32, then converts that i32 back to
; double with sitofp.
; The generated checks expect the load to be folded into cvttsd2si as a
; (%rdi) memory operand, followed by cvtsi2sdl (v-prefixed forms in the avx
; runs).
88 define double @float_to_int_to_float_mem_f64_i32(<2 x double>* %p) #0 {
89 ; SSE-LABEL: float_to_int_to_float_mem_f64_i32:
91 ; SSE-NEXT: cvttsd2si (%rdi), %eax
92 ; SSE-NEXT: cvtsi2sdl %eax, %xmm0
95 ; AVX-LABEL: float_to_int_to_float_mem_f64_i32:
97 ; AVX-NEXT: vcvttsd2si (%rdi), %eax
98 ; AVX-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
100 %x = load <2 x double>, <2 x double>* %p, align 16
101 %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
102 %sitofp = sitofp i32 %fptosi to double
; Register-operand variant, double -> i32 -> double.
; Takes the <2 x double> directly as an argument, passes it to
; @llvm.x86.sse2.cvttsd2si to obtain an i32, then converts that i32 back to
; double with sitofp.
; The generated checks expect cvttsd2si from %xmm0 into %eax followed by
; cvtsi2sdl. The sse4.1 run additionally expects an xorps that zeroes %xmm0
; before the int-to-float conversion; the avx runs use %xmm1 as the extra
; source operand instead.
106 define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) #0 {
107 ; SSE-LABEL: float_to_int_to_float_reg_f64_i32:
109 ; SSE-NEXT: cvttsd2si %xmm0, %eax
110 ; SSE-NEXT: xorps %xmm0, %xmm0
111 ; SSE-NEXT: cvtsi2sdl %eax, %xmm0
114 ; AVX-LABEL: float_to_int_to_float_reg_f64_i32:
116 ; AVX-NEXT: vcvttsd2si %xmm0, %eax
117 ; AVX-NEXT: vcvtsi2sdl %eax, %xmm1, %xmm0
119 %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
120 %sitofp = sitofp i32 %fptosi to double
; Memory-operand variant, double -> i64 -> double.
; Loads a <2 x double> from %p (align 16), passes the whole vector to
; @llvm.x86.sse2.cvttsd2si64 to obtain an i64, then converts that i64 back to
; double with sitofp.
; The generated checks expect the load to be folded into cvttsd2si as a
; (%rdi) memory operand with a 64-bit destination (%rax), followed by
; cvtsi2sdq (v-prefixed forms in the avx runs).
124 define double @float_to_int_to_float_mem_f64_i64(<2 x double>* %p) #0 {
125 ; SSE-LABEL: float_to_int_to_float_mem_f64_i64:
127 ; SSE-NEXT: cvttsd2si (%rdi), %rax
128 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
131 ; AVX-LABEL: float_to_int_to_float_mem_f64_i64:
133 ; AVX-NEXT: vcvttsd2si (%rdi), %rax
134 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
136 %x = load <2 x double>, <2 x double>* %p, align 16
137 %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
138 %sitofp = sitofp i64 %fptosi to double
; Register-operand variant, double -> i64 -> double.
; Takes the <2 x double> directly as an argument, passes it to
; @llvm.x86.sse2.cvttsd2si64 to obtain an i64, then converts that i64 back to
; double with sitofp.
; The generated checks expect cvttsd2si from %xmm0 into %rax followed by
; cvtsi2sdq. The sse4.1 run additionally expects an xorps that zeroes %xmm0
; before the int-to-float conversion; the avx runs use %xmm1 as the extra
; source operand instead.
142 define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) #0 {
143 ; SSE-LABEL: float_to_int_to_float_reg_f64_i64:
145 ; SSE-NEXT: cvttsd2si %xmm0, %rax
146 ; SSE-NEXT: xorps %xmm0, %xmm0
147 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
150 ; AVX-LABEL: float_to_int_to_float_reg_f64_i64:
152 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
153 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm0
155 %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
156 %sitofp = sitofp i64 %fptosi to double
; Memory-operand packed variant, <4 x float> -> <4 x i32> -> <4 x float>.
; Loads a <4 x float> from %p (align 16), converts it to <4 x i32> with
; @llvm.x86.sse2.cvttps2dq, converts all four lanes back with sitofp and
; returns the resulting vector.
; The generated checks expect the load to be folded into cvttps2dq as a
; (%rdi) memory operand, followed by cvtdq2ps (v-prefixed forms in the avx
; runs).
160 define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) #0 {
161 ; SSE-LABEL: float_to_int_to_float_mem_v4f32:
163 ; SSE-NEXT: cvttps2dq (%rdi), %xmm0
164 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
167 ; AVX-LABEL: float_to_int_to_float_mem_v4f32:
169 ; AVX-NEXT: vcvttps2dq (%rdi), %xmm0
170 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
172 %x = load <4 x float>, <4 x float>* %p, align 16
173 %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
174 %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
175 ret <4 x float> %sitofp
; Register-operand packed variant, <4 x float> -> <4 x i32> -> <4 x float>.
; Takes the vector as an argument, converts it to <4 x i32> with
; @llvm.x86.sse2.cvttps2dq, converts all four lanes back with sitofp and
; returns the resulting vector.
; The generated checks expect cvttps2dq followed by cvtdq2ps, both operating
; in place on %xmm0 (v-prefixed forms in the avx runs).
178 define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) #0 {
179 ; SSE-LABEL: float_to_int_to_float_reg_v4f32:
181 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
182 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
185 ; AVX-LABEL: float_to_int_to_float_reg_v4f32:
187 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
188 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
190 %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
191 %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
192 ret <4 x float> %sitofp
; Memory-operand packed variant, <2 x double> -> i32 lanes -> <2 x double>.
; Loads a <2 x double> from %p (align 16) and converts it with
; @llvm.x86.sse2.cvttpd2dq, which is declared as returning <4 x i32>. The
; shufflevector keeps only elements 0 and 1 of that result (despite its
; %concat name, it narrows the vector to <2 x i32>), and sitofp converts those
; two lanes back to double before the vector is returned.
; The generated checks expect the load to be folded into cvttpd2dq as a
; (%rdi) memory operand, followed by cvtdq2pd; the avx runs expect the
; memory form to be spelled vcvttpd2dqx.
195 define <2 x double> @float_to_int_to_float_mem_v2f64(<2 x double>* %p) #0 {
196 ; SSE-LABEL: float_to_int_to_float_mem_v2f64:
198 ; SSE-NEXT: cvttpd2dq (%rdi), %xmm0
199 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
202 ; AVX-LABEL: float_to_int_to_float_mem_v2f64:
204 ; AVX-NEXT: vcvttpd2dqx (%rdi), %xmm0
205 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
207 %x = load <2 x double>, <2 x double>* %p, align 16
208 %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
209 %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
210 %sitofp = sitofp <2 x i32> %concat to <2 x double>
211 ret <2 x double> %sitofp
; Register-operand packed variant, <2 x double> -> i32 lanes -> <2 x double>.
; Takes the vector as an argument and converts it with
; @llvm.x86.sse2.cvttpd2dq, which is declared as returning <4 x i32>. The
; shufflevector keeps only elements 0 and 1 of that result (despite its
; %concat name, it narrows the vector to <2 x i32>), and sitofp converts those
; two lanes back to double before the vector is returned.
; The generated checks expect cvttpd2dq followed by cvtdq2pd, both operating
; in place on %xmm0 (v-prefixed forms in the avx runs).
214 define <2 x double> @float_to_int_to_float_reg_v2f64(<2 x double> %x) #0 {
215 ; SSE-LABEL: float_to_int_to_float_reg_v2f64:
217 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
218 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
221 ; AVX-LABEL: float_to_int_to_float_reg_v2f64:
223 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
224 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
226 %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
227 %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
228 %sitofp = sitofp <2 x i32> %concat to <2 x double>
229 ret <2 x double> %sitofp
; Attribute group #0, referenced by every test function defined above: sets
; the "no-signed-zeros-fp-math" string attribute to "true".
232 attributes #0 = { "no-signed-zeros-fp-math"="true" }