; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i32,X64-AVX1-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i32,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i32,AVX512-i32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i64,X64-AVX1-i64
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512-i64
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512DQ-i64
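;
; The sed invocation in each RUN line expands the iXLen placeholder to i32 or
; i64, so a single copy of the IR below exercises lrint lowering for both
; 32-bit and 64-bit result elements.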
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; X86-SSE2-LABEL: lrint_v1f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v1f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtss2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v1f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %eax
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX-i64-LABEL: lrint_v1f32:
; X64-AVX-i64:       # %bb.0:
; X64-AVX-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX-i64-NEXT:    retq
  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
  ret <1 x iXLen> %a
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)

define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; X86-SSE2-LABEL: lrint_v2f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v2f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v2f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v2f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v2f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v2f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %xmm0, %xmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
  ret <2 x iXLen> %a
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)

define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; X86-SSE2-LABEL: lrint_v4f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v4f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v4f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v4f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v4f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %xmm0, %ymm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
  ret <4 x iXLen> %a
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)

define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
; X86-SSE2-LABEL: lrint_v8f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    cvtps2dq %xmm1, %xmm1
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v8f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %ymm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v8f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %ymm0, %ymm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v8f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm3, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm2
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm3, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; X64-AVX1-i64-NEXT:    vmovaps %ymm2, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v8f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-i64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v8f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %ymm0, %zmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
  ret <8 x iXLen> %a
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)

define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
  ret <16 x iXLen> %a
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)

define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
; X86-SSE2-LABEL: lrint_v1f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v1f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtsd2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v1f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %eax
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX-i64-LABEL: lrint_v1f64:
; X64-AVX-i64:       # %bb.0:
; X64-AVX-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX-i64-NEXT:    retq
  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
  ret <1 x iXLen> %a
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)

define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; X86-SSE2-LABEL: lrint_v2f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v2f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; X86-AVX-NEXT:    vcvtsd2si %xmm1, %eax
; X86-AVX-NEXT:    vcvtsd2si %xmm0, %ecx
; X86-AVX-NEXT:    vmovd %ecx, %xmm0
; X86-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v2f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm1, %eax
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %ecx
; X64-AVX-i32-NEXT:    vmovd %ecx, %xmm0
; X64-AVX-i32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v2f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v2f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v2f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %xmm0, %xmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
  ret <2 x iXLen> %a
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)

define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; X86-SSE2-LABEL: lrint_v4f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm2
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v4f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v4f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X64-AVX-i32-NEXT:    vzeroupper
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v4f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v4f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v4f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %ymm0, %ymm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
  ret <4 x iXLen> %a
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)

define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; X86-SSE2-LABEL: lrint_v8f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movapd %xmm0, %xmm3
; X86-SSE2-NEXT:    movapd 8(%ebp), %xmm4
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm5
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm3
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm2
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: lrint_v8f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X86-AVX1-NEXT:    vcvtpd2dq %ymm1, %xmm1
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; AVX512-i32-LABEL: lrint_v8f64:
; AVX512-i32:       # %bb.0:
; AVX512-i32-NEXT:    vcvtpd2dq %zmm0, %ymm0
; AVX512-i32-NEXT:    ret{{[l|q]}}
;
; X64-AVX1-i32-LABEL: lrint_v8f64:
; X64-AVX1-i32:       # %bb.0:
; X64-AVX1-i32-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X64-AVX1-i32-NEXT:    vcvtpd2dq %ymm1, %xmm1
; X64-AVX1-i32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v8f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v8f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-i64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v8f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %zmm0, %zmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
  ret <8 x iXLen> %a
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)