; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX512DQ
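;
; Check codegen for the @llvm.llrint.* intrinsics: without AVX512DQ the vector
; is scalarized and converted one element at a time with cvtss2si/cvtsd2si,
; while AVX512DQ (with AVX512VL) selects the packed vcvtps2qq/vcvtpd2qq
; instructions directly.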

define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; SSE-LABEL: llrint_v1i64_v1f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2si %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: llrint_v1i64_v1f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtss2si %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v1i64_v1f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtss2si %xmm0, %rax
; AVX512DQ-NEXT:    retq
  %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
  ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)

define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; SSE-LABEL: llrint_v2i64_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-NEXT:    cvtss2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: llrint_v2i64_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvtss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v2i64_v2f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtps2qq %xmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
  ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)

define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; SSE-LABEL: llrint_v4i64_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE-NEXT:    cvtss2si %xmm1, %rax
; SSE-NEXT:    movq %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
; SSE-NEXT:    cvtss2si %xmm1, %rax
; SSE-NEXT:    movq %rax, %xmm3
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    cvtss2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: llrint_v4i64_v4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT:    vcvtss2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX1-NEXT:    vcvtss2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vcvtss2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vcvtss2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: llrint_v4i64_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v4i64_v4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtps2qq %xmm0, %ymm0
; AVX512DQ-NEXT:    retq
  %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
  ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)

define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; SSE-LABEL: llrint_v8i64_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cvtss2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[1,1]
; SSE-NEXT:    cvtss2si %xmm3, %rax
; SSE-NEXT:    movq %rax, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,3],xmm2[3,3]
; SSE-NEXT:    cvtss2si %xmm3, %rax
; SSE-NEXT:    movq %rax, %xmm3
; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT:    cvtss2si %xmm2, %rax
; SSE-NEXT:    movq %rax, %xmm4
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0]
; SSE-NEXT:    cvtss2si %xmm1, %rax
; SSE-NEXT:    movq %rax, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[1,1]
; SSE-NEXT:    cvtss2si %xmm3, %rax
; SSE-NEXT:    movq %rax, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm1, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,3],xmm1[3,3]
; SSE-NEXT:    cvtss2si %xmm3, %rax
; SSE-NEXT:    movq %rax, %xmm5
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    cvtss2si %xmm1, %rax
; SSE-NEXT:    movq %rax, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; SSE-NEXT:    movdqa %xmm4, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: llrint_v8i64_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT:    vcvtss2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX1-NEXT:    vcvtss2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vcvtss2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vcvtss2si %xmm3, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT:    vcvtss2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT:    vcvtss2si %xmm3, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX1-NEXT:    vcvtss2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vcvtss2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: llrint_v8i64_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v8i64_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtps2qq %ymm0, %zmm0
; AVX512DQ-NEXT:    retq
  %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
  ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)

define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; SSE-LABEL: llrint_v16i64_v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    cvtss2si %xmm0, %rcx
; SSE-NEXT:    movq %rcx, %xmm4
; SSE-NEXT:    movaps %xmm0, %xmm5
; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[1,1]
; SSE-NEXT:    cvtss2si %xmm5, %rcx
; SSE-NEXT:    movq %rcx, %xmm5
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; SSE-NEXT:    movaps %xmm0, %xmm5
; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[3,3],xmm0[3,3]
; SSE-NEXT:    cvtss2si %xmm5, %rcx
; SSE-NEXT:    movq %rcx, %xmm5
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    cvtss2si %xmm0, %rcx
; SSE-NEXT:    movq %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
; SSE-NEXT:    cvtss2si %xmm1, %rcx
; SSE-NEXT:    movq %rcx, %xmm5
; SSE-NEXT:    movaps %xmm1, %xmm6
; SSE-NEXT:    shufps {{.*#+}} xmm6 = xmm6[1,1],xmm1[1,1]
; SSE-NEXT:    cvtss2si %xmm6, %rcx
; SSE-NEXT:    movq %rcx, %xmm6
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
; SSE-NEXT:    movaps %xmm1, %xmm6
; SSE-NEXT:    shufps {{.*#+}} xmm6 = xmm6[3,3],xmm1[3,3]
; SSE-NEXT:    cvtss2si %xmm6, %rcx
; SSE-NEXT:    movq %rcx, %xmm6
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    cvtss2si %xmm1, %rcx
; SSE-NEXT:    movq %rcx, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm6[0]
; SSE-NEXT:    cvtss2si %xmm2, %rcx
; SSE-NEXT:    movq %rcx, %xmm6
; SSE-NEXT:    movaps %xmm2, %xmm7
; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[1,1],xmm2[1,1]
; SSE-NEXT:    cvtss2si %xmm7, %rcx
; SSE-NEXT:    movq %rcx, %xmm7
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; SSE-NEXT:    movaps %xmm2, %xmm7
; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[3,3],xmm2[3,3]
; SSE-NEXT:    cvtss2si %xmm7, %rcx
; SSE-NEXT:    movq %rcx, %xmm7
; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT:    cvtss2si %xmm2, %rcx
; SSE-NEXT:    movq %rcx, %xmm2
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
; SSE-NEXT:    cvtss2si %xmm3, %rcx
; SSE-NEXT:    movq %rcx, %xmm7
; SSE-NEXT:    movaps %xmm3, %xmm8
; SSE-NEXT:    shufps {{.*#+}} xmm8 = xmm8[1,1],xmm3[1,1]
; SSE-NEXT:    cvtss2si %xmm8, %rcx
; SSE-NEXT:    movq %rcx, %xmm8
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm8[0]
; SSE-NEXT:    movaps %xmm3, %xmm8
; SSE-NEXT:    shufps {{.*#+}} xmm8 = xmm8[3,3],xmm3[3,3]
; SSE-NEXT:    cvtss2si %xmm8, %rcx
; SSE-NEXT:    movq %rcx, %xmm8
; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT:    cvtss2si %xmm3, %rcx
; SSE-NEXT:    movq %rcx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm8[0]
; SSE-NEXT:    movdqa %xmm3, 112(%rdi)
; SSE-NEXT:    movdqa %xmm7, 96(%rdi)
; SSE-NEXT:    movdqa %xmm2, 80(%rdi)
; SSE-NEXT:    movdqa %xmm6, 64(%rdi)
; SSE-NEXT:    movdqa %xmm1, 48(%rdi)
; SSE-NEXT:    movdqa %xmm5, 32(%rdi)
; SSE-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE-NEXT:    movdqa %xmm4, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: llrint_v16i64_v16f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps %ymm0, %ymm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm2[3,3,3,3]
; AVX1-NEXT:    vcvtss2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX1-NEXT:    vcvtss2si %xmm3, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT:    vcvtss2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
; AVX1-NEXT:    vcvtss2si %xmm4, %rax
; AVX1-NEXT:    vmovq %rax, %xmm4
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm2[3,3,3,3]
; AVX1-NEXT:    vcvtss2si %xmm3, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vshufpd {{.*#+}} xmm4 = xmm2[1,0]
; AVX1-NEXT:    vcvtss2si %xmm4, %rax
; AVX1-NEXT:    vmovq %rax, %xmm4
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX1-NEXT:    vcvtss2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm4
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX1-NEXT:    vcvtss2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm4
; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX1-NEXT:    vcvtss2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX1-NEXT:    vcvtss2si %xmm3, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vcvtss2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; AVX1-NEXT:    vcvtss2si %xmm5, %rax
; AVX1-NEXT:    vmovq %rax, %xmm5
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; AVX1-NEXT:    vcvtss2si %xmm3, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vshufpd {{.*#+}} xmm5 = xmm1[1,0]
; AVX1-NEXT:    vcvtss2si %xmm5, %rax
; AVX1-NEXT:    vmovq %rax, %xmm5
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm5[0],xmm3[0]
; AVX1-NEXT:    vcvtss2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm5
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vcvtss2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm3
; AVX1-NEXT:    vmovaps %ymm4, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: llrint_v16i64_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; AVX512-NEXT:    vcvtss2si %xmm4, %rax
; AVX512-NEXT:    vmovq %rax, %xmm4
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm2
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; AVX512-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vshufpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX512-NEXT:    vcvtss2si %xmm4, %rax
; AVX512-NEXT:    vmovq %rax, %xmm4
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX512-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm4
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0]
; AVX512-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512-NEXT:    vshufps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX512-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vshufpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX512-NEXT:    vcvtss2si %xmm4, %rax
; AVX512-NEXT:    vmovq %rax, %xmm4
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX512-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm4
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
; AVX512-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v16i64_v16f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtps2qq %ymm0, %zmm2
; AVX512DQ-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vcvtps2qq %ymm0, %zmm1
; AVX512DQ-NEXT:    vmovaps %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
  %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
  ret <16 x i64> %a
}
declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)

define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
; SSE-LABEL: llrint_v1i64_v1f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2si %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: llrint_v1i64_v1f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsd2si %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v1i64_v1f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512DQ-NEXT:    retq
  %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
  ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)

define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
; SSE-LABEL: llrint_v2i64_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    cvtsd2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: llrint_v2i64_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvtsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v2i64_v2f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtpd2qq %xmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
  ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)

define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
; SSE-LABEL: llrint_v4i64_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm2
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    cvtsd2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    cvtsd2si %xmm1, %rax
; SSE-NEXT:    movq %rax, %xmm3
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    cvtsd2si %xmm1, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: llrint_v4i64_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vcvtsd2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT:    vcvtsd2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vcvtsd2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vcvtsd2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: llrint_v4i64_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v4i64_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtpd2qq %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
  %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
  ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)

define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
; SSE-LABEL: llrint_v8i64_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm4
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    cvtsd2si %xmm0, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
; SSE-NEXT:    cvtsd2si %xmm1, %rax
; SSE-NEXT:    movq %rax, %xmm5
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    cvtsd2si %xmm1, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0]
; SSE-NEXT:    cvtsd2si %xmm2, %rax
; SSE-NEXT:    movq %rax, %xmm6
; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT:    cvtsd2si %xmm2, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0]
; SSE-NEXT:    cvtsd2si %xmm3, %rax
; SSE-NEXT:    movq %rax, %xmm7
; SSE-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT:    cvtsd2si %xmm3, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm5, %xmm1
; SSE-NEXT:    movdqa %xmm6, %xmm2
; SSE-NEXT:    movdqa %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: llrint_v8i64_v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vcvtsd2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-NEXT:    vcvtsd2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vcvtsd2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vcvtsd2si %xmm0, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vcvtsd2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-NEXT:    vcvtsd2si %xmm2, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vcvtsd2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm3
; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT:    vcvtsd2si %xmm1, %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: llrint_v8i64_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-NEXT:    vmovq %rax, %xmm2
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm3
; AVX512-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
;
; AVX512DQ-LABEL: llrint_v8i64_v8f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
  ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)