1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ
10 ; 32-bit smoke tests: the i686 invocations below only check that llc compiles this file without crashing (their output is not piped to FileCheck).
11 ; RUN: llc < %s -mtriple=i686-unknown-unknown
12 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
13 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
16 ; Double to Signed Integer
; Signed conversion of <2 x double> to <2 x i64> (a single vector fptosi).
; Per the assertions below, only the avx512dq runs use a packed vcvttpd2qq;
; every other run converts each element with a scalar (v)cvttsd2si and
; reassembles the vector with (v)punpcklqdq.
19 define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
20 ; SSE-LABEL: fptosi_2f64_to_2i64:
22 ; SSE-NEXT: cvttsd2si %xmm0, %rax
23 ; SSE-NEXT: movq %rax, %xmm1
24 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
25 ; SSE-NEXT: cvttsd2si %xmm0, %rax
26 ; SSE-NEXT: movq %rax, %xmm0
27 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28 ; SSE-NEXT: movdqa %xmm1, %xmm0
31 ; VEX-LABEL: fptosi_2f64_to_2i64:
33 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
34 ; VEX-NEXT: vmovq %rax, %xmm1
35 ; VEX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
36 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
37 ; VEX-NEXT: vmovq %rax, %xmm0
38 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
41 ; AVX512F-LABEL: fptosi_2f64_to_2i64:
43 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
44 ; AVX512F-NEXT: vmovq %rax, %xmm1
45 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
46 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
47 ; AVX512F-NEXT: vmovq %rax, %xmm0
48 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
51 ; AVX512VL-LABEL: fptosi_2f64_to_2i64:
53 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
54 ; AVX512VL-NEXT: vmovq %rax, %xmm1
55 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
56 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
57 ; AVX512VL-NEXT: vmovq %rax, %xmm0
58 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
61 ; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
63 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
64 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
65 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
66 ; AVX512DQ-NEXT: vzeroupper
69 ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
70 ; AVX512VLDQ: # %bb.0:
71 ; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
72 ; AVX512VLDQ-NEXT: retq
73 %cvt = fptosi <2 x double> %a to <2 x i64>
; Signed conversion of <2 x double> to <2 x i32>, widened to <4 x i32> with
; zeroed upper lanes. Every run is expected to need a single (v)cvttpd2dq.
77 define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
78 ; SSE-LABEL: fptosi_2f64_to_4i32:
80 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
83 ; AVX-LABEL: fptosi_2f64_to_4i32:
85 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
87 %cvt = fptosi <2 x double> %a to <2 x i32>
; Mask indices 2 and 3 select from the zeroinitializer operand, so the upper
; two result lanes are zero.
88 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Signed conversion of <2 x double> to <2 x i32> with no widening shuffle.
; Every run is expected to emit a single (v)cvttpd2dq on xmm0.
92 define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
93 ; SSE-LABEL: fptosi_2f64_to_2i32:
95 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
98 ; AVX-LABEL: fptosi_2f64_to_2i32:
100 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
102 %cvt = fptosi <2 x double> %a to <2 x i32>
; Widens the <2 x double> argument to <4 x double> first, then performs a
; signed conversion to <4 x i32>. Only the low two lanes carry defined data.
; The sse2 run converts just xmm0; the AVX-family runs convert the full ymm0.
106 define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
107 ; SSE-LABEL: fptosi_4f64_to_2i32:
109 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
112 ; AVX-LABEL: fptosi_4f64_to_2i32:
114 ; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
115 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
116 ; AVX-NEXT: vzeroupper
; Upper two lanes of the widened vector are undef (mask entries are undef).
118 %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
119 %cvt = fptosi <4 x double> %ext to <4 x i32>
; Signed conversion of <4 x double> to <4 x i64> (a single vector fptosi).
; Per the assertions below, the avx512dq runs use a packed vcvttpd2qq; all
; other runs scalarize into four (v)cvttsd2si conversions and rebuild the
; result from 128-bit halves.
123 define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
124 ; SSE-LABEL: fptosi_4f64_to_4i64:
126 ; SSE-NEXT: cvttsd2si %xmm0, %rax
127 ; SSE-NEXT: movq %rax, %xmm2
128 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
129 ; SSE-NEXT: cvttsd2si %xmm0, %rax
130 ; SSE-NEXT: movq %rax, %xmm0
131 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
132 ; SSE-NEXT: cvttsd2si %xmm1, %rax
133 ; SSE-NEXT: movq %rax, %xmm3
134 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
135 ; SSE-NEXT: cvttsd2si %xmm1, %rax
136 ; SSE-NEXT: movq %rax, %xmm0
137 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
138 ; SSE-NEXT: movdqa %xmm2, %xmm0
139 ; SSE-NEXT: movdqa %xmm3, %xmm1
142 ; AVX1-LABEL: fptosi_4f64_to_4i64:
144 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
145 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
146 ; AVX1-NEXT: vmovq %rax, %xmm2
147 ; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
148 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
149 ; AVX1-NEXT: vmovq %rax, %xmm1
150 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
151 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
152 ; AVX1-NEXT: vmovq %rax, %xmm2
153 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
154 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
155 ; AVX1-NEXT: vmovq %rax, %xmm0
156 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
157 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
160 ; AVX2-LABEL: fptosi_4f64_to_4i64:
162 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
163 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
164 ; AVX2-NEXT: vmovq %rax, %xmm2
165 ; AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
166 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
167 ; AVX2-NEXT: vmovq %rax, %xmm1
168 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
169 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
170 ; AVX2-NEXT: vmovq %rax, %xmm2
171 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
172 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
173 ; AVX2-NEXT: vmovq %rax, %xmm0
174 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
175 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
178 ; AVX512F-LABEL: fptosi_4f64_to_4i64:
180 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
181 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
182 ; AVX512F-NEXT: vmovq %rax, %xmm2
183 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
184 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
185 ; AVX512F-NEXT: vmovq %rax, %xmm1
186 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
187 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
188 ; AVX512F-NEXT: vmovq %rax, %xmm2
189 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
190 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
191 ; AVX512F-NEXT: vmovq %rax, %xmm0
192 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
193 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
196 ; AVX512VL-LABEL: fptosi_4f64_to_4i64:
198 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
199 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
200 ; AVX512VL-NEXT: vmovq %rax, %xmm2
201 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
202 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
203 ; AVX512VL-NEXT: vmovq %rax, %xmm1
204 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
205 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
206 ; AVX512VL-NEXT: vmovq %rax, %xmm2
207 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
208 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
209 ; AVX512VL-NEXT: vmovq %rax, %xmm0
210 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
211 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
212 ; AVX512VL-NEXT: retq
214 ; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
216 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
217 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
218 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
219 ; AVX512DQ-NEXT: retq
221 ; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64:
222 ; AVX512VLDQ: # %bb.0:
223 ; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
224 ; AVX512VLDQ-NEXT: retq
225 %cvt = fptosi <4 x double> %a to <4 x i64>
; Signed conversion of <4 x double> to <4 x i32>.
; The sse2 run converts each 128-bit half with cvttpd2dq and merges the halves
; with unpcklpd; the AVX-family runs use one vcvttpd2dq from ymm0.
229 define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
230 ; SSE-LABEL: fptosi_4f64_to_4i32:
232 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
233 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
234 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
237 ; AVX-LABEL: fptosi_4f64_to_4i32:
239 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
240 ; AVX-NEXT: vzeroupper
242 %cvt = fptosi <4 x double> %a to <4 x i32>
247 ; Double to Unsigned Integer
; Unsigned conversion of <2 x double> to <2 x i64> (a single vector fptoui).
; Per the assertions below:
;  - sse2/avx/avx2 runs convert each element twice with the signed
;    (v)cvttsd2si (once as-is, once after subtracting a constant loaded from
;    memory) and merge the two results with sarq/andq/orq;
;  - avx512f runs (with or without vl) use the scalar vcvttsd2usi;
;  - avx512dq runs use the packed vcvttpd2uqq.
250 define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
251 ; SSE-LABEL: fptoui_2f64_to_2i64:
253 ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
254 ; SSE-NEXT: movapd %xmm0, %xmm1
255 ; SSE-NEXT: subsd %xmm2, %xmm1
256 ; SSE-NEXT: cvttsd2si %xmm1, %rax
257 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
258 ; SSE-NEXT: movq %rcx, %rdx
259 ; SSE-NEXT: sarq $63, %rdx
260 ; SSE-NEXT: andq %rax, %rdx
261 ; SSE-NEXT: orq %rcx, %rdx
262 ; SSE-NEXT: movq %rdx, %xmm1
263 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
264 ; SSE-NEXT: cvttsd2si %xmm0, %rax
265 ; SSE-NEXT: subsd %xmm2, %xmm0
266 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
267 ; SSE-NEXT: movq %rax, %rdx
268 ; SSE-NEXT: sarq $63, %rdx
269 ; SSE-NEXT: andq %rcx, %rdx
270 ; SSE-NEXT: orq %rax, %rdx
271 ; SSE-NEXT: movq %rdx, %xmm0
272 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
273 ; SSE-NEXT: movdqa %xmm1, %xmm0
276 ; VEX-LABEL: fptoui_2f64_to_2i64:
278 ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
279 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
280 ; VEX-NEXT: vcvttsd2si %xmm2, %rax
281 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
282 ; VEX-NEXT: movq %rcx, %rdx
283 ; VEX-NEXT: sarq $63, %rdx
284 ; VEX-NEXT: andq %rax, %rdx
285 ; VEX-NEXT: orq %rcx, %rdx
286 ; VEX-NEXT: vmovq %rdx, %xmm2
287 ; VEX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
288 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm1
289 ; VEX-NEXT: vcvttsd2si %xmm1, %rax
290 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
291 ; VEX-NEXT: movq %rcx, %rdx
292 ; VEX-NEXT: sarq $63, %rdx
293 ; VEX-NEXT: andq %rax, %rdx
294 ; VEX-NEXT: orq %rcx, %rdx
295 ; VEX-NEXT: vmovq %rdx, %xmm0
296 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
299 ; AVX512F-LABEL: fptoui_2f64_to_2i64:
301 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
302 ; AVX512F-NEXT: vmovq %rax, %xmm1
303 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
304 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
305 ; AVX512F-NEXT: vmovq %rax, %xmm0
306 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
309 ; AVX512VL-LABEL: fptoui_2f64_to_2i64:
311 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
312 ; AVX512VL-NEXT: vmovq %rax, %xmm1
313 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
314 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
315 ; AVX512VL-NEXT: vmovq %rax, %xmm0
316 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
317 ; AVX512VL-NEXT: retq
319 ; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
321 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
322 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
323 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
324 ; AVX512DQ-NEXT: vzeroupper
325 ; AVX512DQ-NEXT: retq
327 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
328 ; AVX512VLDQ: # %bb.0:
329 ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
330 ; AVX512VLDQ-NEXT: retq
331 %cvt = fptoui <2 x double> %a to <2 x i64>
; Unsigned conversion of <2 x double> to <2 x i32>, widened to <4 x i32> with
; zeroed upper lanes. Per the assertions below, the sse2/avx/avx2 runs build
; the result from two signed (v)cvttpd2dq conversions combined with
; psrad/and/or, while the avx512 runs use vcvttpd2udq.
335 define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
336 ; SSE-LABEL: fptoui_2f64_to_4i32:
338 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
339 ; SSE-NEXT: movapd %xmm1, %xmm2
340 ; SSE-NEXT: psrad $31, %xmm2
341 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
342 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
343 ; SSE-NEXT: andpd %xmm2, %xmm0
344 ; SSE-NEXT: orpd %xmm1, %xmm0
347 ; VEX-LABEL: fptoui_2f64_to_4i32:
349 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1
350 ; VEX-NEXT: vpsrad $31, %xmm1, %xmm2
351 ; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
352 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
353 ; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0
354 ; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0
357 ; AVX512F-LABEL: fptoui_2f64_to_4i32:
359 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
360 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
361 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
362 ; AVX512F-NEXT: vzeroupper
365 ; AVX512VL-LABEL: fptoui_2f64_to_4i32:
367 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
368 ; AVX512VL-NEXT: retq
370 ; AVX512DQ-LABEL: fptoui_2f64_to_4i32:
372 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
373 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
374 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
375 ; AVX512DQ-NEXT: vzeroupper
376 ; AVX512DQ-NEXT: retq
378 ; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
379 ; AVX512VLDQ: # %bb.0:
380 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
381 ; AVX512VLDQ-NEXT: retq
382 %cvt = fptoui <2 x double> %a to <2 x i32>
; Mask indices 2 and 3 select from the zeroinitializer operand, so the upper
; two result lanes are zero.
383 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Unsigned conversion of <2 x double> to <2 x i32>, widened to <4 x i32> with
; undef upper lanes (contrast with the zeroinitializer variant, where the
; non-VL avx512 runs need an extra vmovq to clear the upper lanes).
387 define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
388 ; SSE-LABEL: fptoui_2f64_to_2i32:
390 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
391 ; SSE-NEXT: movapd %xmm1, %xmm2
392 ; SSE-NEXT: psrad $31, %xmm2
393 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
394 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
395 ; SSE-NEXT: andpd %xmm2, %xmm0
396 ; SSE-NEXT: orpd %xmm1, %xmm0
399 ; VEX-LABEL: fptoui_2f64_to_2i32:
401 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1
402 ; VEX-NEXT: vpsrad $31, %xmm1, %xmm2
403 ; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
404 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
405 ; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0
406 ; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0
409 ; AVX512F-LABEL: fptoui_2f64_to_2i32:
411 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
412 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
413 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
414 ; AVX512F-NEXT: vzeroupper
417 ; AVX512VL-LABEL: fptoui_2f64_to_2i32:
419 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
420 ; AVX512VL-NEXT: retq
422 ; AVX512DQ-LABEL: fptoui_2f64_to_2i32:
424 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
425 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
426 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
427 ; AVX512DQ-NEXT: vzeroupper
428 ; AVX512DQ-NEXT: retq
430 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
431 ; AVX512VLDQ: # %bb.0:
432 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
433 ; AVX512VLDQ-NEXT: retq
434 %cvt = fptoui <2 x double> %a to <2 x i32>
; Upper two result lanes are undef (mask entries are undef).
435 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Widens the <2 x double> argument to <4 x double> with zero upper lanes,
; then performs an unsigned conversion to <4 x i32>. In the AVX-family
; assertions the leading vmovapd/vmovaps %xmm0, %xmm0 precedes the wide
; conversion; the sse2 run only converts xmm0.
439 define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
440 ; SSE-LABEL: fptoui_4f64_to_2i32:
442 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
443 ; SSE-NEXT: movapd %xmm1, %xmm2
444 ; SSE-NEXT: psrad $31, %xmm2
445 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
446 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
447 ; SSE-NEXT: andpd %xmm2, %xmm0
448 ; SSE-NEXT: orpd %xmm1, %xmm0
451 ; AVX1-LABEL: fptoui_4f64_to_2i32:
453 ; AVX1-NEXT: vmovapd %xmm0, %xmm0
454 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1
455 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
456 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
457 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
458 ; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
459 ; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0
460 ; AVX1-NEXT: vzeroupper
463 ; AVX2-LABEL: fptoui_4f64_to_2i32:
465 ; AVX2-NEXT: vmovapd %xmm0, %xmm0
466 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
467 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
468 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
469 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
470 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
471 ; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1
472 ; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0
473 ; AVX2-NEXT: vzeroupper
476 ; AVX512F-LABEL: fptoui_4f64_to_2i32:
478 ; AVX512F-NEXT: vmovaps %xmm0, %xmm0
479 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
480 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
481 ; AVX512F-NEXT: vzeroupper
484 ; AVX512VL-LABEL: fptoui_4f64_to_2i32:
486 ; AVX512VL-NEXT: vmovaps %xmm0, %xmm0
487 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
488 ; AVX512VL-NEXT: vzeroupper
489 ; AVX512VL-NEXT: retq
491 ; AVX512DQ-LABEL: fptoui_4f64_to_2i32:
493 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
494 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
495 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
496 ; AVX512DQ-NEXT: vzeroupper
497 ; AVX512DQ-NEXT: retq
499 ; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32:
500 ; AVX512VLDQ: # %bb.0:
501 ; AVX512VLDQ-NEXT: vmovaps %xmm0, %xmm0
502 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
503 ; AVX512VLDQ-NEXT: vzeroupper
504 ; AVX512VLDQ-NEXT: retq
; Mask indices 2 and 3 select from the zeroinitializer operand, so the upper
; two lanes of the widened input are 0.0.
505 %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
506 %cvt = fptoui <4 x double> %ext to <4 x i32>
; Unsigned conversion of <4 x double> to <4 x i64> (a single vector fptoui).
; Per the assertions below:
;  - sse2/avx/avx2 runs scalarize; each element is converted twice with the
;    signed (v)cvttsd2si (as-is and after subtracting a constant loaded from
;    memory) and the two results are merged with sarq/andq/orq;
;  - avx512f runs (with or without vl) scalarize using vcvttsd2usi;
;  - avx512dq runs use the packed vcvttpd2uqq.
510 define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
511 ; SSE-LABEL: fptoui_4f64_to_4i64:
513 ; SSE-NEXT: movapd %xmm0, %xmm2
514 ; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
515 ; SSE-NEXT: subsd %xmm3, %xmm0
516 ; SSE-NEXT: cvttsd2si %xmm0, %rax
517 ; SSE-NEXT: cvttsd2si %xmm2, %rcx
518 ; SSE-NEXT: movq %rcx, %rdx
519 ; SSE-NEXT: sarq $63, %rdx
520 ; SSE-NEXT: andq %rax, %rdx
521 ; SSE-NEXT: orq %rcx, %rdx
522 ; SSE-NEXT: movq %rdx, %xmm0
523 ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
524 ; SSE-NEXT: cvttsd2si %xmm2, %rax
525 ; SSE-NEXT: subsd %xmm3, %xmm2
526 ; SSE-NEXT: cvttsd2si %xmm2, %rcx
527 ; SSE-NEXT: movq %rax, %rdx
528 ; SSE-NEXT: sarq $63, %rdx
529 ; SSE-NEXT: andq %rcx, %rdx
530 ; SSE-NEXT: orq %rax, %rdx
531 ; SSE-NEXT: movq %rdx, %xmm2
532 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
533 ; SSE-NEXT: movapd %xmm1, %xmm2
534 ; SSE-NEXT: subsd %xmm3, %xmm2
535 ; SSE-NEXT: cvttsd2si %xmm2, %rax
536 ; SSE-NEXT: cvttsd2si %xmm1, %rcx
537 ; SSE-NEXT: movq %rcx, %rdx
538 ; SSE-NEXT: sarq $63, %rdx
539 ; SSE-NEXT: andq %rax, %rdx
540 ; SSE-NEXT: orq %rcx, %rdx
541 ; SSE-NEXT: movq %rdx, %xmm2
542 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
543 ; SSE-NEXT: cvttsd2si %xmm1, %rax
544 ; SSE-NEXT: subsd %xmm3, %xmm1
545 ; SSE-NEXT: cvttsd2si %xmm1, %rcx
546 ; SSE-NEXT: movq %rax, %rdx
547 ; SSE-NEXT: sarq $63, %rdx
548 ; SSE-NEXT: andq %rcx, %rdx
549 ; SSE-NEXT: orq %rax, %rdx
550 ; SSE-NEXT: movq %rdx, %xmm1
551 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
552 ; SSE-NEXT: movdqa %xmm2, %xmm1
555 ; AVX1-LABEL: fptoui_4f64_to_4i64:
557 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
558 ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
559 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
560 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
561 ; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
562 ; AVX1-NEXT: movq %rcx, %rdx
563 ; AVX1-NEXT: sarq $63, %rdx
564 ; AVX1-NEXT: andq %rax, %rdx
565 ; AVX1-NEXT: orq %rcx, %rdx
566 ; AVX1-NEXT: vmovq %rdx, %xmm3
567 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
568 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4
569 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
570 ; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
571 ; AVX1-NEXT: movq %rcx, %rdx
572 ; AVX1-NEXT: sarq $63, %rdx
573 ; AVX1-NEXT: andq %rax, %rdx
574 ; AVX1-NEXT: orq %rcx, %rdx
575 ; AVX1-NEXT: vmovq %rdx, %xmm2
576 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
577 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3
578 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
579 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
580 ; AVX1-NEXT: movq %rcx, %rdx
581 ; AVX1-NEXT: sarq $63, %rdx
582 ; AVX1-NEXT: andq %rax, %rdx
583 ; AVX1-NEXT: orq %rcx, %rdx
584 ; AVX1-NEXT: vmovq %rdx, %xmm3
585 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
586 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm1
587 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
588 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
589 ; AVX1-NEXT: movq %rcx, %rdx
590 ; AVX1-NEXT: sarq $63, %rdx
591 ; AVX1-NEXT: andq %rax, %rdx
592 ; AVX1-NEXT: orq %rcx, %rdx
593 ; AVX1-NEXT: vmovq %rdx, %xmm0
594 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
595 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
598 ; AVX2-LABEL: fptoui_4f64_to_4i64:
600 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
601 ; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
602 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3
603 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
604 ; AVX2-NEXT: vcvttsd2si %xmm2, %rcx
605 ; AVX2-NEXT: movq %rcx, %rdx
606 ; AVX2-NEXT: sarq $63, %rdx
607 ; AVX2-NEXT: andq %rax, %rdx
608 ; AVX2-NEXT: orq %rcx, %rdx
609 ; AVX2-NEXT: vmovq %rdx, %xmm3
610 ; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
611 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4
612 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
613 ; AVX2-NEXT: vcvttsd2si %xmm2, %rcx
614 ; AVX2-NEXT: movq %rcx, %rdx
615 ; AVX2-NEXT: sarq $63, %rdx
616 ; AVX2-NEXT: andq %rax, %rdx
617 ; AVX2-NEXT: orq %rcx, %rdx
618 ; AVX2-NEXT: vmovq %rdx, %xmm2
619 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
620 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3
621 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
622 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
623 ; AVX2-NEXT: movq %rcx, %rdx
624 ; AVX2-NEXT: sarq $63, %rdx
625 ; AVX2-NEXT: andq %rax, %rdx
626 ; AVX2-NEXT: orq %rcx, %rdx
627 ; AVX2-NEXT: vmovq %rdx, %xmm3
628 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
629 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm1
630 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
631 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
632 ; AVX2-NEXT: movq %rcx, %rdx
633 ; AVX2-NEXT: sarq $63, %rdx
634 ; AVX2-NEXT: andq %rax, %rdx
635 ; AVX2-NEXT: orq %rcx, %rdx
636 ; AVX2-NEXT: vmovq %rdx, %xmm0
637 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
638 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
641 ; AVX512F-LABEL: fptoui_4f64_to_4i64:
643 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
644 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
645 ; AVX512F-NEXT: vmovq %rax, %xmm2
646 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
647 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
648 ; AVX512F-NEXT: vmovq %rax, %xmm1
649 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
650 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
651 ; AVX512F-NEXT: vmovq %rax, %xmm2
652 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
653 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
654 ; AVX512F-NEXT: vmovq %rax, %xmm0
655 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
656 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
659 ; AVX512VL-LABEL: fptoui_4f64_to_4i64:
661 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
662 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
663 ; AVX512VL-NEXT: vmovq %rax, %xmm2
664 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
665 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
666 ; AVX512VL-NEXT: vmovq %rax, %xmm1
667 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
668 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
669 ; AVX512VL-NEXT: vmovq %rax, %xmm2
670 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
671 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
672 ; AVX512VL-NEXT: vmovq %rax, %xmm0
673 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
674 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
675 ; AVX512VL-NEXT: retq
677 ; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
679 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
680 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
681 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
682 ; AVX512DQ-NEXT: retq
684 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64:
685 ; AVX512VLDQ: # %bb.0:
686 ; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0
687 ; AVX512VLDQ-NEXT: retq
688 %cvt = fptoui <4 x double> %a to <4 x i64>
; Unsigned conversion of <4 x double> to <4 x i32> (a single vector fptoui).
; Per the assertions below, the sse2/avx/avx2 runs combine two signed
; (v)cvttpd2dq conversions (of x and of x - 2147483648.0) using a psrad $31
; mask with and/or; the avx512 runs use vcvttpd2udq.
692 define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
693 ; SSE-LABEL: fptoui_4f64_to_4i32:
695 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [2.147483648E+9,2.147483648E+9]
696 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm3
697 ; SSE-NEXT: subpd %xmm2, %xmm1
698 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
699 ; SSE-NEXT: movapd %xmm3, %xmm4
700 ; SSE-NEXT: psrad $31, %xmm4
701 ; SSE-NEXT: pand %xmm1, %xmm4
702 ; SSE-NEXT: por %xmm3, %xmm4
703 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
704 ; SSE-NEXT: subpd %xmm2, %xmm0
705 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm2
706 ; SSE-NEXT: movapd %xmm1, %xmm0
707 ; SSE-NEXT: psrad $31, %xmm0
708 ; SSE-NEXT: pand %xmm2, %xmm0
709 ; SSE-NEXT: por %xmm1, %xmm0
710 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
713 ; AVX1-LABEL: fptoui_4f64_to_4i32:
715 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1
716 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
717 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
718 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
719 ; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
720 ; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0
721 ; AVX1-NEXT: vzeroupper
724 ; AVX2-LABEL: fptoui_4f64_to_4i32:
726 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
727 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
728 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
729 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
730 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
731 ; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1
732 ; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0
733 ; AVX2-NEXT: vzeroupper
736 ; AVX512F-LABEL: fptoui_4f64_to_4i32:
738 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
739 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
740 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
741 ; AVX512F-NEXT: vzeroupper
744 ; AVX512VL-LABEL: fptoui_4f64_to_4i32:
746 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
747 ; AVX512VL-NEXT: vzeroupper
748 ; AVX512VL-NEXT: retq
750 ; AVX512DQ-LABEL: fptoui_4f64_to_4i32:
752 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
753 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
754 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
755 ; AVX512DQ-NEXT: vzeroupper
756 ; AVX512DQ-NEXT: retq
758 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32:
759 ; AVX512VLDQ: # %bb.0:
760 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
761 ; AVX512VLDQ-NEXT: vzeroupper
762 ; AVX512VLDQ-NEXT: retq
763 %cvt = fptoui <4 x double> %a to <4 x i32>
768 ; Float to Signed Integer
; Signed conversion of <2 x float> to <2 x i32>.
; Every run is expected to emit a single (v)cvttps2dq on xmm0.
771 define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
772 ; SSE-LABEL: fptosi_2f32_to_2i32:
774 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
777 ; AVX-LABEL: fptosi_2f32_to_2i32:
779 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
781 %cvt = fptosi <2 x float> %a to <2 x i32>
; Signed conversion of <4 x float> to <4 x i32>.
; Every run is expected to emit a single (v)cvttps2dq on xmm0.
785 define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
786 ; SSE-LABEL: fptosi_4f32_to_4i32:
788 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
791 ; AVX-LABEL: fptosi_4f32_to_4i32:
793 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
795 %cvt = fptosi <4 x float> %a to <4 x i32>
; Takes the low two lanes of a <4 x float> and converts them (signed) to
; <2 x i64>. The shuffle happens BEFORE the conversion here. Per the
; assertions below, the avx512dq runs use the packed vcvttps2qq and all other
; runs use two scalar (v)cvttss2si conversions.
799 define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
800 ; SSE-LABEL: fptosi_2f32_to_2i64:
802 ; SSE-NEXT: cvttss2si %xmm0, %rax
803 ; SSE-NEXT: movq %rax, %xmm1
804 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
805 ; SSE-NEXT: cvttss2si %xmm0, %rax
806 ; SSE-NEXT: movq %rax, %xmm0
807 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
808 ; SSE-NEXT: movdqa %xmm1, %xmm0
811 ; VEX-LABEL: fptosi_2f32_to_2i64:
813 ; VEX-NEXT: vcvttss2si %xmm0, %rax
814 ; VEX-NEXT: vmovq %rax, %xmm1
815 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
816 ; VEX-NEXT: vcvttss2si %xmm0, %rax
817 ; VEX-NEXT: vmovq %rax, %xmm0
818 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
821 ; AVX512F-LABEL: fptosi_2f32_to_2i64:
823 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
824 ; AVX512F-NEXT: vmovq %rax, %xmm1
825 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
826 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
827 ; AVX512F-NEXT: vmovq %rax, %xmm0
828 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
831 ; AVX512VL-LABEL: fptosi_2f32_to_2i64:
833 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
834 ; AVX512VL-NEXT: vmovq %rax, %xmm1
835 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
836 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
837 ; AVX512VL-NEXT: vmovq %rax, %xmm0
838 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
839 ; AVX512VL-NEXT: retq
841 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
843 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
844 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
845 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
846 ; AVX512DQ-NEXT: vzeroupper
847 ; AVX512DQ-NEXT: retq
849 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
850 ; AVX512VLDQ: # %bb.0:
851 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
852 ; AVX512VLDQ-NEXT: retq
; Keep only lanes 0 and 1 of the input before converting.
853 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
854 %cvt = fptosi <2 x float> %shuf to <2 x i64>
; Converts all four lanes of a <4 x float> (signed) to <4 x i64> and then
; keeps only the low two lanes. The shuffle happens AFTER the conversion
; here. Per the assertions below, the runs without avx512dq convert only two
; elements with scalar (v)cvttss2si; the avx512vl+dq run emits a ymm-wide
; vcvttps2qq.
858 define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
859 ; SSE-LABEL: fptosi_4f32_to_2i64:
861 ; SSE-NEXT: cvttss2si %xmm0, %rax
862 ; SSE-NEXT: movq %rax, %xmm1
863 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
864 ; SSE-NEXT: cvttss2si %xmm0, %rax
865 ; SSE-NEXT: movq %rax, %xmm0
866 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
867 ; SSE-NEXT: movdqa %xmm1, %xmm0
870 ; VEX-LABEL: fptosi_4f32_to_2i64:
872 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
873 ; VEX-NEXT: vcvttss2si %xmm1, %rax
874 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
875 ; VEX-NEXT: vmovq %rcx, %xmm0
876 ; VEX-NEXT: vmovq %rax, %xmm1
877 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
880 ; AVX512F-LABEL: fptosi_4f32_to_2i64:
882 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
883 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
884 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
885 ; AVX512F-NEXT: vmovq %rcx, %xmm0
886 ; AVX512F-NEXT: vmovq %rax, %xmm1
887 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
890 ; AVX512VL-LABEL: fptosi_4f32_to_2i64:
892 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
893 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
894 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
895 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
896 ; AVX512VL-NEXT: vmovq %rax, %xmm1
897 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
898 ; AVX512VL-NEXT: retq
900 ; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
902 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
903 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
904 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
905 ; AVX512DQ-NEXT: vzeroupper
906 ; AVX512DQ-NEXT: retq
908 ; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
909 ; AVX512VLDQ: # %bb.0:
910 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
911 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
912 ; AVX512VLDQ-NEXT: vzeroupper
913 ; AVX512VLDQ-NEXT: retq
914 %cvt = fptosi <4 x float> %a to <4 x i64>
; Keep only lanes 0 and 1 of the converted vector.
915 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
; Signed conversion of <8 x float> to <8 x i32>.
; The sse2 run converts the two 128-bit halves (xmm0, xmm1) separately; the
; AVX-family runs use one ymm-wide vcvttps2dq.
919 define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
920 ; SSE-LABEL: fptosi_8f32_to_8i32:
922 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
923 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
926 ; AVX-LABEL: fptosi_8f32_to_8i32:
928 ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
930 %cvt = fptosi <8 x float> %a to <8 x i32>
; Takes the low four lanes of an <8 x float> argument (shufflevector indices 0-3)
; and converts them with fptosi to <4 x i64>.
; The SSE2, AVX1, AVX2, AVX512F and AVX512VL checks expect a lane-by-lane
; conversion with (v)cvttss2si whose results are reassembled into a vector.
; The AVX512DQ checks expect vcvttps2qq from ymm0 into zmm0, and the AVX512VLDQ
; checks expect the narrower vcvttps2qq from xmm0 into ymm0.
934 define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
935 ; SSE-LABEL: fptosi_4f32_to_4i64:
937 ; SSE-NEXT: cvttss2si %xmm0, %rax
938 ; SSE-NEXT: movq %rax, %xmm2
939 ; SSE-NEXT: movaps %xmm0, %xmm1
940 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
941 ; SSE-NEXT: cvttss2si %xmm1, %rax
942 ; SSE-NEXT: movq %rax, %xmm1
943 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
944 ; SSE-NEXT: movaps %xmm0, %xmm1
945 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
946 ; SSE-NEXT: cvttss2si %xmm1, %rax
947 ; SSE-NEXT: movq %rax, %xmm3
948 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
949 ; SSE-NEXT: cvttss2si %xmm0, %rax
950 ; SSE-NEXT: movq %rax, %xmm1
951 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
952 ; SSE-NEXT: movdqa %xmm2, %xmm0
955 ; AVX1-LABEL: fptosi_4f32_to_4i64:
957 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
958 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
959 ; AVX1-NEXT: vmovq %rax, %xmm1
960 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
961 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
962 ; AVX1-NEXT: vmovq %rax, %xmm2
963 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
964 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
965 ; AVX1-NEXT: vmovq %rax, %xmm2
966 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
967 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
968 ; AVX1-NEXT: vmovq %rax, %xmm0
969 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
970 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
973 ; AVX2-LABEL: fptosi_4f32_to_4i64:
975 ; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
976 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
977 ; AVX2-NEXT: vmovq %rax, %xmm1
978 ; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
979 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
980 ; AVX2-NEXT: vmovq %rax, %xmm2
981 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
982 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
983 ; AVX2-NEXT: vmovq %rax, %xmm2
984 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
985 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
986 ; AVX2-NEXT: vmovq %rax, %xmm0
987 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
988 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
991 ; AVX512F-LABEL: fptosi_4f32_to_4i64:
993 ; AVX512F-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
994 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
995 ; AVX512F-NEXT: vmovq %rax, %xmm1
996 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
997 ; AVX512F-NEXT: vcvttss2si %xmm2, %rax
998 ; AVX512F-NEXT: vmovq %rax, %xmm2
999 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1000 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1001 ; AVX512F-NEXT: vmovq %rax, %xmm2
1002 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1003 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1004 ; AVX512F-NEXT: vmovq %rax, %xmm0
1005 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1006 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1007 ; AVX512F-NEXT: retq
1009 ; AVX512VL-LABEL: fptosi_4f32_to_4i64:
1010 ; AVX512VL: # %bb.0:
1011 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1012 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1013 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1014 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1015 ; AVX512VL-NEXT: vcvttss2si %xmm2, %rax
1016 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1017 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1018 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1019 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1020 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1021 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1022 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1023 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1024 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1025 ; AVX512VL-NEXT: retq
1027 ; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
1028 ; AVX512DQ: # %bb.0:
1029 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1030 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1031 ; AVX512DQ-NEXT: retq
1033 ; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
1034 ; AVX512VLDQ: # %bb.0:
1035 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
1036 ; AVX512VLDQ-NEXT: retq
1037 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1038 %cvt = fptosi <4 x float> %shuf to <4 x i64>
; Converts all eight lanes with fptosi to <8 x i64> first, then keeps only the low
; four results (shufflevector indices 0-3).
; The checks without AVX512DQ expect four scalar (v)cvttss2si conversions whose
; results are reassembled into a vector. Both DQ-enabled check sets expect
; vcvttps2qq from ymm0 into zmm0 followed by a kill annotation that narrows zmm0
; to ymm0.
1042 define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
1043 ; SSE-LABEL: fptosi_8f32_to_4i64:
1045 ; SSE-NEXT: cvttss2si %xmm0, %rax
1046 ; SSE-NEXT: movq %rax, %xmm2
1047 ; SSE-NEXT: movaps %xmm0, %xmm1
1048 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
1049 ; SSE-NEXT: cvttss2si %xmm1, %rax
1050 ; SSE-NEXT: movq %rax, %xmm1
1051 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1052 ; SSE-NEXT: movaps %xmm0, %xmm1
1053 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
1054 ; SSE-NEXT: cvttss2si %xmm1, %rax
1055 ; SSE-NEXT: movq %rax, %xmm3
1056 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1057 ; SSE-NEXT: cvttss2si %xmm0, %rax
1058 ; SSE-NEXT: movq %rax, %xmm1
1059 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1060 ; SSE-NEXT: movdqa %xmm2, %xmm0
1063 ; AVX1-LABEL: fptosi_8f32_to_4i64:
1065 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1066 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1067 ; AVX1-NEXT: vmovq %rax, %xmm1
1068 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1069 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
1070 ; AVX1-NEXT: vmovq %rax, %xmm2
1071 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1072 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1073 ; AVX1-NEXT: vmovq %rax, %xmm2
1074 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1075 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1076 ; AVX1-NEXT: vmovq %rax, %xmm0
1077 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1078 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1081 ; AVX2-LABEL: fptosi_8f32_to_4i64:
1083 ; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1084 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1085 ; AVX2-NEXT: vmovq %rax, %xmm1
1086 ; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1087 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
1088 ; AVX2-NEXT: vmovq %rax, %xmm2
1089 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1090 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1091 ; AVX2-NEXT: vmovq %rax, %xmm2
1092 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1093 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1094 ; AVX2-NEXT: vmovq %rax, %xmm0
1095 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1096 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1099 ; AVX512F-LABEL: fptosi_8f32_to_4i64:
1101 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1102 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
1103 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
1104 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1105 ; AVX512F-NEXT: vcvttss2si %xmm1, %rdx
1106 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1107 ; AVX512F-NEXT: vcvttss2si %xmm0, %rsi
1108 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1109 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1110 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1111 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1112 ; AVX512F-NEXT: vmovq %rax, %xmm2
1113 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1114 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1115 ; AVX512F-NEXT: retq
1117 ; AVX512VL-LABEL: fptosi_8f32_to_4i64:
1118 ; AVX512VL: # %bb.0:
1119 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1120 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1121 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
1122 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1123 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx
1124 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1125 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi
1126 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1127 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1128 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1129 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1130 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1131 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1132 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1133 ; AVX512VL-NEXT: retq
1135 ; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
1136 ; AVX512DQ: # %bb.0:
1137 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1138 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1139 ; AVX512DQ-NEXT: retq
1141 ; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
1142 ; AVX512VLDQ: # %bb.0:
1143 ; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0
1144 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1145 ; AVX512VLDQ-NEXT: retq
1146 %cvt = fptosi <8 x float> %a to <8 x i64>
1147 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1152 ; Float to Unsigned Integer
; Unsigned conversion of <2 x float> to <2 x i32> (fptoui).
; The SSE2, AVX1 and AVX2 checks expect two signed (v)cvttps2dq conversions - one
; of the input and one of the input minus a constant (printed as 2.14748365E+9 in
; the AVX2 checks) - merged with an arithmetic shift right by 31, an AND and an OR.
; The AVX512F and AVX512DQ checks expect vcvttps2udq on zmm0 wrapped in kill
; annotations and followed by vzeroupper; the two VL-enabled check sets expect
; vcvttps2udq directly on xmm0.
1155 define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
1156 ; SSE-LABEL: fptoui_2f32_to_2i32:
1158 ; SSE-NEXT: cvttps2dq %xmm0, %xmm1
1159 ; SSE-NEXT: movdqa %xmm1, %xmm2
1160 ; SSE-NEXT: psrad $31, %xmm2
1161 ; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1162 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1163 ; SSE-NEXT: pand %xmm2, %xmm0
1164 ; SSE-NEXT: por %xmm1, %xmm0
1167 ; AVX1-LABEL: fptoui_2f32_to_2i32:
1169 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
1170 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
1171 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1172 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1173 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1174 ; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
1177 ; AVX2-LABEL: fptoui_2f32_to_2i32:
1179 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1180 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1181 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1182 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1183 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
1184 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
1185 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1188 ; AVX512F-LABEL: fptoui_2f32_to_2i32:
1190 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1191 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1192 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1193 ; AVX512F-NEXT: vzeroupper
1194 ; AVX512F-NEXT: retq
1196 ; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1197 ; AVX512VL: # %bb.0:
1198 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1199 ; AVX512VL-NEXT: retq
1201 ; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
1202 ; AVX512DQ: # %bb.0:
1203 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1204 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1205 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1206 ; AVX512DQ-NEXT: vzeroupper
1207 ; AVX512DQ-NEXT: retq
1209 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1210 ; AVX512VLDQ: # %bb.0:
1211 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1212 ; AVX512VLDQ-NEXT: retq
1213 %cvt = fptoui <2 x float> %a to <2 x i32>
; Unsigned conversion of <4 x float> to <4 x i32> (fptoui).
; The SSE2, AVX1 and AVX2 checks expect two signed (v)cvttps2dq conversions - one
; of the input and one of the input minus a constant (printed as 2.14748365E+9 in
; the AVX2 checks) - merged with an arithmetic shift right by 31, an AND and an OR.
; The AVX512F and AVX512DQ checks expect vcvttps2udq on zmm0 wrapped in kill
; annotations and followed by vzeroupper; the two VL-enabled check sets expect
; vcvttps2udq directly on xmm0.
1217 define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
1218 ; SSE-LABEL: fptoui_4f32_to_4i32:
1220 ; SSE-NEXT: cvttps2dq %xmm0, %xmm1
1221 ; SSE-NEXT: movdqa %xmm1, %xmm2
1222 ; SSE-NEXT: psrad $31, %xmm2
1223 ; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1224 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1225 ; SSE-NEXT: pand %xmm2, %xmm0
1226 ; SSE-NEXT: por %xmm1, %xmm0
1229 ; AVX1-LABEL: fptoui_4f32_to_4i32:
1231 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
1232 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
1233 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1234 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1235 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1236 ; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
1239 ; AVX2-LABEL: fptoui_4f32_to_4i32:
1241 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1242 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1243 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1244 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1245 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
1246 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
1247 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1250 ; AVX512F-LABEL: fptoui_4f32_to_4i32:
1252 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1253 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1254 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1255 ; AVX512F-NEXT: vzeroupper
1256 ; AVX512F-NEXT: retq
1258 ; AVX512VL-LABEL: fptoui_4f32_to_4i32:
1259 ; AVX512VL: # %bb.0:
1260 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1261 ; AVX512VL-NEXT: retq
1263 ; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
1264 ; AVX512DQ: # %bb.0:
1265 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1266 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1267 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1268 ; AVX512DQ-NEXT: vzeroupper
1269 ; AVX512DQ-NEXT: retq
1271 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
1272 ; AVX512VLDQ: # %bb.0:
1273 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1274 ; AVX512VLDQ-NEXT: retq
1275 %cvt = fptoui <4 x float> %a to <4 x i32>
; Takes the low two lanes of a <4 x float> argument (shufflevector indices 0-1)
; and converts them with fptoui to <2 x i64>.
; The SSE2 and VEX checks expect, for each lane, two signed (v)cvttss2si
; conversions - the lane itself and the lane minus a scalar constant loaded from
; memory - merged with sarq $63, andq and orq.
; The AVX512F and AVX512VL checks expect one vcvttss2usi per lane. The AVX512DQ
; checks expect vcvttps2uqq from ymm0 into zmm0 followed by vzeroupper, and the
; AVX512VLDQ checks expect a single vcvttps2uqq on xmm0.
1279 define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
1280 ; SSE-LABEL: fptoui_2f32_to_2i64:
1282 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1283 ; SSE-NEXT: movaps %xmm0, %xmm1
1284 ; SSE-NEXT: subss %xmm2, %xmm1
1285 ; SSE-NEXT: cvttss2si %xmm1, %rax
1286 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1287 ; SSE-NEXT: movq %rcx, %rdx
1288 ; SSE-NEXT: sarq $63, %rdx
1289 ; SSE-NEXT: andq %rax, %rdx
1290 ; SSE-NEXT: orq %rcx, %rdx
1291 ; SSE-NEXT: movq %rdx, %xmm1
1292 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1293 ; SSE-NEXT: cvttss2si %xmm0, %rax
1294 ; SSE-NEXT: subss %xmm2, %xmm0
1295 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1296 ; SSE-NEXT: movq %rax, %rdx
1297 ; SSE-NEXT: sarq $63, %rdx
1298 ; SSE-NEXT: andq %rcx, %rdx
1299 ; SSE-NEXT: orq %rax, %rdx
1300 ; SSE-NEXT: movq %rdx, %xmm0
1301 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1302 ; SSE-NEXT: movdqa %xmm1, %xmm0
1305 ; VEX-LABEL: fptoui_2f32_to_2i64:
1307 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1308 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
1309 ; VEX-NEXT: vcvttss2si %xmm2, %rax
1310 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1311 ; VEX-NEXT: movq %rcx, %rdx
1312 ; VEX-NEXT: sarq $63, %rdx
1313 ; VEX-NEXT: andq %rax, %rdx
1314 ; VEX-NEXT: orq %rcx, %rdx
1315 ; VEX-NEXT: vmovq %rdx, %xmm2
1316 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1317 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm1
1318 ; VEX-NEXT: vcvttss2si %xmm1, %rax
1319 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1320 ; VEX-NEXT: movq %rcx, %rdx
1321 ; VEX-NEXT: sarq $63, %rdx
1322 ; VEX-NEXT: andq %rax, %rdx
1323 ; VEX-NEXT: orq %rcx, %rdx
1324 ; VEX-NEXT: vmovq %rdx, %xmm0
1325 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1328 ; AVX512F-LABEL: fptoui_2f32_to_2i64:
1330 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1331 ; AVX512F-NEXT: vmovq %rax, %xmm1
1332 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1333 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1334 ; AVX512F-NEXT: vmovq %rax, %xmm0
1335 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1336 ; AVX512F-NEXT: retq
1338 ; AVX512VL-LABEL: fptoui_2f32_to_2i64:
1339 ; AVX512VL: # %bb.0:
1340 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1341 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1342 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1343 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1344 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1345 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1346 ; AVX512VL-NEXT: retq
1348 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
1349 ; AVX512DQ: # %bb.0:
1350 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1351 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1352 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1353 ; AVX512DQ-NEXT: vzeroupper
1354 ; AVX512DQ-NEXT: retq
1356 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
1357 ; AVX512VLDQ: # %bb.0:
1358 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
1359 ; AVX512VLDQ-NEXT: retq
1360 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1361 %cvt = fptoui <2 x float> %shuf to <2 x i64>
; Converts all four lanes with fptoui to <4 x i64> first, then keeps only the low
; two results (shufflevector indices 0-1).
; The SSE2 and VEX checks expect, for each kept lane, two signed (v)cvttss2si
; conversions - the lane itself and the lane minus a scalar constant loaded from
; memory - merged with sarq $63, andq and orq.
; The AVX512F and AVX512VL checks expect one vcvttss2usi per kept lane. The
; AVX512DQ checks expect vcvttps2uqq from ymm0 into zmm0, and the AVX512VLDQ
; checks expect vcvttps2uqq from xmm0 into ymm0 with a kill annotation narrowing
; the result to xmm0; both DQ-enabled check sets end with vzeroupper.
1365 define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
1366 ; SSE-LABEL: fptoui_4f32_to_2i64:
1368 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1369 ; SSE-NEXT: movaps %xmm0, %xmm1
1370 ; SSE-NEXT: subss %xmm2, %xmm1
1371 ; SSE-NEXT: cvttss2si %xmm1, %rax
1372 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1373 ; SSE-NEXT: movq %rcx, %rdx
1374 ; SSE-NEXT: sarq $63, %rdx
1375 ; SSE-NEXT: andq %rax, %rdx
1376 ; SSE-NEXT: orq %rcx, %rdx
1377 ; SSE-NEXT: movq %rdx, %xmm1
1378 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1379 ; SSE-NEXT: cvttss2si %xmm0, %rax
1380 ; SSE-NEXT: subss %xmm2, %xmm0
1381 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1382 ; SSE-NEXT: movq %rax, %rdx
1383 ; SSE-NEXT: sarq $63, %rdx
1384 ; SSE-NEXT: andq %rcx, %rdx
1385 ; SSE-NEXT: orq %rax, %rdx
1386 ; SSE-NEXT: movq %rdx, %xmm0
1387 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1388 ; SSE-NEXT: movdqa %xmm1, %xmm0
1391 ; VEX-LABEL: fptoui_4f32_to_2i64:
1393 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1394 ; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1395 ; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3
1396 ; VEX-NEXT: vcvttss2si %xmm3, %rax
1397 ; VEX-NEXT: vcvttss2si %xmm1, %rcx
1398 ; VEX-NEXT: movq %rcx, %rdx
1399 ; VEX-NEXT: sarq $63, %rdx
1400 ; VEX-NEXT: andq %rax, %rdx
1401 ; VEX-NEXT: orq %rcx, %rdx
1402 ; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1
1403 ; VEX-NEXT: vcvttss2si %xmm1, %rax
1404 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1405 ; VEX-NEXT: movq %rcx, %rsi
1406 ; VEX-NEXT: sarq $63, %rsi
1407 ; VEX-NEXT: andq %rax, %rsi
1408 ; VEX-NEXT: orq %rcx, %rsi
1409 ; VEX-NEXT: vmovq %rsi, %xmm0
1410 ; VEX-NEXT: vmovq %rdx, %xmm1
1411 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1414 ; AVX512F-LABEL: fptoui_4f32_to_2i64:
1416 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1417 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1418 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1419 ; AVX512F-NEXT: vmovq %rcx, %xmm0
1420 ; AVX512F-NEXT: vmovq %rax, %xmm1
1421 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1422 ; AVX512F-NEXT: retq
1424 ; AVX512VL-LABEL: fptoui_4f32_to_2i64:
1425 ; AVX512VL: # %bb.0:
1426 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1427 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1428 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1429 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
1430 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1431 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1432 ; AVX512VL-NEXT: retq
1434 ; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
1435 ; AVX512DQ: # %bb.0:
1436 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1437 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1438 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1439 ; AVX512DQ-NEXT: vzeroupper
1440 ; AVX512DQ-NEXT: retq
1442 ; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
1443 ; AVX512VLDQ: # %bb.0:
1444 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1445 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1446 ; AVX512VLDQ-NEXT: vzeroupper
1447 ; AVX512VLDQ-NEXT: retq
1448 %cvt = fptoui <4 x float> %a to <4 x i64>
1449 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
; Unsigned conversion of a full <8 x float> vector to <8 x i32> (fptoui).
; The SSE2 checks load the constant once into xmm2 and then apply the same
; sequence to xmm0 and xmm1 (cvttps2dq, subps, cvttps2dq, psrad $31, pand, por).
; The AVX1 checks expect two ymm vcvttps2dq conversions combined with vorps and
; vblendvps; the AVX2 checks expect vpsrad $31, vpand and vpor instead.
; The AVX512F and AVX512DQ checks expect vcvttps2udq on zmm0 wrapped in kill
; annotations; the two VL-enabled check sets expect vcvttps2udq directly on ymm0.
1453 define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
1454 ; SSE-LABEL: fptoui_8f32_to_8i32:
1456 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1457 ; SSE-NEXT: cvttps2dq %xmm0, %xmm3
1458 ; SSE-NEXT: subps %xmm2, %xmm0
1459 ; SSE-NEXT: cvttps2dq %xmm0, %xmm4
1460 ; SSE-NEXT: movdqa %xmm3, %xmm0
1461 ; SSE-NEXT: psrad $31, %xmm0
1462 ; SSE-NEXT: pand %xmm4, %xmm0
1463 ; SSE-NEXT: por %xmm3, %xmm0
1464 ; SSE-NEXT: cvttps2dq %xmm1, %xmm3
1465 ; SSE-NEXT: subps %xmm2, %xmm1
1466 ; SSE-NEXT: cvttps2dq %xmm1, %xmm2
1467 ; SSE-NEXT: movdqa %xmm3, %xmm1
1468 ; SSE-NEXT: psrad $31, %xmm1
1469 ; SSE-NEXT: pand %xmm2, %xmm1
1470 ; SSE-NEXT: por %xmm3, %xmm1
1473 ; AVX1-LABEL: fptoui_8f32_to_8i32:
1475 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm1
1476 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1477 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
1478 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
1479 ; AVX1-NEXT: vblendvps %ymm1, %ymm0, %ymm1, %ymm0
1482 ; AVX2-LABEL: fptoui_8f32_to_8i32:
1484 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1485 ; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm1
1486 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
1487 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
1488 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm2
1489 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
1490 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1493 ; AVX512F-LABEL: fptoui_8f32_to_8i32:
1495 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1496 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1497 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1498 ; AVX512F-NEXT: retq
1500 ; AVX512VL-LABEL: fptoui_8f32_to_8i32:
1501 ; AVX512VL: # %bb.0:
1502 ; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
1503 ; AVX512VL-NEXT: retq
1505 ; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
1506 ; AVX512DQ: # %bb.0:
1507 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1508 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1509 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1510 ; AVX512DQ-NEXT: retq
1512 ; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
1513 ; AVX512VLDQ: # %bb.0:
1514 ; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0
1515 ; AVX512VLDQ-NEXT: retq
1516 %cvt = fptoui <8 x float> %a to <8 x i32>
; Takes the low four lanes of an <8 x float> argument (shufflevector indices 0-3)
; and converts them with fptoui to <4 x i64>.
; The SSE2, AVX1 and AVX2 checks expect, for each lane, two signed (v)cvttss2si
; conversions - the lane itself and the lane minus a scalar constant loaded from
; memory - merged with sarq $63, andq and orq, with the four results reassembled
; into a vector.
; The AVX512F and AVX512VL checks expect one vcvttss2usi per lane. The AVX512DQ
; checks expect vcvttps2uqq from ymm0 into zmm0, and the AVX512VLDQ checks expect
; the narrower vcvttps2uqq from xmm0 into ymm0.
1520 define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
1521 ; SSE-LABEL: fptoui_4f32_to_4i64:
1523 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1524 ; SSE-NEXT: movaps %xmm0, %xmm2
1525 ; SSE-NEXT: subss %xmm1, %xmm2
1526 ; SSE-NEXT: cvttss2si %xmm2, %rax
1527 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1528 ; SSE-NEXT: movq %rcx, %rdx
1529 ; SSE-NEXT: sarq $63, %rdx
1530 ; SSE-NEXT: andq %rax, %rdx
1531 ; SSE-NEXT: orq %rcx, %rdx
1532 ; SSE-NEXT: movq %rdx, %xmm2
1533 ; SSE-NEXT: movaps %xmm0, %xmm3
1534 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1535 ; SSE-NEXT: cvttss2si %xmm3, %rax
1536 ; SSE-NEXT: subss %xmm1, %xmm3
1537 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1538 ; SSE-NEXT: movq %rax, %rdx
1539 ; SSE-NEXT: sarq $63, %rdx
1540 ; SSE-NEXT: andq %rcx, %rdx
1541 ; SSE-NEXT: orq %rax, %rdx
1542 ; SSE-NEXT: movq %rdx, %xmm3
1543 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1544 ; SSE-NEXT: movaps %xmm0, %xmm3
1545 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1546 ; SSE-NEXT: cvttss2si %xmm3, %rax
1547 ; SSE-NEXT: subss %xmm1, %xmm3
1548 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1549 ; SSE-NEXT: movq %rax, %rdx
1550 ; SSE-NEXT: sarq $63, %rdx
1551 ; SSE-NEXT: andq %rcx, %rdx
1552 ; SSE-NEXT: orq %rax, %rdx
1553 ; SSE-NEXT: movq %rdx, %xmm3
1554 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1555 ; SSE-NEXT: cvttss2si %xmm0, %rax
1556 ; SSE-NEXT: subss %xmm1, %xmm0
1557 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1558 ; SSE-NEXT: movq %rax, %rdx
1559 ; SSE-NEXT: sarq $63, %rdx
1560 ; SSE-NEXT: andq %rcx, %rdx
1561 ; SSE-NEXT: orq %rax, %rdx
1562 ; SSE-NEXT: movq %rdx, %xmm1
1563 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1564 ; SSE-NEXT: movdqa %xmm2, %xmm0
1567 ; AVX1-LABEL: fptoui_4f32_to_4i64:
1569 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1570 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1571 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
1572 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1573 ; AVX1-NEXT: vcvttss2si %xmm2, %rcx
1574 ; AVX1-NEXT: movq %rcx, %rdx
1575 ; AVX1-NEXT: sarq $63, %rdx
1576 ; AVX1-NEXT: andq %rax, %rdx
1577 ; AVX1-NEXT: orq %rcx, %rdx
1578 ; AVX1-NEXT: vmovq %rdx, %xmm2
1579 ; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1580 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
1581 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1582 ; AVX1-NEXT: vcvttss2si %xmm3, %rcx
1583 ; AVX1-NEXT: movq %rcx, %rdx
1584 ; AVX1-NEXT: sarq $63, %rdx
1585 ; AVX1-NEXT: andq %rax, %rdx
1586 ; AVX1-NEXT: orq %rcx, %rdx
1587 ; AVX1-NEXT: vmovq %rdx, %xmm3
1588 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1589 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
1590 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1591 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1592 ; AVX1-NEXT: movq %rcx, %rdx
1593 ; AVX1-NEXT: sarq $63, %rdx
1594 ; AVX1-NEXT: andq %rax, %rdx
1595 ; AVX1-NEXT: orq %rcx, %rdx
1596 ; AVX1-NEXT: vmovq %rdx, %xmm3
1597 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1598 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm1
1599 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1600 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1601 ; AVX1-NEXT: movq %rcx, %rdx
1602 ; AVX1-NEXT: sarq $63, %rdx
1603 ; AVX1-NEXT: andq %rax, %rdx
1604 ; AVX1-NEXT: orq %rcx, %rdx
1605 ; AVX1-NEXT: vmovq %rdx, %xmm0
1606 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1607 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1610 ; AVX2-LABEL: fptoui_4f32_to_4i64:
1612 ; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1613 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1614 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
1615 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1616 ; AVX2-NEXT: vcvttss2si %xmm2, %rcx
1617 ; AVX2-NEXT: movq %rcx, %rdx
1618 ; AVX2-NEXT: sarq $63, %rdx
1619 ; AVX2-NEXT: andq %rax, %rdx
1620 ; AVX2-NEXT: orq %rcx, %rdx
1621 ; AVX2-NEXT: vmovq %rdx, %xmm2
1622 ; AVX2-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1623 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
1624 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1625 ; AVX2-NEXT: vcvttss2si %xmm3, %rcx
1626 ; AVX2-NEXT: movq %rcx, %rdx
1627 ; AVX2-NEXT: sarq $63, %rdx
1628 ; AVX2-NEXT: andq %rax, %rdx
1629 ; AVX2-NEXT: orq %rcx, %rdx
1630 ; AVX2-NEXT: vmovq %rdx, %xmm3
1631 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1632 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
1633 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1634 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1635 ; AVX2-NEXT: movq %rcx, %rdx
1636 ; AVX2-NEXT: sarq $63, %rdx
1637 ; AVX2-NEXT: andq %rax, %rdx
1638 ; AVX2-NEXT: orq %rcx, %rdx
1639 ; AVX2-NEXT: vmovq %rdx, %xmm3
1640 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1641 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
1642 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1643 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1644 ; AVX2-NEXT: movq %rcx, %rdx
1645 ; AVX2-NEXT: sarq $63, %rdx
1646 ; AVX2-NEXT: andq %rax, %rdx
1647 ; AVX2-NEXT: orq %rcx, %rdx
1648 ; AVX2-NEXT: vmovq %rdx, %xmm0
1649 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1650 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1653 ; AVX512F-LABEL: fptoui_4f32_to_4i64:
1655 ; AVX512F-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1656 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1657 ; AVX512F-NEXT: vmovq %rax, %xmm1
1658 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1659 ; AVX512F-NEXT: vcvttss2usi %xmm2, %rax
1660 ; AVX512F-NEXT: vmovq %rax, %xmm2
1661 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1662 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1663 ; AVX512F-NEXT: vmovq %rax, %xmm2
1664 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1665 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1666 ; AVX512F-NEXT: vmovq %rax, %xmm0
1667 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1668 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1669 ; AVX512F-NEXT: retq
1671 ; AVX512VL-LABEL: fptoui_4f32_to_4i64:
1672 ; AVX512VL: # %bb.0:
1673 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1674 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1675 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1676 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1677 ; AVX512VL-NEXT: vcvttss2usi %xmm2, %rax
1678 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1679 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1680 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1681 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1682 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1683 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1684 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1685 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1686 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1687 ; AVX512VL-NEXT: retq
1689 ; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
1690 ; AVX512DQ: # %bb.0:
1691 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1692 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1693 ; AVX512DQ-NEXT: retq
1695 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
1696 ; AVX512VLDQ: # %bb.0:
1697 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1698 ; AVX512VLDQ-NEXT: retq
1699 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1700 %cvt = fptoui <4 x float> %shuf to <4 x i64>
; Converts all eight lanes with fptoui to <8 x i64> first, then keeps only the low
; four results (shufflevector indices 0-3).
; The SSE2, AVX1 and AVX2 checks expect, for each kept lane, two signed
; (v)cvttss2si conversions - the lane itself and the lane minus a scalar constant
; loaded from memory - merged with sarq $63, andq and orq.
; The AVX512F and AVX512VL checks expect one vcvttss2usi per kept lane. Both
; DQ-enabled check sets expect vcvttps2uqq from ymm0 into zmm0 followed by a kill
; annotation that narrows zmm0 to ymm0.
1704 define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
1705 ; SSE-LABEL: fptoui_8f32_to_4i64:
1707 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1708 ; SSE-NEXT: movaps %xmm0, %xmm2
1709 ; SSE-NEXT: subss %xmm1, %xmm2
1710 ; SSE-NEXT: cvttss2si %xmm2, %rax
1711 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1712 ; SSE-NEXT: movq %rcx, %rdx
1713 ; SSE-NEXT: sarq $63, %rdx
1714 ; SSE-NEXT: andq %rax, %rdx
1715 ; SSE-NEXT: orq %rcx, %rdx
1716 ; SSE-NEXT: movq %rdx, %xmm2
1717 ; SSE-NEXT: movaps %xmm0, %xmm3
1718 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1719 ; SSE-NEXT: cvttss2si %xmm3, %rax
1720 ; SSE-NEXT: subss %xmm1, %xmm3
1721 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1722 ; SSE-NEXT: movq %rax, %rdx
1723 ; SSE-NEXT: sarq $63, %rdx
1724 ; SSE-NEXT: andq %rcx, %rdx
1725 ; SSE-NEXT: orq %rax, %rdx
1726 ; SSE-NEXT: movq %rdx, %xmm3
1727 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1728 ; SSE-NEXT: movaps %xmm0, %xmm3
1729 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1730 ; SSE-NEXT: cvttss2si %xmm3, %rax
1731 ; SSE-NEXT: subss %xmm1, %xmm3
1732 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1733 ; SSE-NEXT: movq %rax, %rdx
1734 ; SSE-NEXT: sarq $63, %rdx
1735 ; SSE-NEXT: andq %rcx, %rdx
1736 ; SSE-NEXT: orq %rax, %rdx
1737 ; SSE-NEXT: movq %rdx, %xmm3
1738 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1739 ; SSE-NEXT: cvttss2si %xmm0, %rax
1740 ; SSE-NEXT: subss %xmm1, %xmm0
1741 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1742 ; SSE-NEXT: movq %rax, %rdx
1743 ; SSE-NEXT: sarq $63, %rdx
1744 ; SSE-NEXT: andq %rcx, %rdx
1745 ; SSE-NEXT: orq %rax, %rdx
1746 ; SSE-NEXT: movq %rdx, %xmm1
1747 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1748 ; SSE-NEXT: movdqa %xmm2, %xmm0
1751 ; AVX1-LABEL: fptoui_8f32_to_4i64:
1753 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1754 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1755 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
1756 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1757 ; AVX1-NEXT: vcvttss2si %xmm2, %rcx
1758 ; AVX1-NEXT: movq %rcx, %rdx
1759 ; AVX1-NEXT: sarq $63, %rdx
1760 ; AVX1-NEXT: andq %rax, %rdx
1761 ; AVX1-NEXT: orq %rcx, %rdx
1762 ; AVX1-NEXT: vmovq %rdx, %xmm2
1763 ; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1764 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
1765 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1766 ; AVX1-NEXT: vcvttss2si %xmm3, %rcx
1767 ; AVX1-NEXT: movq %rcx, %rdx
1768 ; AVX1-NEXT: sarq $63, %rdx
1769 ; AVX1-NEXT: andq %rax, %rdx
1770 ; AVX1-NEXT: orq %rcx, %rdx
1771 ; AVX1-NEXT: vmovq %rdx, %xmm3
1772 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1773 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
1774 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1775 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1776 ; AVX1-NEXT: movq %rcx, %rdx
1777 ; AVX1-NEXT: sarq $63, %rdx
1778 ; AVX1-NEXT: andq %rax, %rdx
1779 ; AVX1-NEXT: orq %rcx, %rdx
1780 ; AVX1-NEXT: vmovq %rdx, %xmm3
1781 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1782 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm1
1783 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1784 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1785 ; AVX1-NEXT: movq %rcx, %rdx
1786 ; AVX1-NEXT: sarq $63, %rdx
1787 ; AVX1-NEXT: andq %rax, %rdx
1788 ; AVX1-NEXT: orq %rcx, %rdx
1789 ; AVX1-NEXT: vmovq %rdx, %xmm0
1790 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1791 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1794 ; AVX2-LABEL: fptoui_8f32_to_4i64:
1796 ; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1797 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1798 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
1799 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1800 ; AVX2-NEXT: vcvttss2si %xmm2, %rcx
1801 ; AVX2-NEXT: movq %rcx, %rdx
1802 ; AVX2-NEXT: sarq $63, %rdx
1803 ; AVX2-NEXT: andq %rax, %rdx
1804 ; AVX2-NEXT: orq %rcx, %rdx
1805 ; AVX2-NEXT: vmovq %rdx, %xmm2
1806 ; AVX2-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1807 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
1808 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1809 ; AVX2-NEXT: vcvttss2si %xmm3, %rcx
1810 ; AVX2-NEXT: movq %rcx, %rdx
1811 ; AVX2-NEXT: sarq $63, %rdx
1812 ; AVX2-NEXT: andq %rax, %rdx
1813 ; AVX2-NEXT: orq %rcx, %rdx
1814 ; AVX2-NEXT: vmovq %rdx, %xmm3
1815 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1816 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
1817 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1818 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1819 ; AVX2-NEXT: movq %rcx, %rdx
1820 ; AVX2-NEXT: sarq $63, %rdx
1821 ; AVX2-NEXT: andq %rax, %rdx
1822 ; AVX2-NEXT: orq %rcx, %rdx
1823 ; AVX2-NEXT: vmovq %rdx, %xmm3
1824 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1825 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
1826 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1827 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1828 ; AVX2-NEXT: movq %rcx, %rdx
1829 ; AVX2-NEXT: sarq $63, %rdx
1830 ; AVX2-NEXT: andq %rax, %rdx
1831 ; AVX2-NEXT: orq %rcx, %rdx
1832 ; AVX2-NEXT: vmovq %rdx, %xmm0
1833 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1834 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1837 ; AVX512F-LABEL: fptoui_8f32_to_4i64:
1839 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1840 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1841 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1842 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1843 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx
1844 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1845 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi
1846 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1847 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1848 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1849 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1850 ; AVX512F-NEXT: vmovq %rax, %xmm2
1851 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1852 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1853 ; AVX512F-NEXT: retq
1855 ; AVX512VL-LABEL: fptoui_8f32_to_4i64:
1856 ; AVX512VL: # %bb.0:
1857 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1858 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1859 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1860 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1861 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx
1862 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1863 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi
1864 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1865 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1866 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1867 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1868 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1869 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1870 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1871 ; AVX512VL-NEXT: retq
1873 ; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
1874 ; AVX512DQ: # %bb.0:
1875 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1876 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1877 ; AVX512DQ-NEXT: retq
1879 ; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
1880 ; AVX512VLDQ: # %bb.0:
1881 ; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1882 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1883 ; AVX512VLDQ-NEXT: retq
1884 %cvt = fptoui <8 x float> %a to <8 x i64>
1885 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Constant folding of fptosi <1.0, -1.0> to <2 x i64>. The SSE and AVX check
; groups each expect a single vector move whose asm comment lists the folded
; lanes; 18446744073709551615 is -1 printed as an unsigned 64-bit value.
1893 define <2 x i64> @fptosi_2f64_to_2i64_const() {
1894 ; SSE-LABEL: fptosi_2f64_to_2i64_const:
1896 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1899 ; AVX-LABEL: fptosi_2f64_to_2i64_const:
1901 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
1903 %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
; Constant folding of fptosi <-1.0, 1.0> to <2 x i32>, widened to <4 x i32>
; with undef upper lanes. The SSE checks expect <4294967295,1,u,u> (-1 shown
; as an unsigned 32-bit value, u = undef lane); the AVX checks expect a
; vmovddup that repeats the 64-bit pair [4294967295,1] into both halves.
1907 define <4 x i32> @fptosi_2f64_to_2i32_const() {
1908 ; SSE-LABEL: fptosi_2f64_to_2i32_const:
1910 ; SSE-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
1913 ; AVX-LABEL: fptosi_2f64_to_2i32_const:
1915 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [4294967295,1,4294967295,1]
1916 ; AVX-NEXT: # xmm0 = mem[0,0]
1918 %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
; Lanes 2 and 3 of the widened result are left undef by the shuffle mask.
1919 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Constant folding of fptosi <1.0, -1.0, 2.0, -3.0> to <4 x i64>. The SSE
; checks expect the 256-bit result split across xmm0 and xmm1; the AVX checks
; expect one ymm move. -1 and -3 appear as 18446744073709551615 and
; 18446744073709551613 (unsigned 64-bit printing).
1923 define <4 x i64> @fptosi_4f64_to_4i64_const() {
1924 ; SSE-LABEL: fptosi_4f64_to_4i64_const:
1926 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1927 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
1930 ; AVX-LABEL: fptosi_4f64_to_4i64_const:
1932 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
1934 %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
; Constant folding of fptosi <-1.0, 1.0, -2.0, 3.0> to <4 x i32>. Both check
; groups expect a single 128-bit move; -1 and -2 appear as 4294967295 and
; 4294967294 (unsigned 32-bit printing).
1938 define <4 x i32> @fptosi_4f64_to_4i32_const() {
1939 ; SSE-LABEL: fptosi_4f64_to_4i32_const:
1941 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1944 ; AVX-LABEL: fptosi_4f64_to_4i32_const:
1946 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1948 %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
; Constant folding of fptoui <2.0, 4.0> to <2 x i64>. Both the SSE and AVX
; check groups expect a single vector move annotated with [2,4].
1952 define <2 x i64> @fptoui_2f64_to_2i64_const() {
1953 ; SSE-LABEL: fptoui_2f64_to_2i64_const:
1955 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
1958 ; AVX-LABEL: fptoui_2f64_to_2i64_const:
1960 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
1962 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
; Constant folding of fptoui <2.0, 4.0> to <2 x i32>, widened to <4 x i32>
; with undef upper lanes. The SSE checks expect <2,4,u,u>; the AVX checks
; expect a vmovddup that repeats the pair [2,4] into both 64-bit halves.
; The %a argument is not referenced by the instructions shown here.
1966 define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
1967 ; SSE-LABEL: fptoui_2f64_to_2i32_const:
1969 ; SSE-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
1972 ; AVX-LABEL: fptoui_2f64_to_2i32_const:
1974 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [2,4,2,4]
1975 ; AVX-NEXT: # xmm0 = mem[0,0]
1977 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
; Lanes 2 and 3 of the widened result are left undef by the shuffle mask.
1978 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Constant folding of fptoui <2.0, 4.0, 6.0, 8.0> to <4 x i64>. The SSE checks
; expect the result split across xmm0 ([2,4]) and xmm1 ([6,8]); the AVX checks
; expect one ymm move. The %a argument is not referenced by the instruction
; shown here.
1982 define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
1983 ; SSE-LABEL: fptoui_4f64_to_4i64_const:
1985 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
1986 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8]
1989 ; AVX-LABEL: fptoui_4f64_to_4i64_const:
1991 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
1993 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
; Constant folding of fptoui <2.0, 4.0, 6.0, 8.0> to <4 x i32>. Both check
; groups expect a single 128-bit move annotated with [2,4,6,8]. The %a
; argument is not referenced by the instruction shown here.
1997 define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
1998 ; SSE-LABEL: fptoui_4f64_to_4i32_const:
2000 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
2003 ; AVX-LABEL: fptoui_4f64_to_4i32_const:
2005 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
2007 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
; Constant folding of fptosi <1.0, -1.0, 2.0, 3.0> (float) to <4 x i32>. Both
; check groups expect a single 128-bit move; -1 appears as 4294967295
; (unsigned 32-bit printing).
2011 define <4 x i32> @fptosi_4f32_to_4i32_const() {
2012 ; SSE-LABEL: fptosi_4f32_to_4i32_const:
2014 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2017 ; AVX-LABEL: fptosi_4f32_to_4i32_const:
2019 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2021 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
; Constant folding of fptosi <1.0, -1.0, 2.0, 3.0> (float) to <4 x i64>. The
; SSE checks expect the result split across xmm0 and xmm1; the AVX checks
; expect one ymm move. -1 appears as 18446744073709551615 (unsigned 64-bit
; printing).
2025 define <4 x i64> @fptosi_4f32_to_4i64_const() {
2026 ; SSE-LABEL: fptosi_4f32_to_4i64_const:
2028 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
2029 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
2032 ; AVX-LABEL: fptosi_4f32_to_4i64_const:
2034 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
2036 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
; Constant folding of fptosi on eight float constants to <8 x i32>. The SSE
; checks expect the low and high halves in xmm0 and xmm1; the AVX checks
; expect one ymm move. -1 and -8 appear as 4294967295 and 4294967288
; (unsigned 32-bit printing). The %a argument is not referenced by the
; instruction shown here.
2040 define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
2041 ; SSE-LABEL: fptosi_8f32_to_8i32_const:
2043 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2044 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
2047 ; AVX-LABEL: fptosi_8f32_to_8i32_const:
2049 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
2051 %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
; Constant folding of fptoui <1.0, 2.0, 4.0, 6.0> (float) to <4 x i32>. Both
; check groups expect a single 128-bit move annotated with [1,2,4,6]. The %a
; argument is not referenced by the instruction shown here.
2055 define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
2056 ; SSE-LABEL: fptoui_4f32_to_4i32_const:
2058 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
2061 ; AVX-LABEL: fptoui_4f32_to_4i32_const:
2063 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
2065 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
; Constant folding of fptoui <1.0, 2.0, 4.0, 8.0> (float) to <4 x i64>. The
; SSE checks expect the result split across xmm0 ([1,2]) and xmm1 ([4,8]);
; the AVX checks expect one ymm move.
2069 define <4 x i64> @fptoui_4f32_to_4i64_const() {
2070 ; SSE-LABEL: fptoui_4f32_to_4i64_const:
2072 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2]
2073 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8]
2076 ; AVX-LABEL: fptoui_4f32_to_4i64_const:
2078 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
2080 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
; Constant folding of fptoui on eight float constants to <8 x i32>. The SSE
; checks expect the low and high halves in xmm0 and xmm1; the AVX checks
; expect one ymm move. The %a argument is not referenced by the instruction
; shown here.
2084 define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
2085 ; SSE-LABEL: fptoui_8f32_to_8i32_const:
2087 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
2088 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
2091 ; AVX-LABEL: fptoui_8f32_to_8i32_const:
2093 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
2095 %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
; fptosi of <2 x x86_fp80> to <2 x i32>, then widened to <4 x i32> with the
; upper two lanes zero.
; - SSE checks: both operands are loaded onto the x87 stack (fldt); each
;   element is then stored with fistpl between a save / modify / restore of
;   the x87 control word (fnstcw, OR with 0xC00, fldcw ... fldcw).
;   NOTE(review): 0xC00 is presumably the rounding-control field set to
;   truncate - confirm against the Intel SDM.
; - AVX checks: the control-word dance is replaced by fisttpl, one per
;   element.
; Both variants then reload the two 32-bit results, interleave them with
; (v)punpckldq and clear the upper 64 bits with (v)movq.
2099 define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
2100 ; SSE-LABEL: fptosi_2f80_to_4i32:
2102 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
2103 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
2104 ; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
2105 ; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2106 ; SSE-NEXT: orl $3072, %eax # imm = 0xC00
2107 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2108 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2109 ; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
2110 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2111 ; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
2112 ; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2113 ; SSE-NEXT: orl $3072, %eax # imm = 0xC00
2114 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2115 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2116 ; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
2117 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2118 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2119 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2120 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2121 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2124 ; AVX-LABEL: fptosi_2f80_to_4i32:
2126 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
2127 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
2128 ; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
2129 ; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
2130 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2131 ; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2132 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2133 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2135 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
; Mask indices 2 and 3 select from the zeroinitializer operand, so the upper
; two result lanes are zero rather than undef.
2136 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; fptosi of <2 x fp128> to <2 x i32>, then widened to <4 x i32> with the upper
; two lanes zero. Both check groups expect one __fixtfsi@PLT call per element:
; the second element (xmm1) is spilled to the stack before the first call, the
; first result is parked in %ebx across the second call, and the two 32-bit
; results are interleaved with (v)punpckldq before (v)movq clears the upper
; 64 bits. %rbx is pushed on entry and popped on exit.
2140 define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
2141 ; SSE-LABEL: fptosi_2f128_to_4i32:
2143 ; SSE-NEXT: pushq %rbx
2144 ; SSE-NEXT: subq $16, %rsp
2145 ; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
2146 ; SSE-NEXT: callq __fixtfsi@PLT
2147 ; SSE-NEXT: movl %eax, %ebx
2148 ; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2149 ; SSE-NEXT: callq __fixtfsi@PLT
2150 ; SSE-NEXT: movd %eax, %xmm0
2151 ; SSE-NEXT: movd %ebx, %xmm1
2152 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2153 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2154 ; SSE-NEXT: addq $16, %rsp
2155 ; SSE-NEXT: popq %rbx
2158 ; AVX-LABEL: fptosi_2f128_to_4i32:
2160 ; AVX-NEXT: pushq %rbx
2161 ; AVX-NEXT: subq $16, %rsp
2162 ; AVX-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill
2163 ; AVX-NEXT: callq __fixtfsi@PLT
2164 ; AVX-NEXT: movl %eax, %ebx
2165 ; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
2166 ; AVX-NEXT: callq __fixtfsi@PLT
2167 ; AVX-NEXT: vmovd %eax, %xmm0
2168 ; AVX-NEXT: vmovd %ebx, %xmm1
2169 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2170 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2171 ; AVX-NEXT: addq $16, %rsp
2172 ; AVX-NEXT: popq %rbx
2174 %cvt = fptosi <2 x fp128> %a to <2 x i32>
; Mask indices 2 and 3 select from the zeroinitializer operand, so the upper
; two result lanes are zero rather than undef.
2175 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; fptosi of <2 x float> to <2 x i8>. Every configuration first converts to
; 32-bit integers with (v)cvttps2dq and then narrows:
; - SSE, VEX, AVX512F and AVX512DQ checks: two signed saturating packs
;   (packssdw, then packsswb).
; - AVX512VL and AVX512VLDQ checks: a single vpmovdb truncation.
2179 define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
2180 ; SSE-LABEL: fptosi_2f32_to_2i8:
2182 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2183 ; SSE-NEXT: packssdw %xmm0, %xmm0
2184 ; SSE-NEXT: packsswb %xmm0, %xmm0
2187 ; VEX-LABEL: fptosi_2f32_to_2i8:
2189 ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
2190 ; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2191 ; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2194 ; AVX512F-LABEL: fptosi_2f32_to_2i8:
2196 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2197 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2198 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2199 ; AVX512F-NEXT: retq
2201 ; AVX512VL-LABEL: fptosi_2f32_to_2i8:
2202 ; AVX512VL: # %bb.0:
2203 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2204 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2205 ; AVX512VL-NEXT: retq
2207 ; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
2208 ; AVX512DQ: # %bb.0:
2209 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2210 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2211 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2212 ; AVX512DQ-NEXT: retq
2214 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
2215 ; AVX512VLDQ: # %bb.0:
2216 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2217 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2218 ; AVX512VLDQ-NEXT: retq
2219 %cvt = fptosi <2 x float> %a to <2 x i8>
; fptosi of <2 x float> to <2 x i16>. Both the SSE and AVX check groups expect
; a conversion to 32-bit integers with (v)cvttps2dq followed by one signed
; saturating pack, (v)packssdw, to reach 16-bit lanes.
2223 define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
2224 ; SSE-LABEL: fptosi_2f32_to_2i16:
2226 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2227 ; SSE-NEXT: packssdw %xmm0, %xmm0
2230 ; AVX-LABEL: fptosi_2f32_to_2i16:
2232 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2233 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2235 %cvt = fptosi <2 x float> %a to <2 x i16>
; fptoui of <2 x float> to <2 x i8>. Every configuration converts with the
; signed (v)cvttps2dq and then narrows:
; - SSE checks: packuswb applied twice.
; - VEX, AVX512F and AVX512DQ checks: vpackusdw followed by vpackuswb.
; - AVX512VL and AVX512VLDQ checks: a single vpmovdb truncation.
2239 define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
2240 ; SSE-LABEL: fptoui_2f32_to_2i8:
2242 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2243 ; SSE-NEXT: packuswb %xmm0, %xmm0
2244 ; SSE-NEXT: packuswb %xmm0, %xmm0
2247 ; VEX-LABEL: fptoui_2f32_to_2i8:
2249 ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
2250 ; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2251 ; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2254 ; AVX512F-LABEL: fptoui_2f32_to_2i8:
2256 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2257 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2258 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2259 ; AVX512F-NEXT: retq
2261 ; AVX512VL-LABEL: fptoui_2f32_to_2i8:
2262 ; AVX512VL: # %bb.0:
2263 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2264 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2265 ; AVX512VL-NEXT: retq
2267 ; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
2268 ; AVX512DQ: # %bb.0:
2269 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2270 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2271 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2272 ; AVX512DQ-NEXT: retq
2274 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
2275 ; AVX512VLDQ: # %bb.0:
2276 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2277 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2278 ; AVX512VLDQ-NEXT: retq
2279 %cvt = fptoui <2 x float> %a to <2 x i8>
; fptoui of <2 x float> to <2 x i16>. Both check groups convert with the
; signed (v)cvttps2dq. The SSE checks then gather the low 16-bit word of each
; 32-bit lane with pshuflw (words 0 and 2); the AVX checks use vpackusdw.
2283 define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
2284 ; SSE-LABEL: fptoui_2f32_to_2i16:
2286 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2287 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2290 ; AVX-LABEL: fptoui_2f32_to_2i16:
2292 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2293 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2295 %cvt = fptoui <2 x float> %a to <2 x i16>
; fptosi of <2 x double> to <2 x i8>. Every configuration first converts to
; 32-bit integers with (v)cvttpd2dq and then narrows:
; - SSE, VEX, AVX512F and AVX512DQ checks: two signed saturating packs
;   (packssdw, then packsswb).
; - AVX512VL and AVX512VLDQ checks: a single vpmovdb truncation.
2299 define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
2300 ; SSE-LABEL: fptosi_2f64_to_2i8:
2302 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2303 ; SSE-NEXT: packssdw %xmm0, %xmm0
2304 ; SSE-NEXT: packsswb %xmm0, %xmm0
2307 ; VEX-LABEL: fptosi_2f64_to_2i8:
2309 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
2310 ; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2311 ; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2314 ; AVX512F-LABEL: fptosi_2f64_to_2i8:
2316 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2317 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2318 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2319 ; AVX512F-NEXT: retq
2321 ; AVX512VL-LABEL: fptosi_2f64_to_2i8:
2322 ; AVX512VL: # %bb.0:
2323 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2324 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2325 ; AVX512VL-NEXT: retq
2327 ; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
2328 ; AVX512DQ: # %bb.0:
2329 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2330 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2331 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2332 ; AVX512DQ-NEXT: retq
2334 ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
2335 ; AVX512VLDQ: # %bb.0:
2336 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2337 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2338 ; AVX512VLDQ-NEXT: retq
2339 %cvt = fptosi <2 x double> %a to <2 x i8>
; fptosi of <2 x double> to <2 x i16>. Both the SSE and AVX check groups
; expect a conversion to 32-bit integers with (v)cvttpd2dq followed by one
; signed saturating pack, (v)packssdw, to reach 16-bit lanes.
2343 define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
2344 ; SSE-LABEL: fptosi_2f64_to_2i16:
2346 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2347 ; SSE-NEXT: packssdw %xmm0, %xmm0
2350 ; AVX-LABEL: fptosi_2f64_to_2i16:
2352 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2353 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2355 %cvt = fptosi <2 x double> %a to <2 x i16>
; fptoui of <2 x double> to <2 x i8>. Every configuration converts with the
; signed (v)cvttpd2dq and then narrows:
; - SSE checks: packuswb applied twice.
; - VEX, AVX512F and AVX512DQ checks: vpackusdw followed by vpackuswb.
; - AVX512VL and AVX512VLDQ checks: a single vpmovdb truncation.
2359 define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
2360 ; SSE-LABEL: fptoui_2f64_to_2i8:
2362 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2363 ; SSE-NEXT: packuswb %xmm0, %xmm0
2364 ; SSE-NEXT: packuswb %xmm0, %xmm0
2367 ; VEX-LABEL: fptoui_2f64_to_2i8:
2369 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
2370 ; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2371 ; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2374 ; AVX512F-LABEL: fptoui_2f64_to_2i8:
2376 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2377 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2378 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2379 ; AVX512F-NEXT: retq
2381 ; AVX512VL-LABEL: fptoui_2f64_to_2i8:
2382 ; AVX512VL: # %bb.0:
2383 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2384 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2385 ; AVX512VL-NEXT: retq
2387 ; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
2388 ; AVX512DQ: # %bb.0:
2389 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2390 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2391 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2392 ; AVX512DQ-NEXT: retq
2394 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
2395 ; AVX512VLDQ: # %bb.0:
2396 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2397 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2398 ; AVX512VLDQ-NEXT: retq
2399 %cvt = fptoui <2 x double> %a to <2 x i8>
; fptoui of <2 x double> to <2 x i16>. Both check groups convert with the
; signed (v)cvttpd2dq. The SSE checks then gather the low 16-bit word of each
; 32-bit lane with pshuflw (words 0 and 2); the AVX checks use vpackusdw.
2403 define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
2404 ; SSE-LABEL: fptoui_2f64_to_2i16:
2406 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2407 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2410 ; AVX-LABEL: fptoui_2f64_to_2i16:
2412 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2413 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2415 %cvt = fptoui <2 x double> %a to <2 x i16>
; fptosi of <8 x double> to <8 x i16>.
; - SSE checks: four cvttpd2dq conversions (one per input xmm register),
;   paired up with unpcklpd, then one packssdw.
; - VEX checks: two ymm-to-xmm vcvttpd2dq conversions and one vpackssdw.
; - AVX-512 checks: one zmm-to-ymm vcvttpd2dq followed by vpmovdw. Without
;   VL (AVX512F, AVX512DQ) vpmovdw runs at zmm width and a kill annotation
;   narrows the result to xmm0; with VL it runs ymm-to-xmm directly.
; All AVX variants end with vzeroupper.
2419 define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
2420 ; SSE-LABEL: fptosi_8f64_to_8i16:
2422 ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2423 ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2424 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2425 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2426 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2427 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2428 ; SSE-NEXT: packssdw %xmm2, %xmm0
2431 ; VEX-LABEL: fptosi_8f64_to_8i16:
2433 ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2434 ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2435 ; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
2436 ; VEX-NEXT: vzeroupper
2439 ; AVX512F-LABEL: fptosi_8f64_to_8i16:
2441 ; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
2442 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2443 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2444 ; AVX512F-NEXT: vzeroupper
2445 ; AVX512F-NEXT: retq
2447 ; AVX512VL-LABEL: fptosi_8f64_to_8i16:
2448 ; AVX512VL: # %bb.0:
2449 ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
2450 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2451 ; AVX512VL-NEXT: vzeroupper
2452 ; AVX512VL-NEXT: retq
2454 ; AVX512DQ-LABEL: fptosi_8f64_to_8i16:
2455 ; AVX512DQ: # %bb.0:
2456 ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2457 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2458 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2459 ; AVX512DQ-NEXT: vzeroupper
2460 ; AVX512DQ-NEXT: retq
2462 ; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16:
2463 ; AVX512VLDQ: # %bb.0:
2464 ; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2465 ; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
2466 ; AVX512VLDQ-NEXT: vzeroupper
2467 ; AVX512VLDQ-NEXT: retq
2468 %cvt = fptosi <8 x double> %a to <8 x i16>
; fptoui of <8 x double> to <8 x i16>. All configurations convert with the
; signed (v)cvttpd2dq.
; - SSE checks: each 4 x i32 half is sign-extended in place from its low 16
;   bits (pslld $16 / psrad $16) before the signed packssdw, so the pack
;   cannot saturate.
; - VEX checks: two ymm-to-xmm conversions and one vpackusdw.
; - AVX-512 checks: one zmm-to-ymm conversion followed by vpmovdw, at zmm
;   width with a kill annotation when VL is absent, ymm-to-xmm otherwise.
; All AVX variants end with vzeroupper.
2472 define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
2473 ; SSE-LABEL: fptoui_8f64_to_8i16:
2475 ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2476 ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2477 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2478 ; SSE-NEXT: pslld $16, %xmm2
2479 ; SSE-NEXT: psrad $16, %xmm2
2480 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2481 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2482 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2483 ; SSE-NEXT: pslld $16, %xmm0
2484 ; SSE-NEXT: psrad $16, %xmm0
2485 ; SSE-NEXT: packssdw %xmm2, %xmm0
2488 ; VEX-LABEL: fptoui_8f64_to_8i16:
2490 ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2491 ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2492 ; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2493 ; VEX-NEXT: vzeroupper
2496 ; AVX512F-LABEL: fptoui_8f64_to_8i16:
2498 ; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
2499 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2500 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2501 ; AVX512F-NEXT: vzeroupper
2502 ; AVX512F-NEXT: retq
2504 ; AVX512VL-LABEL: fptoui_8f64_to_8i16:
2505 ; AVX512VL: # %bb.0:
2506 ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
2507 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2508 ; AVX512VL-NEXT: vzeroupper
2509 ; AVX512VL-NEXT: retq
2511 ; AVX512DQ-LABEL: fptoui_8f64_to_8i16:
2512 ; AVX512DQ: # %bb.0:
2513 ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2514 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2515 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2516 ; AVX512DQ-NEXT: vzeroupper
2517 ; AVX512DQ-NEXT: retq
2519 ; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16:
2520 ; AVX512VLDQ: # %bb.0:
2521 ; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2522 ; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
2523 ; AVX512VLDQ-NEXT: vzeroupper
2524 ; AVX512VLDQ-NEXT: retq
2525 %cvt = fptoui <8 x double> %a to <8 x i16>
; fptosi of <16 x float> to <16 x i8>.
; - SSE checks: four cvttps2dq conversions, two packssdw to 16-bit lanes,
;   then one packsswb.
; - AVX1 / AVX2 checks: two ymm conversions; each result is split with
;   vextractf128 (AVX1) or vextracti128 (AVX2), packed with vpackssdw, and
;   the halves are joined by vpacksswb.
; - AVX512 checks (shared by all four AVX-512 configurations): one zmm
;   vcvttps2dq followed by vpmovdb.
; All AVX variants end with vzeroupper.
2529 define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
2530 ; SSE-LABEL: fptosi_16f32_to_16i8:
2532 ; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2533 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2534 ; SSE-NEXT: packssdw %xmm3, %xmm2
2535 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2536 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2537 ; SSE-NEXT: packssdw %xmm1, %xmm0
2538 ; SSE-NEXT: packsswb %xmm2, %xmm0
2541 ; AVX1-LABEL: fptosi_16f32_to_16i8:
2543 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2544 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2545 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2546 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2547 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2548 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2549 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2550 ; AVX1-NEXT: vzeroupper
2553 ; AVX2-LABEL: fptosi_16f32_to_16i8:
2555 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
2556 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2557 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2558 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2559 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2560 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2561 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2562 ; AVX2-NEXT: vzeroupper
2565 ; AVX512-LABEL: fptosi_16f32_to_16i8:
2567 ; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
2568 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2569 ; AVX512-NEXT: vzeroupper
2571 %cvt = fptosi <16 x float> %a to <16 x i8>
; fptoui of <16 x float> to <16 x i8>. The expected code matches the signed
; variant except for the last narrowing step: the 32-to-16-bit packs stay
; signed ((v)packssdw) and only the final 16-to-8-bit pack is unsigned
; ((v)packuswb).
; - SSE checks: four cvttps2dq, two packssdw, one packuswb.
; - AVX1 / AVX2 checks: two ymm conversions, each split with vextractf128
;   (AVX1) or vextracti128 (AVX2) and packed, then vpackuswb.
; - AVX512 checks (shared by all four AVX-512 configurations): one zmm
;   vcvttps2dq followed by vpmovdb.
; All AVX variants end with vzeroupper.
2575 define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
2576 ; SSE-LABEL: fptoui_16f32_to_16i8:
2578 ; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2579 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2580 ; SSE-NEXT: packssdw %xmm3, %xmm2
2581 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2582 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2583 ; SSE-NEXT: packssdw %xmm1, %xmm0
2584 ; SSE-NEXT: packuswb %xmm2, %xmm0
2587 ; AVX1-LABEL: fptoui_16f32_to_16i8:
2589 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2590 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2591 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2592 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2593 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2594 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2595 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2596 ; AVX1-NEXT: vzeroupper
2599 ; AVX2-LABEL: fptoui_16f32_to_16i8:
2601 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
2602 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2603 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2604 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2605 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2606 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2607 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2608 ; AVX2-NEXT: vzeroupper
2611 ; AVX512-LABEL: fptoui_16f32_to_16i8:
2613 ; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
2614 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2615 ; AVX512-NEXT: vzeroupper
2617 %cvt = fptoui <16 x float> %a to <16 x i8>
; Loads <2 x float> from %x and converts it to <2 x i64> with fptosi.
; - SSE, VEX, AVX512F and AVX512VL checks: the 64-bit pair is loaded with
;   (v)movsd, each lane is converted separately with scalar (v)cvttss2si
;   (lane 1 is first moved to position 0 by shufps / vmovshdup), and the two
;   results are recombined with (v)punpcklqdq.
; - AVX512DQ checks: the load stays separate and the conversion is done by a
;   zmm-wide vcvttps2qq, narrowed by a kill annotation and followed by
;   vzeroupper.
; - AVX512VLDQ checks: the load is folded into a single
;   vcvttps2qq (%rdi), %xmm0.
2621 define <2 x i64> @fptosi_2f32_to_2i64_load(ptr %x) {
2622 ; SSE-LABEL: fptosi_2f32_to_2i64_load:
2624 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2625 ; SSE-NEXT: cvttss2si %xmm1, %rax
2626 ; SSE-NEXT: movq %rax, %xmm0
2627 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2628 ; SSE-NEXT: cvttss2si %xmm1, %rax
2629 ; SSE-NEXT: movq %rax, %xmm1
2630 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2633 ; VEX-LABEL: fptosi_2f32_to_2i64_load:
2635 ; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2636 ; VEX-NEXT: vcvttss2si %xmm0, %rax
2637 ; VEX-NEXT: vmovq %rax, %xmm1
2638 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2639 ; VEX-NEXT: vcvttss2si %xmm0, %rax
2640 ; VEX-NEXT: vmovq %rax, %xmm0
2641 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2644 ; AVX512F-LABEL: fptosi_2f32_to_2i64_load:
2646 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2647 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
2648 ; AVX512F-NEXT: vmovq %rax, %xmm1
2649 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2650 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
2651 ; AVX512F-NEXT: vmovq %rax, %xmm0
2652 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2653 ; AVX512F-NEXT: retq
2655 ; AVX512VL-LABEL: fptosi_2f32_to_2i64_load:
2656 ; AVX512VL: # %bb.0:
2657 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2658 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
2659 ; AVX512VL-NEXT: vmovq %rax, %xmm1
2660 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2661 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
2662 ; AVX512VL-NEXT: vmovq %rax, %xmm0
2663 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2664 ; AVX512VL-NEXT: retq
2666 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load:
2667 ; AVX512DQ: # %bb.0:
2668 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2669 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
2670 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2671 ; AVX512DQ-NEXT: vzeroupper
2672 ; AVX512DQ-NEXT: retq
2674 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
2675 ; AVX512VLDQ: # %bb.0:
2676 ; AVX512VLDQ-NEXT: vcvttps2qq (%rdi), %xmm0
2677 ; AVX512VLDQ-NEXT: retq
2678 %a = load <2 x float>, ptr %x
2679 %b = fptosi <2 x float> %a to <2 x i64>
; Loads <2 x float> from %x and converts it to <2 x i64> with fptoui.
; - SSE and VEX checks (no unsigned scalar convert available): each lane is
;   converted twice with the signed (v)cvttss2si, once directly and once
;   after subtracting a constant loaded from memory. The direct result's sign
;   bit, broadcast by sarq $63, masks the adjusted result (andq), and the two
;   are merged with orq. NOTE(review): the subtracted constant is not visible
;   in these checks; presumably 2^63 as a float - confirm against llc output.
; - AVX512F and AVX512VL checks: one scalar vcvttss2usi per lane, recombined
;   with vpunpcklqdq.
; - AVX512DQ checks: separate load, zmm-wide vcvttps2uqq, kill annotation,
;   vzeroupper.
; - AVX512VLDQ checks: the load is folded into a single
;   vcvttps2uqq (%rdi), %xmm0.
2683 define <2 x i64> @fptoui_2f32_to_2i64_load(ptr %x) {
2684 ; SSE-LABEL: fptoui_2f32_to_2i64_load:
2686 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2687 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2688 ; SSE-NEXT: movaps %xmm1, %xmm0
2689 ; SSE-NEXT: subss %xmm2, %xmm0
2690 ; SSE-NEXT: cvttss2si %xmm0, %rax
2691 ; SSE-NEXT: cvttss2si %xmm1, %rcx
2692 ; SSE-NEXT: movq %rcx, %rdx
2693 ; SSE-NEXT: sarq $63, %rdx
2694 ; SSE-NEXT: andq %rax, %rdx
2695 ; SSE-NEXT: orq %rcx, %rdx
2696 ; SSE-NEXT: movq %rdx, %xmm0
2697 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2698 ; SSE-NEXT: cvttss2si %xmm1, %rax
2699 ; SSE-NEXT: subss %xmm2, %xmm1
2700 ; SSE-NEXT: cvttss2si %xmm1, %rcx
2701 ; SSE-NEXT: movq %rax, %rdx
2702 ; SSE-NEXT: sarq $63, %rdx
2703 ; SSE-NEXT: andq %rcx, %rdx
2704 ; SSE-NEXT: orq %rax, %rdx
2705 ; SSE-NEXT: movq %rdx, %xmm1
2706 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2709 ; VEX-LABEL: fptoui_2f32_to_2i64_load:
2711 ; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2712 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2713 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
2714 ; VEX-NEXT: vcvttss2si %xmm2, %rax
2715 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
2716 ; VEX-NEXT: movq %rcx, %rdx
2717 ; VEX-NEXT: sarq $63, %rdx
2718 ; VEX-NEXT: andq %rax, %rdx
2719 ; VEX-NEXT: orq %rcx, %rdx
2720 ; VEX-NEXT: vmovq %rdx, %xmm2
2721 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2722 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm1
2723 ; VEX-NEXT: vcvttss2si %xmm1, %rax
2724 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
2725 ; VEX-NEXT: movq %rcx, %rdx
2726 ; VEX-NEXT: sarq $63, %rdx
2727 ; VEX-NEXT: andq %rax, %rdx
2728 ; VEX-NEXT: orq %rcx, %rdx
2729 ; VEX-NEXT: vmovq %rdx, %xmm0
2730 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2733 ; AVX512F-LABEL: fptoui_2f32_to_2i64_load:
2735 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2736 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
2737 ; AVX512F-NEXT: vmovq %rax, %xmm1
2738 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2739 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
2740 ; AVX512F-NEXT: vmovq %rax, %xmm0
2741 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2742 ; AVX512F-NEXT: retq
2744 ; AVX512VL-LABEL: fptoui_2f32_to_2i64_load:
2745 ; AVX512VL: # %bb.0:
2746 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2747 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
2748 ; AVX512VL-NEXT: vmovq %rax, %xmm1
2749 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2750 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
2751 ; AVX512VL-NEXT: vmovq %rax, %xmm0
2752 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2753 ; AVX512VL-NEXT: retq
2755 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load:
2756 ; AVX512DQ: # %bb.0:
2757 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2758 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
2759 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2760 ; AVX512DQ-NEXT: vzeroupper
2761 ; AVX512DQ-NEXT: retq
2763 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
2764 ; AVX512VLDQ: # %bb.0:
2765 ; AVX512VLDQ-NEXT: vcvttps2uqq (%rdi), %xmm0
2766 ; AVX512VLDQ-NEXT: retq
2767 %a = load <2 x float>, ptr %x
2768 %b = fptoui <2 x float> %a to <2 x i64>