1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ
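10 ; The 32-bit invocations below are smoke tests - their output is not piped to FileCheck, so they only make sure llc gets through these conversions on i686 without crashing.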
11 ; RUN: llc < %s -mtriple=i686-unknown-unknown
12 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
13 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
16 ; Double to Signed Integer
; Signed conversion of <2 x double> to <2 x i64>.
; The SSE, VEX, AVX512F and AVX512VL checks expect each lane to be converted
; with scalar (v)cvttsd2si and the two results repacked with (v)punpcklqdq.
; The AVX512DQ checks expect one packed vcvttpd2qq on zmm0 (input widened from
; xmm0); the AVX512VLDQ checks expect it directly on xmm0.
19 define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
20 ; SSE-LABEL: fptosi_2f64_to_2i64:
22 ; SSE-NEXT: cvttsd2si %xmm0, %rax
23 ; SSE-NEXT: movq %rax, %xmm1
24 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
25 ; SSE-NEXT: cvttsd2si %xmm0, %rax
26 ; SSE-NEXT: movq %rax, %xmm0
27 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28 ; SSE-NEXT: movdqa %xmm1, %xmm0
31 ; VEX-LABEL: fptosi_2f64_to_2i64:
33 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
34 ; VEX-NEXT: vmovq %rax, %xmm1
35 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
36 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
37 ; VEX-NEXT: vmovq %rax, %xmm0
38 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
41 ; AVX512F-LABEL: fptosi_2f64_to_2i64:
43 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
44 ; AVX512F-NEXT: vmovq %rax, %xmm1
45 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
46 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
47 ; AVX512F-NEXT: vmovq %rax, %xmm0
48 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
51 ; AVX512VL-LABEL: fptosi_2f64_to_2i64:
53 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
54 ; AVX512VL-NEXT: vmovq %rax, %xmm1
55 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
56 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
57 ; AVX512VL-NEXT: vmovq %rax, %xmm0
58 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
61 ; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
63 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
64 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
65 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
66 ; AVX512DQ-NEXT: vzeroupper
69 ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
70 ; AVX512VLDQ: # %bb.0:
71 ; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
72 ; AVX512VLDQ-NEXT: retq
73 %cvt = fptosi <2 x double> %a to <2 x i64>
; Signed conversion of <2 x double> to <2 x i32>, then widened to <4 x i32> by
; a shuffle whose lanes 2 and 3 come from zeroinitializer.
; The visible SSE and AVX checks expect a single (v)cvttpd2dq on xmm0.
77 define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
78 ; SSE-LABEL: fptosi_2f64_to_4i32:
80 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
83 ; AVX-LABEL: fptosi_2f64_to_4i32:
85 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
87 %cvt = fptosi <2 x double> %a to <2 x i32>
88 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Signed conversion of <2 x double> to <2 x i32> with no widening shuffle.
; The visible SSE and AVX checks expect a single (v)cvttpd2dq on xmm0.
92 define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
93 ; SSE-LABEL: fptosi_2f64_to_2i32:
95 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
98 ; AVX-LABEL: fptosi_2f64_to_2i32:
100 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
102 %cvt = fptosi <2 x double> %a to <2 x i32>
; The <2 x double> argument is first widened to <4 x double> (lanes 2 and 3
; undef) and only then converted with fptosi to <4 x i32>.
; The SSE checks expect the 128-bit cvttpd2dq; the AVX checks expect the
; 256-bit source form (ymm0 to xmm0) followed by vzeroupper.
106 define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
107 ; SSE-LABEL: fptosi_4f64_to_2i32:
109 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
112 ; AVX-LABEL: fptosi_4f64_to_2i32:
114 ; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
115 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
116 ; AVX-NEXT: vzeroupper
118 %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
119 %cvt = fptosi <4 x double> %ext to <4 x i32>
; Signed conversion of <4 x double> to <4 x i64>.
; The SSE checks expect four scalar cvttsd2si conversions across xmm0 and xmm1.
; The AVX1, AVX2, AVX512F and AVX512VL checks expect the upper half to be
; extracted with vextractf128, four scalar vcvttsd2si conversions, and the
; halves rejoined with vinsertf128 (AVX1) or vinserti128 (the others).
; The AVX512DQ checks expect one vcvttpd2qq on zmm0; the AVX512VLDQ checks
; expect it on ymm0.
123 define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
124 ; SSE-LABEL: fptosi_4f64_to_4i64:
126 ; SSE-NEXT: cvttsd2si %xmm0, %rax
127 ; SSE-NEXT: movq %rax, %xmm2
128 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
129 ; SSE-NEXT: cvttsd2si %xmm0, %rax
130 ; SSE-NEXT: movq %rax, %xmm0
131 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
132 ; SSE-NEXT: cvttsd2si %xmm1, %rax
133 ; SSE-NEXT: movq %rax, %xmm3
134 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
135 ; SSE-NEXT: cvttsd2si %xmm1, %rax
136 ; SSE-NEXT: movq %rax, %xmm0
137 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
138 ; SSE-NEXT: movdqa %xmm2, %xmm0
139 ; SSE-NEXT: movdqa %xmm3, %xmm1
142 ; AVX1-LABEL: fptosi_4f64_to_4i64:
144 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
145 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
146 ; AVX1-NEXT: vmovq %rax, %xmm2
147 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
148 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
149 ; AVX1-NEXT: vmovq %rax, %xmm1
150 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
151 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
152 ; AVX1-NEXT: vmovq %rax, %xmm2
153 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
154 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
155 ; AVX1-NEXT: vmovq %rax, %xmm0
156 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
157 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
160 ; AVX2-LABEL: fptosi_4f64_to_4i64:
162 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
163 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
164 ; AVX2-NEXT: vmovq %rax, %xmm2
165 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
166 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
167 ; AVX2-NEXT: vmovq %rax, %xmm1
168 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
169 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
170 ; AVX2-NEXT: vmovq %rax, %xmm2
171 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
172 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
173 ; AVX2-NEXT: vmovq %rax, %xmm0
174 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
175 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
178 ; AVX512F-LABEL: fptosi_4f64_to_4i64:
180 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
181 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
182 ; AVX512F-NEXT: vmovq %rax, %xmm2
183 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
184 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
185 ; AVX512F-NEXT: vmovq %rax, %xmm1
186 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
187 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
188 ; AVX512F-NEXT: vmovq %rax, %xmm2
189 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
190 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
191 ; AVX512F-NEXT: vmovq %rax, %xmm0
192 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
193 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
196 ; AVX512VL-LABEL: fptosi_4f64_to_4i64:
198 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
199 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
200 ; AVX512VL-NEXT: vmovq %rax, %xmm2
201 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
202 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
203 ; AVX512VL-NEXT: vmovq %rax, %xmm1
204 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
205 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
206 ; AVX512VL-NEXT: vmovq %rax, %xmm2
207 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
208 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
209 ; AVX512VL-NEXT: vmovq %rax, %xmm0
210 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
211 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
212 ; AVX512VL-NEXT: retq
214 ; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
216 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
217 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
218 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
219 ; AVX512DQ-NEXT: retq
221 ; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64:
222 ; AVX512VLDQ: # %bb.0:
223 ; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
224 ; AVX512VLDQ-NEXT: retq
225 %cvt = fptosi <4 x double> %a to <4 x i64>
; Signed conversion of <4 x double> to <4 x i32>.
; The SSE checks expect one cvttpd2dq per 128-bit half, merged with unpcklpd.
; The AVX checks expect a single vcvttpd2dq from ymm0 to xmm0, then vzeroupper.
229 define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
230 ; SSE-LABEL: fptosi_4f64_to_4i32:
232 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
233 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
234 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
237 ; AVX-LABEL: fptosi_4f64_to_4i32:
239 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
240 ; AVX-NEXT: vzeroupper
242 %cvt = fptosi <4 x double> %a to <4 x i32>
247 ; Double to Unsigned Integer
; Unsigned conversion of <2 x double> to <2 x i64>.
; The SSE and VEX checks expect an emulation built from signed conversions -
; each lane goes through (v)cvttsd2si twice, once as-is and once after
; subtracting a constant loaded from memory, and the two results are merged
; with sarq $63 / andq / orq. The constant's value is not visible in the
; checks (presumably 2^63 - confirm).
; The AVX512F and AVX512VL checks expect scalar vcvttsd2usi per lane.
; The AVX512DQ checks expect packed vcvttpd2uqq on zmm0, and the AVX512VLDQ
; checks expect it on xmm0.
250 define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
251 ; SSE-LABEL: fptoui_2f64_to_2i64:
253 ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
254 ; SSE-NEXT: movapd %xmm0, %xmm1
255 ; SSE-NEXT: subsd %xmm2, %xmm1
256 ; SSE-NEXT: cvttsd2si %xmm1, %rax
257 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
258 ; SSE-NEXT: movq %rcx, %rdx
259 ; SSE-NEXT: sarq $63, %rdx
260 ; SSE-NEXT: andq %rax, %rdx
261 ; SSE-NEXT: orq %rcx, %rdx
262 ; SSE-NEXT: movq %rdx, %xmm1
263 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
264 ; SSE-NEXT: cvttsd2si %xmm0, %rax
265 ; SSE-NEXT: subsd %xmm2, %xmm0
266 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
267 ; SSE-NEXT: movq %rax, %rdx
268 ; SSE-NEXT: sarq $63, %rdx
269 ; SSE-NEXT: andq %rcx, %rdx
270 ; SSE-NEXT: orq %rax, %rdx
271 ; SSE-NEXT: movq %rdx, %xmm0
272 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
273 ; SSE-NEXT: movdqa %xmm1, %xmm0
276 ; VEX-LABEL: fptoui_2f64_to_2i64:
278 ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
279 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
280 ; VEX-NEXT: vcvttsd2si %xmm2, %rax
281 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
282 ; VEX-NEXT: movq %rcx, %rdx
283 ; VEX-NEXT: sarq $63, %rdx
284 ; VEX-NEXT: andq %rax, %rdx
285 ; VEX-NEXT: orq %rcx, %rdx
286 ; VEX-NEXT: vmovq %rdx, %xmm2
287 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
288 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm1
289 ; VEX-NEXT: vcvttsd2si %xmm1, %rax
290 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
291 ; VEX-NEXT: movq %rcx, %rdx
292 ; VEX-NEXT: sarq $63, %rdx
293 ; VEX-NEXT: andq %rax, %rdx
294 ; VEX-NEXT: orq %rcx, %rdx
295 ; VEX-NEXT: vmovq %rdx, %xmm0
296 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
299 ; AVX512F-LABEL: fptoui_2f64_to_2i64:
301 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
302 ; AVX512F-NEXT: vmovq %rax, %xmm1
303 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
304 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
305 ; AVX512F-NEXT: vmovq %rax, %xmm0
306 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
309 ; AVX512VL-LABEL: fptoui_2f64_to_2i64:
311 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
312 ; AVX512VL-NEXT: vmovq %rax, %xmm1
313 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
314 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
315 ; AVX512VL-NEXT: vmovq %rax, %xmm0
316 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
317 ; AVX512VL-NEXT: retq
319 ; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
321 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
322 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
323 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
324 ; AVX512DQ-NEXT: vzeroupper
325 ; AVX512DQ-NEXT: retq
327 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
328 ; AVX512VLDQ: # %bb.0:
329 ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
330 ; AVX512VLDQ-NEXT: retq
331 %cvt = fptoui <2 x double> %a to <2 x i64>
; Unsigned conversion of <2 x double> to <2 x i32>, then widened to <4 x i32>
; by a shuffle whose lanes 2 and 3 come from zeroinitializer.
; The SSE and VEX checks expect an emulation with two signed (v)cvttpd2dq
; conversions (the second after adding a constant-pool value) merged with
; psrad $31 / and / or.
; The AVX512F and AVX512DQ checks expect vcvttpd2udq on zmm0 followed by a
; vmovq that zeroes the upper half of xmm0. The AVX512VL and AVX512VLDQ checks
; expect only the xmm form of vcvttpd2udq.
335 define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
336 ; SSE-LABEL: fptoui_2f64_to_4i32:
338 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
339 ; SSE-NEXT: movapd %xmm1, %xmm2
340 ; SSE-NEXT: psrad $31, %xmm2
341 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
342 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
343 ; SSE-NEXT: andpd %xmm2, %xmm0
344 ; SSE-NEXT: orpd %xmm1, %xmm0
347 ; VEX-LABEL: fptoui_2f64_to_4i32:
349 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1
350 ; VEX-NEXT: vpsrad $31, %xmm1, %xmm2
351 ; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
352 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
353 ; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0
354 ; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0
357 ; AVX512F-LABEL: fptoui_2f64_to_4i32:
359 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
360 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
361 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
362 ; AVX512F-NEXT: vzeroupper
365 ; AVX512VL-LABEL: fptoui_2f64_to_4i32:
367 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
368 ; AVX512VL-NEXT: retq
370 ; AVX512DQ-LABEL: fptoui_2f64_to_4i32:
372 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
373 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
374 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
375 ; AVX512DQ-NEXT: vzeroupper
376 ; AVX512DQ-NEXT: retq
378 ; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
379 ; AVX512VLDQ: # %bb.0:
380 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
381 ; AVX512VLDQ-NEXT: retq
382 %cvt = fptoui <2 x double> %a to <2 x i32>
383 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Unsigned conversion of <2 x double> to <2 x i32>, then widened to <4 x i32>
; with lanes 2 and 3 left undef (contrast with fptoui_2f64_to_4i32, which
; zero-fills them).
; The SSE and VEX checks expect the same two-conversion emulation merged with
; psrad $31 / and / or. The AVX512F and AVX512DQ checks expect vcvttpd2udq on
; zmm0 with no zeroing vmovq afterwards. The AVX512VL and AVX512VLDQ checks
; expect the xmm form of vcvttpd2udq.
387 define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
388 ; SSE-LABEL: fptoui_2f64_to_2i32:
390 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
391 ; SSE-NEXT: movapd %xmm1, %xmm2
392 ; SSE-NEXT: psrad $31, %xmm2
393 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
394 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
395 ; SSE-NEXT: andpd %xmm2, %xmm0
396 ; SSE-NEXT: orpd %xmm1, %xmm0
399 ; VEX-LABEL: fptoui_2f64_to_2i32:
401 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1
402 ; VEX-NEXT: vpsrad $31, %xmm1, %xmm2
403 ; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
404 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
405 ; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0
406 ; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0
409 ; AVX512F-LABEL: fptoui_2f64_to_2i32:
411 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
412 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
413 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
414 ; AVX512F-NEXT: vzeroupper
417 ; AVX512VL-LABEL: fptoui_2f64_to_2i32:
419 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
420 ; AVX512VL-NEXT: retq
422 ; AVX512DQ-LABEL: fptoui_2f64_to_2i32:
424 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
425 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
426 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
427 ; AVX512DQ-NEXT: vzeroupper
428 ; AVX512DQ-NEXT: retq
430 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
431 ; AVX512VLDQ: # %bb.0:
432 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
433 ; AVX512VLDQ-NEXT: retq
434 %cvt = fptoui <2 x double> %a to <2 x i32>
435 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; The <2 x double> argument is first widened to <4 x double> with lanes 2 and
; 3 taken from zeroinitializer, then converted with fptoui to <4 x i32>.
; The SSE checks expect the 128-bit emulation (two cvttpd2dq merged with
; psrad $31 / and / or). Every AVX-family prefix starts with a same-register
; 128-bit move of xmm0 (presumably to clear the upper lanes for the zero-fill
; - confirm). AVX1 and AVX2 then emulate with two 256-bit vcvttpd2dq around a
; vsubpd, while the AVX512 prefixes use vcvttpd2udq (zmm source without VL,
; ymm source with VL).
439 define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
440 ; SSE-LABEL: fptoui_4f64_to_2i32:
442 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
443 ; SSE-NEXT: movapd %xmm1, %xmm2
444 ; SSE-NEXT: psrad $31, %xmm2
445 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
446 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
447 ; SSE-NEXT: andpd %xmm2, %xmm0
448 ; SSE-NEXT: orpd %xmm1, %xmm0
451 ; AVX1-LABEL: fptoui_4f64_to_2i32:
453 ; AVX1-NEXT: vmovapd %xmm0, %xmm0
454 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1
455 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
456 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
457 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
458 ; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
459 ; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0
460 ; AVX1-NEXT: vzeroupper
463 ; AVX2-LABEL: fptoui_4f64_to_2i32:
465 ; AVX2-NEXT: vmovapd %xmm0, %xmm0
466 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
467 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
468 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
469 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
470 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
471 ; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1
472 ; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0
473 ; AVX2-NEXT: vzeroupper
476 ; AVX512F-LABEL: fptoui_4f64_to_2i32:
478 ; AVX512F-NEXT: vmovaps %xmm0, %xmm0
479 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
480 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
481 ; AVX512F-NEXT: vzeroupper
484 ; AVX512VL-LABEL: fptoui_4f64_to_2i32:
486 ; AVX512VL-NEXT: vmovaps %xmm0, %xmm0
487 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
488 ; AVX512VL-NEXT: vzeroupper
489 ; AVX512VL-NEXT: retq
491 ; AVX512DQ-LABEL: fptoui_4f64_to_2i32:
493 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
494 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
495 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
496 ; AVX512DQ-NEXT: vzeroupper
497 ; AVX512DQ-NEXT: retq
499 ; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32:
500 ; AVX512VLDQ: # %bb.0:
501 ; AVX512VLDQ-NEXT: vmovaps %xmm0, %xmm0
502 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
503 ; AVX512VLDQ-NEXT: vzeroupper
504 ; AVX512VLDQ-NEXT: retq
505 %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
506 %cvt = fptoui <4 x double> %ext to <4 x i32>
; Unsigned conversion of <4 x double> to <4 x i64>.
; The SSE, AVX1 and AVX2 checks expect a per-lane emulation - every lane is
; converted twice with signed (v)cvttsd2si, once as-is and once after
; subtracting a constant loaded from memory, and merged with sarq $63 / andq /
; orq. The constant's value is not visible in the checks (presumably 2^63 -
; confirm).
; The AVX512F and AVX512VL checks expect scalar vcvttsd2usi per lane.
; The AVX512DQ checks expect packed vcvttpd2uqq on zmm0, and the AVX512VLDQ
; checks expect it on ymm0.
510 define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
511 ; SSE-LABEL: fptoui_4f64_to_4i64:
513 ; SSE-NEXT: movapd %xmm0, %xmm2
514 ; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
515 ; SSE-NEXT: subsd %xmm3, %xmm0
516 ; SSE-NEXT: cvttsd2si %xmm0, %rax
517 ; SSE-NEXT: cvttsd2si %xmm2, %rcx
518 ; SSE-NEXT: movq %rcx, %rdx
519 ; SSE-NEXT: sarq $63, %rdx
520 ; SSE-NEXT: andq %rax, %rdx
521 ; SSE-NEXT: orq %rcx, %rdx
522 ; SSE-NEXT: movq %rdx, %xmm0
523 ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
524 ; SSE-NEXT: cvttsd2si %xmm2, %rax
525 ; SSE-NEXT: subsd %xmm3, %xmm2
526 ; SSE-NEXT: cvttsd2si %xmm2, %rcx
527 ; SSE-NEXT: movq %rax, %rdx
528 ; SSE-NEXT: sarq $63, %rdx
529 ; SSE-NEXT: andq %rcx, %rdx
530 ; SSE-NEXT: orq %rax, %rdx
531 ; SSE-NEXT: movq %rdx, %xmm2
532 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
533 ; SSE-NEXT: movapd %xmm1, %xmm2
534 ; SSE-NEXT: subsd %xmm3, %xmm2
535 ; SSE-NEXT: cvttsd2si %xmm2, %rax
536 ; SSE-NEXT: cvttsd2si %xmm1, %rcx
537 ; SSE-NEXT: movq %rcx, %rdx
538 ; SSE-NEXT: sarq $63, %rdx
539 ; SSE-NEXT: andq %rax, %rdx
540 ; SSE-NEXT: orq %rcx, %rdx
541 ; SSE-NEXT: movq %rdx, %xmm2
542 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
543 ; SSE-NEXT: cvttsd2si %xmm1, %rax
544 ; SSE-NEXT: subsd %xmm3, %xmm1
545 ; SSE-NEXT: cvttsd2si %xmm1, %rcx
546 ; SSE-NEXT: movq %rax, %rdx
547 ; SSE-NEXT: sarq $63, %rdx
548 ; SSE-NEXT: andq %rcx, %rdx
549 ; SSE-NEXT: orq %rax, %rdx
550 ; SSE-NEXT: movq %rdx, %xmm1
551 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
552 ; SSE-NEXT: movdqa %xmm2, %xmm1
555 ; AVX1-LABEL: fptoui_4f64_to_4i64:
557 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
558 ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
559 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
560 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
561 ; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
562 ; AVX1-NEXT: movq %rcx, %rdx
563 ; AVX1-NEXT: sarq $63, %rdx
564 ; AVX1-NEXT: andq %rax, %rdx
565 ; AVX1-NEXT: orq %rcx, %rdx
566 ; AVX1-NEXT: vmovq %rdx, %xmm3
567 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
568 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4
569 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
570 ; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
571 ; AVX1-NEXT: movq %rcx, %rdx
572 ; AVX1-NEXT: sarq $63, %rdx
573 ; AVX1-NEXT: andq %rax, %rdx
574 ; AVX1-NEXT: orq %rcx, %rdx
575 ; AVX1-NEXT: vmovq %rdx, %xmm2
576 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
577 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3
578 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
579 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
580 ; AVX1-NEXT: movq %rcx, %rdx
581 ; AVX1-NEXT: sarq $63, %rdx
582 ; AVX1-NEXT: andq %rax, %rdx
583 ; AVX1-NEXT: orq %rcx, %rdx
584 ; AVX1-NEXT: vmovq %rdx, %xmm3
585 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
586 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm1
587 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
588 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
589 ; AVX1-NEXT: movq %rcx, %rdx
590 ; AVX1-NEXT: sarq $63, %rdx
591 ; AVX1-NEXT: andq %rax, %rdx
592 ; AVX1-NEXT: orq %rcx, %rdx
593 ; AVX1-NEXT: vmovq %rdx, %xmm0
594 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
595 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
598 ; AVX2-LABEL: fptoui_4f64_to_4i64:
600 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
601 ; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
602 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3
603 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
604 ; AVX2-NEXT: vcvttsd2si %xmm2, %rcx
605 ; AVX2-NEXT: movq %rcx, %rdx
606 ; AVX2-NEXT: sarq $63, %rdx
607 ; AVX2-NEXT: andq %rax, %rdx
608 ; AVX2-NEXT: orq %rcx, %rdx
609 ; AVX2-NEXT: vmovq %rdx, %xmm3
610 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
611 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4
612 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
613 ; AVX2-NEXT: vcvttsd2si %xmm2, %rcx
614 ; AVX2-NEXT: movq %rcx, %rdx
615 ; AVX2-NEXT: sarq $63, %rdx
616 ; AVX2-NEXT: andq %rax, %rdx
617 ; AVX2-NEXT: orq %rcx, %rdx
618 ; AVX2-NEXT: vmovq %rdx, %xmm2
619 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
620 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3
621 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
622 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
623 ; AVX2-NEXT: movq %rcx, %rdx
624 ; AVX2-NEXT: sarq $63, %rdx
625 ; AVX2-NEXT: andq %rax, %rdx
626 ; AVX2-NEXT: orq %rcx, %rdx
627 ; AVX2-NEXT: vmovq %rdx, %xmm3
628 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
629 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm1
630 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
631 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
632 ; AVX2-NEXT: movq %rcx, %rdx
633 ; AVX2-NEXT: sarq $63, %rdx
634 ; AVX2-NEXT: andq %rax, %rdx
635 ; AVX2-NEXT: orq %rcx, %rdx
636 ; AVX2-NEXT: vmovq %rdx, %xmm0
637 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
638 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
641 ; AVX512F-LABEL: fptoui_4f64_to_4i64:
643 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
644 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
645 ; AVX512F-NEXT: vmovq %rax, %xmm2
646 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
647 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
648 ; AVX512F-NEXT: vmovq %rax, %xmm1
649 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
650 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
651 ; AVX512F-NEXT: vmovq %rax, %xmm2
652 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
653 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
654 ; AVX512F-NEXT: vmovq %rax, %xmm0
655 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
656 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
659 ; AVX512VL-LABEL: fptoui_4f64_to_4i64:
661 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
662 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
663 ; AVX512VL-NEXT: vmovq %rax, %xmm2
664 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
665 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
666 ; AVX512VL-NEXT: vmovq %rax, %xmm1
667 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
668 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
669 ; AVX512VL-NEXT: vmovq %rax, %xmm2
670 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
671 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
672 ; AVX512VL-NEXT: vmovq %rax, %xmm0
673 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
674 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
675 ; AVX512VL-NEXT: retq
677 ; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
679 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
680 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
681 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
682 ; AVX512DQ-NEXT: retq
684 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64:
685 ; AVX512VLDQ: # %bb.0:
686 ; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0
687 ; AVX512VLDQ-NEXT: retq
688 %cvt = fptoui <4 x double> %a to <4 x i64>
; Unsigned conversion of <4 x double> to <4 x i32>.
; The SSE, AVX1 and AVX2 checks expect an emulation from signed conversions -
; the input is converted both as-is and after subtracting a constant (shown as
; 2.147483648E+9 in the SSE and AVX2 checks, a constant-pool operand in AVX1),
; and the results are merged with psrad $31 / and / or. SSE does this once per
; 128-bit half and joins the halves with punpcklqdq.
; The AVX512 prefixes expect a single vcvttpd2udq (zmm source without VL, ymm
; source with VL).
692 define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
693 ; SSE-LABEL: fptoui_4f64_to_4i32:
695 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [2.147483648E+9,2.147483648E+9]
696 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm3
697 ; SSE-NEXT: subpd %xmm2, %xmm1
698 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
699 ; SSE-NEXT: movapd %xmm3, %xmm4
700 ; SSE-NEXT: psrad $31, %xmm4
701 ; SSE-NEXT: pand %xmm1, %xmm4
702 ; SSE-NEXT: por %xmm3, %xmm4
703 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
704 ; SSE-NEXT: subpd %xmm2, %xmm0
705 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm2
706 ; SSE-NEXT: movapd %xmm1, %xmm0
707 ; SSE-NEXT: psrad $31, %xmm0
708 ; SSE-NEXT: pand %xmm2, %xmm0
709 ; SSE-NEXT: por %xmm1, %xmm0
710 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
713 ; AVX1-LABEL: fptoui_4f64_to_4i32:
715 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1
716 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
717 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
718 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
719 ; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
720 ; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0
721 ; AVX1-NEXT: vzeroupper
724 ; AVX2-LABEL: fptoui_4f64_to_4i32:
726 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
727 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
728 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
729 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
730 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
731 ; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1
732 ; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0
733 ; AVX2-NEXT: vzeroupper
736 ; AVX512F-LABEL: fptoui_4f64_to_4i32:
738 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
739 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
740 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
741 ; AVX512F-NEXT: vzeroupper
744 ; AVX512VL-LABEL: fptoui_4f64_to_4i32:
746 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
747 ; AVX512VL-NEXT: vzeroupper
748 ; AVX512VL-NEXT: retq
750 ; AVX512DQ-LABEL: fptoui_4f64_to_4i32:
752 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
753 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
754 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
755 ; AVX512DQ-NEXT: vzeroupper
756 ; AVX512DQ-NEXT: retq
758 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32:
759 ; AVX512VLDQ: # %bb.0:
760 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
761 ; AVX512VLDQ-NEXT: vzeroupper
762 ; AVX512VLDQ-NEXT: retq
763 %cvt = fptoui <4 x double> %a to <4 x i32>
768 ; Float to Signed Integer
; Signed conversion of <2 x float> to <2 x i32>.
; The visible SSE and AVX checks expect a single (v)cvttps2dq on xmm0.
771 define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
772 ; SSE-LABEL: fptosi_2f32_to_2i32:
774 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
777 ; AVX-LABEL: fptosi_2f32_to_2i32:
779 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
781 %cvt = fptosi <2 x float> %a to <2 x i32>
; Signed conversion of <4 x float> to <4 x i32>.
; The visible SSE and AVX checks expect a single (v)cvttps2dq on xmm0.
785 define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
786 ; SSE-LABEL: fptosi_4f32_to_4i32:
788 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
791 ; AVX-LABEL: fptosi_4f32_to_4i32:
793 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
795 %cvt = fptosi <4 x float> %a to <4 x i32>
; Lanes 0 and 1 of the <4 x float> argument are extracted by a shuffle and
; then converted with fptosi to <2 x i64> (narrow first, convert second).
; The SSE, VEX, AVX512F and AVX512VL checks expect two scalar (v)cvttss2si
; conversions repacked with (v)punpcklqdq.
; The AVX512DQ checks expect vcvttps2qq from ymm0 to zmm0; the AVX512VLDQ
; checks expect the xmm-to-xmm form.
799 define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
800 ; SSE-LABEL: fptosi_2f32_to_2i64:
802 ; SSE-NEXT: cvttss2si %xmm0, %rax
803 ; SSE-NEXT: movq %rax, %xmm1
804 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
805 ; SSE-NEXT: cvttss2si %xmm0, %rax
806 ; SSE-NEXT: movq %rax, %xmm0
807 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
808 ; SSE-NEXT: movdqa %xmm1, %xmm0
811 ; VEX-LABEL: fptosi_2f32_to_2i64:
813 ; VEX-NEXT: vcvttss2si %xmm0, %rax
814 ; VEX-NEXT: vmovq %rax, %xmm1
815 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
816 ; VEX-NEXT: vcvttss2si %xmm0, %rax
817 ; VEX-NEXT: vmovq %rax, %xmm0
818 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
821 ; AVX512F-LABEL: fptosi_2f32_to_2i64:
823 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
824 ; AVX512F-NEXT: vmovq %rax, %xmm1
825 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
826 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
827 ; AVX512F-NEXT: vmovq %rax, %xmm0
828 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
831 ; AVX512VL-LABEL: fptosi_2f32_to_2i64:
833 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
834 ; AVX512VL-NEXT: vmovq %rax, %xmm1
835 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
836 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
837 ; AVX512VL-NEXT: vmovq %rax, %xmm0
838 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
839 ; AVX512VL-NEXT: retq
841 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
843 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
844 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
845 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
846 ; AVX512DQ-NEXT: vzeroupper
847 ; AVX512DQ-NEXT: retq
849 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
850 ; AVX512VLDQ: # %bb.0:
851 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
852 ; AVX512VLDQ-NEXT: retq
853 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
854 %cvt = fptosi <2 x float> %shuf to <2 x i64>
; All four lanes of the <4 x float> argument are converted with fptosi to
; <4 x i64>, and a shuffle then keeps only lanes 0 and 1 (convert first,
; narrow second - the reverse order of fptosi_2f32_to_2i64).
; The SSE, VEX, AVX512F and AVX512VL checks expect only the two live lanes to
; be converted, with scalar (v)cvttss2si.
; The AVX512DQ checks expect vcvttps2qq from ymm0 to zmm0; the AVX512VLDQ
; checks expect the xmm-to-ymm form followed by vzeroupper.
858 define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
859 ; SSE-LABEL: fptosi_4f32_to_2i64:
861 ; SSE-NEXT: cvttss2si %xmm0, %rax
862 ; SSE-NEXT: movq %rax, %xmm1
863 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
864 ; SSE-NEXT: cvttss2si %xmm0, %rax
865 ; SSE-NEXT: movq %rax, %xmm0
866 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
867 ; SSE-NEXT: movdqa %xmm1, %xmm0
870 ; VEX-LABEL: fptosi_4f32_to_2i64:
872 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
873 ; VEX-NEXT: vcvttss2si %xmm1, %rax
874 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
875 ; VEX-NEXT: vmovq %rcx, %xmm0
876 ; VEX-NEXT: vmovq %rax, %xmm1
877 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
880 ; AVX512F-LABEL: fptosi_4f32_to_2i64:
882 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
883 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
884 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
885 ; AVX512F-NEXT: vmovq %rcx, %xmm0
886 ; AVX512F-NEXT: vmovq %rax, %xmm1
887 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
890 ; AVX512VL-LABEL: fptosi_4f32_to_2i64:
892 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
893 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
894 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
895 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
896 ; AVX512VL-NEXT: vmovq %rax, %xmm1
897 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
898 ; AVX512VL-NEXT: retq
900 ; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
902 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
903 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
904 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
905 ; AVX512DQ-NEXT: vzeroupper
906 ; AVX512DQ-NEXT: retq
908 ; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
909 ; AVX512VLDQ: # %bb.0:
910 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
911 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
912 ; AVX512VLDQ-NEXT: vzeroupper
913 ; AVX512VLDQ-NEXT: retq
914 %cvt = fptosi <4 x float> %a to <4 x i64>
915 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
; Signed conversion of <8 x float> to <8 x i32>.
; The SSE checks expect one cvttps2dq per 128-bit half (xmm0 and xmm1); the
; AVX checks expect a single 256-bit vcvttps2dq on ymm0.
919 define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
920 ; SSE-LABEL: fptosi_8f32_to_8i32:
922 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
923 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
926 ; AVX-LABEL: fptosi_8f32_to_8i32:
928 ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
930 %cvt = fptosi <8 x float> %a to <8 x i32>
; Shuffle-then-convert: the low four lanes of an <8 x float> argument are
; extracted with shufflevector and then converted with fptosi to <4 x i64>.
; The configurations without AVX512DQ expect one scalar (v)cvttss2si per lane
; followed by vector reassembly; the AVX512DQ configurations expect a single
; vcvttps2qq.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
934 define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
935 ; SSE-LABEL: fptosi_4f32_to_4i64:
937 ; SSE-NEXT: cvttss2si %xmm0, %rax
938 ; SSE-NEXT: movq %rax, %xmm2
939 ; SSE-NEXT: movaps %xmm0, %xmm1
940 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
941 ; SSE-NEXT: cvttss2si %xmm1, %rax
942 ; SSE-NEXT: movq %rax, %xmm1
943 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
944 ; SSE-NEXT: movaps %xmm0, %xmm1
945 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
946 ; SSE-NEXT: cvttss2si %xmm1, %rax
947 ; SSE-NEXT: movq %rax, %xmm3
948 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
949 ; SSE-NEXT: cvttss2si %xmm0, %rax
950 ; SSE-NEXT: movq %rax, %xmm1
951 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
952 ; SSE-NEXT: movdqa %xmm2, %xmm0
955 ; AVX1-LABEL: fptosi_4f32_to_4i64:
957 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
958 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
959 ; AVX1-NEXT: vmovq %rax, %xmm1
960 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
961 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
962 ; AVX1-NEXT: vmovq %rax, %xmm2
963 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
964 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
965 ; AVX1-NEXT: vmovq %rax, %xmm2
966 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
967 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
968 ; AVX1-NEXT: vmovq %rax, %xmm0
969 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
970 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
973 ; AVX2-LABEL: fptosi_4f32_to_4i64:
975 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
976 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
977 ; AVX2-NEXT: vmovq %rax, %xmm1
978 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
979 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
980 ; AVX2-NEXT: vmovq %rax, %xmm2
981 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
982 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
983 ; AVX2-NEXT: vmovq %rax, %xmm2
984 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
985 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
986 ; AVX2-NEXT: vmovq %rax, %xmm0
987 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
988 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
991 ; AVX512F-LABEL: fptosi_4f32_to_4i64:
993 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
994 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
995 ; AVX512F-NEXT: vmovq %rax, %xmm1
996 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
997 ; AVX512F-NEXT: vcvttss2si %xmm2, %rax
998 ; AVX512F-NEXT: vmovq %rax, %xmm2
999 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1000 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1001 ; AVX512F-NEXT: vmovq %rax, %xmm2
1002 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1003 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1004 ; AVX512F-NEXT: vmovq %rax, %xmm0
1005 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1006 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1007 ; AVX512F-NEXT: retq
1009 ; AVX512VL-LABEL: fptosi_4f32_to_4i64:
1010 ; AVX512VL: # %bb.0:
1011 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1012 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1013 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1014 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1015 ; AVX512VL-NEXT: vcvttss2si %xmm2, %rax
1016 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1017 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1018 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1019 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1020 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1021 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1022 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1023 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1024 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1025 ; AVX512VL-NEXT: retq
1027 ; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
1028 ; AVX512DQ: # %bb.0:
1029 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1030 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1031 ; AVX512DQ-NEXT: retq
1033 ; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
1034 ; AVX512VLDQ: # %bb.0:
1035 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
1036 ; AVX512VLDQ-NEXT: retq
1037 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1038 %cvt = fptosi <4 x float> %shuf to <4 x i64>
; Convert-then-shuffle: all eight float lanes are converted with fptosi to
; <8 x i64>, then only result lanes 0-3 are kept via shufflevector.
; The configurations without AVX512DQ expect one scalar (v)cvttss2si per kept
; lane; the AVX512DQ configurations expect a single vcvttps2qq from ymm0 into
; zmm0.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1042 define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
1043 ; SSE-LABEL: fptosi_8f32_to_4i64:
1045 ; SSE-NEXT: cvttss2si %xmm0, %rax
1046 ; SSE-NEXT: movq %rax, %xmm2
1047 ; SSE-NEXT: movaps %xmm0, %xmm1
1048 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
1049 ; SSE-NEXT: cvttss2si %xmm1, %rax
1050 ; SSE-NEXT: movq %rax, %xmm1
1051 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1052 ; SSE-NEXT: movaps %xmm0, %xmm1
1053 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
1054 ; SSE-NEXT: cvttss2si %xmm1, %rax
1055 ; SSE-NEXT: movq %rax, %xmm3
1056 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1057 ; SSE-NEXT: cvttss2si %xmm0, %rax
1058 ; SSE-NEXT: movq %rax, %xmm1
1059 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1060 ; SSE-NEXT: movdqa %xmm2, %xmm0
1063 ; AVX1-LABEL: fptosi_8f32_to_4i64:
1065 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1066 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1067 ; AVX1-NEXT: vmovq %rax, %xmm1
1068 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1069 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
1070 ; AVX1-NEXT: vmovq %rax, %xmm2
1071 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1072 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1073 ; AVX1-NEXT: vmovq %rax, %xmm2
1074 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1075 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1076 ; AVX1-NEXT: vmovq %rax, %xmm0
1077 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1078 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1081 ; AVX2-LABEL: fptosi_8f32_to_4i64:
1083 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1084 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1085 ; AVX2-NEXT: vmovq %rax, %xmm1
1086 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1087 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
1088 ; AVX2-NEXT: vmovq %rax, %xmm2
1089 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1090 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1091 ; AVX2-NEXT: vmovq %rax, %xmm2
1092 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1093 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1094 ; AVX2-NEXT: vmovq %rax, %xmm0
1095 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1096 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1099 ; AVX512F-LABEL: fptosi_8f32_to_4i64:
1101 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1102 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
1103 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
1104 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1105 ; AVX512F-NEXT: vcvttss2si %xmm1, %rdx
1106 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1107 ; AVX512F-NEXT: vcvttss2si %xmm0, %rsi
1108 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1109 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1110 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1111 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1112 ; AVX512F-NEXT: vmovq %rax, %xmm2
1113 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1114 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1115 ; AVX512F-NEXT: retq
1117 ; AVX512VL-LABEL: fptosi_8f32_to_4i64:
1118 ; AVX512VL: # %bb.0:
1119 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1120 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1121 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
1122 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1123 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx
1124 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1125 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi
1126 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1127 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1128 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1129 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1130 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1131 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1132 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1133 ; AVX512VL-NEXT: retq
1135 ; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
1136 ; AVX512DQ: # %bb.0:
1137 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1138 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1139 ; AVX512DQ-NEXT: retq
1141 ; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
1142 ; AVX512VLDQ: # %bb.0:
1143 ; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0
1144 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1145 ; AVX512VLDQ-NEXT: retq
1146 %cvt = fptosi <8 x float> %a to <8 x i64>
1147 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1152 ; Float to Unsigned Integer
; Unsigned conversion <2 x float> -> <2 x i32>.
; The pre-AVX512 configurations expect two signed cvttps2dq conversions (of
; the input and of the input minus a constant) merged with a psrad $31 sign
; mask via pand/por. The AVX2 checks show that constant as a broadcast of
; 2.14748365E+9; the SSE and AVX1 checks load it from the constant pool.
; The AVX512 configurations expect a single vcvttps2udq.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1155 define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
1156 ; SSE-LABEL: fptoui_2f32_to_2i32:
1158 ; SSE-NEXT: cvttps2dq %xmm0, %xmm1
1159 ; SSE-NEXT: movdqa %xmm1, %xmm2
1160 ; SSE-NEXT: psrad $31, %xmm2
1161 ; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1162 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1163 ; SSE-NEXT: pand %xmm2, %xmm0
1164 ; SSE-NEXT: por %xmm1, %xmm0
1167 ; AVX1-LABEL: fptoui_2f32_to_2i32:
1169 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
1170 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
1171 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1172 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1173 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1174 ; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
1177 ; AVX2-LABEL: fptoui_2f32_to_2i32:
1179 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1180 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1181 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1182 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1183 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
1184 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
1185 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1188 ; AVX512F-LABEL: fptoui_2f32_to_2i32:
1190 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1191 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1192 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1193 ; AVX512F-NEXT: vzeroupper
1194 ; AVX512F-NEXT: retq
1196 ; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1197 ; AVX512VL: # %bb.0:
1198 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1199 ; AVX512VL-NEXT: retq
1201 ; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
1202 ; AVX512DQ: # %bb.0:
1203 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1204 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1205 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1206 ; AVX512DQ-NEXT: vzeroupper
1207 ; AVX512DQ-NEXT: retq
1209 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1210 ; AVX512VLDQ: # %bb.0:
1211 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1212 ; AVX512VLDQ-NEXT: retq
1213 %cvt = fptoui <2 x float> %a to <2 x i32>
; Unsigned conversion <4 x float> -> <4 x i32>.
; The pre-AVX512 configurations expect two signed cvttps2dq conversions (of
; the input and of the input minus a constant) merged with a psrad $31 sign
; mask via pand/por. The AVX512 configurations expect a single vcvttps2udq,
; widened to zmm when the VL extension is not enabled.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1217 define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
1218 ; SSE-LABEL: fptoui_4f32_to_4i32:
1220 ; SSE-NEXT: cvttps2dq %xmm0, %xmm1
1221 ; SSE-NEXT: movdqa %xmm1, %xmm2
1222 ; SSE-NEXT: psrad $31, %xmm2
1223 ; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1224 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1225 ; SSE-NEXT: pand %xmm2, %xmm0
1226 ; SSE-NEXT: por %xmm1, %xmm0
1229 ; AVX1-LABEL: fptoui_4f32_to_4i32:
1231 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
1232 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
1233 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1234 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1235 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1236 ; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
1239 ; AVX2-LABEL: fptoui_4f32_to_4i32:
1241 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1242 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1243 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1244 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1245 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
1246 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
1247 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1250 ; AVX512F-LABEL: fptoui_4f32_to_4i32:
1252 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1253 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1254 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1255 ; AVX512F-NEXT: vzeroupper
1256 ; AVX512F-NEXT: retq
1258 ; AVX512VL-LABEL: fptoui_4f32_to_4i32:
1259 ; AVX512VL: # %bb.0:
1260 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1261 ; AVX512VL-NEXT: retq
1263 ; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
1264 ; AVX512DQ: # %bb.0:
1265 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1266 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1267 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1268 ; AVX512DQ-NEXT: vzeroupper
1269 ; AVX512DQ-NEXT: retq
1271 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
1272 ; AVX512VLDQ: # %bb.0:
1273 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1274 ; AVX512VLDQ-NEXT: retq
1275 %cvt = fptoui <4 x float> %a to <4 x i32>
; Shuffle-then-convert: the low two lanes of a <4 x float> argument are
; extracted with shufflevector and then converted with fptoui to <2 x i64>.
; The pre-AVX512 configurations expect, per lane, two signed cvttss2si
; conversions (of the value and of the value minus a constant loaded from
; memory) combined with a sarq/andq/orq sequence. The AVX512F and AVX512VL
; configurations expect scalar vcvttss2usi, and the AVX512DQ configurations
; expect a packed vcvttps2uqq.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1279 define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
1280 ; SSE-LABEL: fptoui_2f32_to_2i64:
1282 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1283 ; SSE-NEXT: movaps %xmm0, %xmm1
1284 ; SSE-NEXT: subss %xmm2, %xmm1
1285 ; SSE-NEXT: cvttss2si %xmm1, %rax
1286 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1287 ; SSE-NEXT: movq %rcx, %rdx
1288 ; SSE-NEXT: sarq $63, %rdx
1289 ; SSE-NEXT: andq %rax, %rdx
1290 ; SSE-NEXT: orq %rcx, %rdx
1291 ; SSE-NEXT: movq %rdx, %xmm1
1292 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1293 ; SSE-NEXT: cvttss2si %xmm0, %rax
1294 ; SSE-NEXT: subss %xmm2, %xmm0
1295 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1296 ; SSE-NEXT: movq %rax, %rdx
1297 ; SSE-NEXT: sarq $63, %rdx
1298 ; SSE-NEXT: andq %rcx, %rdx
1299 ; SSE-NEXT: orq %rax, %rdx
1300 ; SSE-NEXT: movq %rdx, %xmm0
1301 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1302 ; SSE-NEXT: movdqa %xmm1, %xmm0
1305 ; VEX-LABEL: fptoui_2f32_to_2i64:
1307 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1308 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
1309 ; VEX-NEXT: vcvttss2si %xmm2, %rax
1310 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1311 ; VEX-NEXT: movq %rcx, %rdx
1312 ; VEX-NEXT: sarq $63, %rdx
1313 ; VEX-NEXT: andq %rax, %rdx
1314 ; VEX-NEXT: orq %rcx, %rdx
1315 ; VEX-NEXT: vmovq %rdx, %xmm2
1316 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1317 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm1
1318 ; VEX-NEXT: vcvttss2si %xmm1, %rax
1319 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1320 ; VEX-NEXT: movq %rcx, %rdx
1321 ; VEX-NEXT: sarq $63, %rdx
1322 ; VEX-NEXT: andq %rax, %rdx
1323 ; VEX-NEXT: orq %rcx, %rdx
1324 ; VEX-NEXT: vmovq %rdx, %xmm0
1325 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1328 ; AVX512F-LABEL: fptoui_2f32_to_2i64:
1330 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1331 ; AVX512F-NEXT: vmovq %rax, %xmm1
1332 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1333 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1334 ; AVX512F-NEXT: vmovq %rax, %xmm0
1335 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1336 ; AVX512F-NEXT: retq
1338 ; AVX512VL-LABEL: fptoui_2f32_to_2i64:
1339 ; AVX512VL: # %bb.0:
1340 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1341 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1342 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1343 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1344 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1345 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1346 ; AVX512VL-NEXT: retq
1348 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
1349 ; AVX512DQ: # %bb.0:
1350 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1351 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1352 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1353 ; AVX512DQ-NEXT: vzeroupper
1354 ; AVX512DQ-NEXT: retq
1356 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
1357 ; AVX512VLDQ: # %bb.0:
1358 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
1359 ; AVX512VLDQ-NEXT: retq
1360 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1361 %cvt = fptoui <2 x float> %shuf to <2 x i64>
; Convert-then-shuffle: all four float lanes are converted with fptoui to
; <4 x i64>, then only result lanes 0-1 are kept via shufflevector.
; The pre-AVX512 configurations expect, per kept lane, two signed cvttss2si
; conversions combined with a sarq/andq/orq sequence. The AVX512F and AVX512VL
; configurations expect scalar vcvttss2usi, and the AVX512DQ configurations
; expect a packed vcvttps2uqq.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1365 define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
1366 ; SSE-LABEL: fptoui_4f32_to_2i64:
1368 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1369 ; SSE-NEXT: movaps %xmm0, %xmm1
1370 ; SSE-NEXT: subss %xmm2, %xmm1
1371 ; SSE-NEXT: cvttss2si %xmm1, %rax
1372 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1373 ; SSE-NEXT: movq %rcx, %rdx
1374 ; SSE-NEXT: sarq $63, %rdx
1375 ; SSE-NEXT: andq %rax, %rdx
1376 ; SSE-NEXT: orq %rcx, %rdx
1377 ; SSE-NEXT: movq %rdx, %xmm1
1378 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1379 ; SSE-NEXT: cvttss2si %xmm0, %rax
1380 ; SSE-NEXT: subss %xmm2, %xmm0
1381 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1382 ; SSE-NEXT: movq %rax, %rdx
1383 ; SSE-NEXT: sarq $63, %rdx
1384 ; SSE-NEXT: andq %rcx, %rdx
1385 ; SSE-NEXT: orq %rax, %rdx
1386 ; SSE-NEXT: movq %rdx, %xmm0
1387 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1388 ; SSE-NEXT: movdqa %xmm1, %xmm0
1391 ; VEX-LABEL: fptoui_4f32_to_2i64:
1393 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1394 ; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1395 ; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3
1396 ; VEX-NEXT: vcvttss2si %xmm3, %rax
1397 ; VEX-NEXT: vcvttss2si %xmm1, %rcx
1398 ; VEX-NEXT: movq %rcx, %rdx
1399 ; VEX-NEXT: sarq $63, %rdx
1400 ; VEX-NEXT: andq %rax, %rdx
1401 ; VEX-NEXT: orq %rcx, %rdx
1402 ; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1
1403 ; VEX-NEXT: vcvttss2si %xmm1, %rax
1404 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1405 ; VEX-NEXT: movq %rcx, %rsi
1406 ; VEX-NEXT: sarq $63, %rsi
1407 ; VEX-NEXT: andq %rax, %rsi
1408 ; VEX-NEXT: orq %rcx, %rsi
1409 ; VEX-NEXT: vmovq %rsi, %xmm0
1410 ; VEX-NEXT: vmovq %rdx, %xmm1
1411 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1414 ; AVX512F-LABEL: fptoui_4f32_to_2i64:
1416 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1417 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1418 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1419 ; AVX512F-NEXT: vmovq %rcx, %xmm0
1420 ; AVX512F-NEXT: vmovq %rax, %xmm1
1421 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1422 ; AVX512F-NEXT: retq
1424 ; AVX512VL-LABEL: fptoui_4f32_to_2i64:
1425 ; AVX512VL: # %bb.0:
1426 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1427 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1428 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1429 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
1430 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1431 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1432 ; AVX512VL-NEXT: retq
1434 ; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
1435 ; AVX512DQ: # %bb.0:
1436 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1437 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1438 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1439 ; AVX512DQ-NEXT: vzeroupper
1440 ; AVX512DQ-NEXT: retq
1442 ; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
1443 ; AVX512VLDQ: # %bb.0:
1444 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1445 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1446 ; AVX512VLDQ-NEXT: vzeroupper
1447 ; AVX512VLDQ-NEXT: retq
1448 %cvt = fptoui <4 x float> %a to <4 x i64>
1449 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
; Unsigned conversion of all eight lanes, <8 x float> -> <8 x i32>.
; The pre-AVX512 configurations expect two signed cvttps2dq conversions (of
; the input and of the input minus a constant) that are then merged: with
; psrad/pand/por on the SSE and AVX2 configurations, and with vorps plus
; vblendvps on the AVX1 configuration. The AVX512 configurations expect a
; single vcvttps2udq.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1453 define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
1454 ; SSE-LABEL: fptoui_8f32_to_8i32:
1456 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1457 ; SSE-NEXT: cvttps2dq %xmm0, %xmm3
1458 ; SSE-NEXT: subps %xmm2, %xmm0
1459 ; SSE-NEXT: cvttps2dq %xmm0, %xmm4
1460 ; SSE-NEXT: movdqa %xmm3, %xmm0
1461 ; SSE-NEXT: psrad $31, %xmm0
1462 ; SSE-NEXT: pand %xmm4, %xmm0
1463 ; SSE-NEXT: por %xmm3, %xmm0
1464 ; SSE-NEXT: cvttps2dq %xmm1, %xmm3
1465 ; SSE-NEXT: subps %xmm2, %xmm1
1466 ; SSE-NEXT: cvttps2dq %xmm1, %xmm2
1467 ; SSE-NEXT: movdqa %xmm3, %xmm1
1468 ; SSE-NEXT: psrad $31, %xmm1
1469 ; SSE-NEXT: pand %xmm2, %xmm1
1470 ; SSE-NEXT: por %xmm3, %xmm1
1473 ; AVX1-LABEL: fptoui_8f32_to_8i32:
1475 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm1
1476 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1477 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
1478 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
1479 ; AVX1-NEXT: vblendvps %ymm1, %ymm0, %ymm1, %ymm0
1482 ; AVX2-LABEL: fptoui_8f32_to_8i32:
1484 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1485 ; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm1
1486 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
1487 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
1488 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm2
1489 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
1490 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1493 ; AVX512F-LABEL: fptoui_8f32_to_8i32:
1495 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1496 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1497 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1498 ; AVX512F-NEXT: retq
1500 ; AVX512VL-LABEL: fptoui_8f32_to_8i32:
1501 ; AVX512VL: # %bb.0:
1502 ; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
1503 ; AVX512VL-NEXT: retq
1505 ; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
1506 ; AVX512DQ: # %bb.0:
1507 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1508 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1509 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1510 ; AVX512DQ-NEXT: retq
1512 ; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
1513 ; AVX512VLDQ: # %bb.0:
1514 ; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0
1515 ; AVX512VLDQ-NEXT: retq
1516 %cvt = fptoui <8 x float> %a to <8 x i32>
; Shuffle-then-convert: the low four lanes of an <8 x float> argument are
; extracted with shufflevector and then converted with fptoui to <4 x i64>.
; The pre-AVX512 configurations expect, per lane, two signed cvttss2si
; conversions (of the value and of the value minus a constant loaded from
; memory) combined with a sarq/andq/orq sequence. The AVX512F and AVX512VL
; configurations expect scalar vcvttss2usi per lane, and the AVX512DQ
; configurations expect a single vcvttps2uqq.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1520 define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
1521 ; SSE-LABEL: fptoui_4f32_to_4i64:
1523 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1524 ; SSE-NEXT: movaps %xmm0, %xmm2
1525 ; SSE-NEXT: subss %xmm1, %xmm2
1526 ; SSE-NEXT: cvttss2si %xmm2, %rax
1527 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1528 ; SSE-NEXT: movq %rcx, %rdx
1529 ; SSE-NEXT: sarq $63, %rdx
1530 ; SSE-NEXT: andq %rax, %rdx
1531 ; SSE-NEXT: orq %rcx, %rdx
1532 ; SSE-NEXT: movq %rdx, %xmm2
1533 ; SSE-NEXT: movaps %xmm0, %xmm3
1534 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1535 ; SSE-NEXT: cvttss2si %xmm3, %rax
1536 ; SSE-NEXT: subss %xmm1, %xmm3
1537 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1538 ; SSE-NEXT: movq %rax, %rdx
1539 ; SSE-NEXT: sarq $63, %rdx
1540 ; SSE-NEXT: andq %rcx, %rdx
1541 ; SSE-NEXT: orq %rax, %rdx
1542 ; SSE-NEXT: movq %rdx, %xmm3
1543 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1544 ; SSE-NEXT: movaps %xmm0, %xmm3
1545 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1546 ; SSE-NEXT: cvttss2si %xmm3, %rax
1547 ; SSE-NEXT: subss %xmm1, %xmm3
1548 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1549 ; SSE-NEXT: movq %rax, %rdx
1550 ; SSE-NEXT: sarq $63, %rdx
1551 ; SSE-NEXT: andq %rcx, %rdx
1552 ; SSE-NEXT: orq %rax, %rdx
1553 ; SSE-NEXT: movq %rdx, %xmm3
1554 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1555 ; SSE-NEXT: cvttss2si %xmm0, %rax
1556 ; SSE-NEXT: subss %xmm1, %xmm0
1557 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1558 ; SSE-NEXT: movq %rax, %rdx
1559 ; SSE-NEXT: sarq $63, %rdx
1560 ; SSE-NEXT: andq %rcx, %rdx
1561 ; SSE-NEXT: orq %rax, %rdx
1562 ; SSE-NEXT: movq %rdx, %xmm1
1563 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1564 ; SSE-NEXT: movdqa %xmm2, %xmm0
1567 ; AVX1-LABEL: fptoui_4f32_to_4i64:
1569 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1570 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1571 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
1572 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1573 ; AVX1-NEXT: vcvttss2si %xmm2, %rcx
1574 ; AVX1-NEXT: movq %rcx, %rdx
1575 ; AVX1-NEXT: sarq $63, %rdx
1576 ; AVX1-NEXT: andq %rax, %rdx
1577 ; AVX1-NEXT: orq %rcx, %rdx
1578 ; AVX1-NEXT: vmovq %rdx, %xmm2
1579 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1580 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
1581 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1582 ; AVX1-NEXT: vcvttss2si %xmm3, %rcx
1583 ; AVX1-NEXT: movq %rcx, %rdx
1584 ; AVX1-NEXT: sarq $63, %rdx
1585 ; AVX1-NEXT: andq %rax, %rdx
1586 ; AVX1-NEXT: orq %rcx, %rdx
1587 ; AVX1-NEXT: vmovq %rdx, %xmm3
1588 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1589 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
1590 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1591 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1592 ; AVX1-NEXT: movq %rcx, %rdx
1593 ; AVX1-NEXT: sarq $63, %rdx
1594 ; AVX1-NEXT: andq %rax, %rdx
1595 ; AVX1-NEXT: orq %rcx, %rdx
1596 ; AVX1-NEXT: vmovq %rdx, %xmm3
1597 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1598 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm1
1599 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1600 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1601 ; AVX1-NEXT: movq %rcx, %rdx
1602 ; AVX1-NEXT: sarq $63, %rdx
1603 ; AVX1-NEXT: andq %rax, %rdx
1604 ; AVX1-NEXT: orq %rcx, %rdx
1605 ; AVX1-NEXT: vmovq %rdx, %xmm0
1606 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1607 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1610 ; AVX2-LABEL: fptoui_4f32_to_4i64:
1612 ; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1613 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1614 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
1615 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1616 ; AVX2-NEXT: vcvttss2si %xmm2, %rcx
1617 ; AVX2-NEXT: movq %rcx, %rdx
1618 ; AVX2-NEXT: sarq $63, %rdx
1619 ; AVX2-NEXT: andq %rax, %rdx
1620 ; AVX2-NEXT: orq %rcx, %rdx
1621 ; AVX2-NEXT: vmovq %rdx, %xmm2
1622 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1623 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
1624 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1625 ; AVX2-NEXT: vcvttss2si %xmm3, %rcx
1626 ; AVX2-NEXT: movq %rcx, %rdx
1627 ; AVX2-NEXT: sarq $63, %rdx
1628 ; AVX2-NEXT: andq %rax, %rdx
1629 ; AVX2-NEXT: orq %rcx, %rdx
1630 ; AVX2-NEXT: vmovq %rdx, %xmm3
1631 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1632 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
1633 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1634 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1635 ; AVX2-NEXT: movq %rcx, %rdx
1636 ; AVX2-NEXT: sarq $63, %rdx
1637 ; AVX2-NEXT: andq %rax, %rdx
1638 ; AVX2-NEXT: orq %rcx, %rdx
1639 ; AVX2-NEXT: vmovq %rdx, %xmm3
1640 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1641 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
1642 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1643 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1644 ; AVX2-NEXT: movq %rcx, %rdx
1645 ; AVX2-NEXT: sarq $63, %rdx
1646 ; AVX2-NEXT: andq %rax, %rdx
1647 ; AVX2-NEXT: orq %rcx, %rdx
1648 ; AVX2-NEXT: vmovq %rdx, %xmm0
1649 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1650 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1653 ; AVX512F-LABEL: fptoui_4f32_to_4i64:
1655 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1656 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1657 ; AVX512F-NEXT: vmovq %rax, %xmm1
1658 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1659 ; AVX512F-NEXT: vcvttss2usi %xmm2, %rax
1660 ; AVX512F-NEXT: vmovq %rax, %xmm2
1661 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1662 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1663 ; AVX512F-NEXT: vmovq %rax, %xmm2
1664 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1665 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1666 ; AVX512F-NEXT: vmovq %rax, %xmm0
1667 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1668 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1669 ; AVX512F-NEXT: retq
1671 ; AVX512VL-LABEL: fptoui_4f32_to_4i64:
1672 ; AVX512VL: # %bb.0:
1673 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1674 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1675 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1676 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1677 ; AVX512VL-NEXT: vcvttss2usi %xmm2, %rax
1678 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1679 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1680 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1681 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1682 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1683 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1684 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1685 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1686 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1687 ; AVX512VL-NEXT: retq
1689 ; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
1690 ; AVX512DQ: # %bb.0:
1691 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1692 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1693 ; AVX512DQ-NEXT: retq
1695 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
1696 ; AVX512VLDQ: # %bb.0:
1697 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1698 ; AVX512VLDQ-NEXT: retq
1699 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1700 %cvt = fptoui <4 x float> %shuf to <4 x i64>
; Convert-then-shuffle: all eight float lanes are converted with fptoui to
; <8 x i64>, then only result lanes 0-3 are kept via shufflevector.
; The pre-AVX512 configurations expect, per kept lane, two signed cvttss2si
; conversions (of the value and of the value minus a constant loaded from
; memory) combined with a sarq/andq/orq sequence. The AVX512F and AVX512VL
; configurations expect scalar vcvttss2usi per lane, and the AVX512DQ
; configurations expect a single vcvttps2uqq from ymm0 into zmm0.
; Assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1704 define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
1705 ; SSE-LABEL: fptoui_8f32_to_4i64:
1707 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1708 ; SSE-NEXT: movaps %xmm0, %xmm2
1709 ; SSE-NEXT: subss %xmm1, %xmm2
1710 ; SSE-NEXT: cvttss2si %xmm2, %rax
1711 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1712 ; SSE-NEXT: movq %rcx, %rdx
1713 ; SSE-NEXT: sarq $63, %rdx
1714 ; SSE-NEXT: andq %rax, %rdx
1715 ; SSE-NEXT: orq %rcx, %rdx
1716 ; SSE-NEXT: movq %rdx, %xmm2
1717 ; SSE-NEXT: movaps %xmm0, %xmm3
1718 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1719 ; SSE-NEXT: cvttss2si %xmm3, %rax
1720 ; SSE-NEXT: subss %xmm1, %xmm3
1721 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1722 ; SSE-NEXT: movq %rax, %rdx
1723 ; SSE-NEXT: sarq $63, %rdx
1724 ; SSE-NEXT: andq %rcx, %rdx
1725 ; SSE-NEXT: orq %rax, %rdx
1726 ; SSE-NEXT: movq %rdx, %xmm3
1727 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1728 ; SSE-NEXT: movaps %xmm0, %xmm3
1729 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1730 ; SSE-NEXT: cvttss2si %xmm3, %rax
1731 ; SSE-NEXT: subss %xmm1, %xmm3
1732 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1733 ; SSE-NEXT: movq %rax, %rdx
1734 ; SSE-NEXT: sarq $63, %rdx
1735 ; SSE-NEXT: andq %rcx, %rdx
1736 ; SSE-NEXT: orq %rax, %rdx
1737 ; SSE-NEXT: movq %rdx, %xmm3
1738 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1739 ; SSE-NEXT: cvttss2si %xmm0, %rax
1740 ; SSE-NEXT: subss %xmm1, %xmm0
1741 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1742 ; SSE-NEXT: movq %rax, %rdx
1743 ; SSE-NEXT: sarq $63, %rdx
1744 ; SSE-NEXT: andq %rcx, %rdx
1745 ; SSE-NEXT: orq %rax, %rdx
1746 ; SSE-NEXT: movq %rdx, %xmm1
1747 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1748 ; SSE-NEXT: movdqa %xmm2, %xmm0
1751 ; AVX1-LABEL: fptoui_8f32_to_4i64:
1753 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1754 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1755 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
1756 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1757 ; AVX1-NEXT: vcvttss2si %xmm2, %rcx
1758 ; AVX1-NEXT: movq %rcx, %rdx
1759 ; AVX1-NEXT: sarq $63, %rdx
1760 ; AVX1-NEXT: andq %rax, %rdx
1761 ; AVX1-NEXT: orq %rcx, %rdx
1762 ; AVX1-NEXT: vmovq %rdx, %xmm2
1763 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1764 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
1765 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1766 ; AVX1-NEXT: vcvttss2si %xmm3, %rcx
1767 ; AVX1-NEXT: movq %rcx, %rdx
1768 ; AVX1-NEXT: sarq $63, %rdx
1769 ; AVX1-NEXT: andq %rax, %rdx
1770 ; AVX1-NEXT: orq %rcx, %rdx
1771 ; AVX1-NEXT: vmovq %rdx, %xmm3
1772 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1773 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
1774 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1775 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1776 ; AVX1-NEXT: movq %rcx, %rdx
1777 ; AVX1-NEXT: sarq $63, %rdx
1778 ; AVX1-NEXT: andq %rax, %rdx
1779 ; AVX1-NEXT: orq %rcx, %rdx
1780 ; AVX1-NEXT: vmovq %rdx, %xmm3
1781 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1782 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm1
1783 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1784 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1785 ; AVX1-NEXT: movq %rcx, %rdx
1786 ; AVX1-NEXT: sarq $63, %rdx
1787 ; AVX1-NEXT: andq %rax, %rdx
1788 ; AVX1-NEXT: orq %rcx, %rdx
1789 ; AVX1-NEXT: vmovq %rdx, %xmm0
1790 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1791 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1794 ; AVX2-LABEL: fptoui_8f32_to_4i64:
1796 ; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1797 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1798 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
1799 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1800 ; AVX2-NEXT: vcvttss2si %xmm2, %rcx
1801 ; AVX2-NEXT: movq %rcx, %rdx
1802 ; AVX2-NEXT: sarq $63, %rdx
1803 ; AVX2-NEXT: andq %rax, %rdx
1804 ; AVX2-NEXT: orq %rcx, %rdx
1805 ; AVX2-NEXT: vmovq %rdx, %xmm2
1806 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1807 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
1808 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1809 ; AVX2-NEXT: vcvttss2si %xmm3, %rcx
1810 ; AVX2-NEXT: movq %rcx, %rdx
1811 ; AVX2-NEXT: sarq $63, %rdx
1812 ; AVX2-NEXT: andq %rax, %rdx
1813 ; AVX2-NEXT: orq %rcx, %rdx
1814 ; AVX2-NEXT: vmovq %rdx, %xmm3
1815 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1816 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
1817 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1818 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1819 ; AVX2-NEXT: movq %rcx, %rdx
1820 ; AVX2-NEXT: sarq $63, %rdx
1821 ; AVX2-NEXT: andq %rax, %rdx
1822 ; AVX2-NEXT: orq %rcx, %rdx
1823 ; AVX2-NEXT: vmovq %rdx, %xmm3
1824 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1825 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
1826 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1827 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1828 ; AVX2-NEXT: movq %rcx, %rdx
1829 ; AVX2-NEXT: sarq $63, %rdx
1830 ; AVX2-NEXT: andq %rax, %rdx
1831 ; AVX2-NEXT: orq %rcx, %rdx
1832 ; AVX2-NEXT: vmovq %rdx, %xmm0
1833 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1834 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1837 ; AVX512F-LABEL: fptoui_8f32_to_4i64:
1839 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1840 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1841 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1842 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1843 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx
1844 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1845 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi
1846 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1847 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1848 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1849 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1850 ; AVX512F-NEXT: vmovq %rax, %xmm2
1851 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1852 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1853 ; AVX512F-NEXT: retq
1855 ; AVX512VL-LABEL: fptoui_8f32_to_4i64:
1856 ; AVX512VL: # %bb.0:
1857 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1858 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1859 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1860 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1861 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx
1862 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1863 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi
1864 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1865 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1866 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1867 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1868 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1869 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1870 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1871 ; AVX512VL-NEXT: retq
1873 ; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
1874 ; AVX512DQ: # %bb.0:
1875 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1876 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1877 ; AVX512DQ-NEXT: retq
1879 ; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
1880 ; AVX512VLDQ: # %bb.0:
1881 ; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1882 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1883 ; AVX512VLDQ-NEXT: retq
1884 %cvt = fptoui <8 x float> %a to <8 x i64>
1885 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Constant folding: fptosi of the literal <1.0, -1.0> to <2 x i64>.
; Both the SSE2 and the AVX expectations are a single constant-vector load;
; -1 is printed as the unsigned 64-bit pattern 18446744073709551615.
1893 define <2 x i64> @fptosi_2f64_to_2i64_const() {
1894 ; SSE-LABEL: fptosi_2f64_to_2i64_const:
1896 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1899 ; AVX-LABEL: fptosi_2f64_to_2i64_const:
1901 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
1903 %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
; Constant folding: fptosi of the literal <-1.0, 1.0> to <2 x i32>, then
; widened to <4 x i32> by a shufflevector whose two upper lanes are undef.
; The expected constant therefore has 'u' (undef) in lanes 2 and 3.
1907 define <4 x i32> @fptosi_2f64_to_2i32_const() {
1908 ; SSE-LABEL: fptosi_2f64_to_2i32_const:
1910 ; SSE-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
1913 ; AVX-LABEL: fptosi_2f64_to_2i32_const:
1915 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
1917 %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
1918 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Constant folding: fptosi of the literal <1.0, -1.0, 2.0, -3.0> to <4 x i64>.
; The SSE2 expectation splits the 256-bit result into two 128-bit constant
; loads (xmm0, xmm1); the AVX expectation is one 256-bit load into ymm0.
1922 define <4 x i64> @fptosi_4f64_to_4i64_const() {
1923 ; SSE-LABEL: fptosi_4f64_to_4i64_const:
1925 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1926 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
1929 ; AVX-LABEL: fptosi_4f64_to_4i64_const:
1931 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
1933 %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
; Constant folding: fptosi of the literal <-1.0, 1.0, -2.0, 3.0> to <4 x i32>.
; Expected output is a single 128-bit constant load; negative lanes are
; printed as unsigned 32-bit patterns (4294967295 = -1, 4294967294 = -2).
1937 define <4 x i32> @fptosi_4f64_to_4i32_const() {
1938 ; SSE-LABEL: fptosi_4f64_to_4i32_const:
1940 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1943 ; AVX-LABEL: fptosi_4f64_to_4i32_const:
1945 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1947 %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
; Constant folding: fptoui of the literal <2.0, 4.0> to <2 x i64>.
; Expected output is a single constant-vector load of [2,4].
1951 define <2 x i64> @fptoui_2f64_to_2i64_const() {
1952 ; SSE-LABEL: fptoui_2f64_to_2i64_const:
1954 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
1957 ; AVX-LABEL: fptoui_2f64_to_2i64_const:
1959 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
1961 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
; Constant folding: fptoui of the literal <2.0, 4.0> to <2 x i32>, widened to
; <4 x i32> with undef upper lanes (printed as 'u' in the expected constant).
; NOTE(review): the %a parameter is not referenced by the visible body; the
; conversion operates on a literal vector only.
1965 define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
1966 ; SSE-LABEL: fptoui_2f64_to_2i32_const:
1968 ; SSE-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
1971 ; AVX-LABEL: fptoui_2f64_to_2i32_const:
1973 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
1975 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
1976 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Constant folding: fptoui of the literal <2.0, 4.0, 6.0, 8.0> to <4 x i64>.
; The SSE2 expectation is two 128-bit constant loads; the AVX expectation is
; one 256-bit load.
; NOTE(review): the %a parameter is not referenced by the visible body.
1980 define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
1981 ; SSE-LABEL: fptoui_4f64_to_4i64_const:
1983 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
1984 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8]
1987 ; AVX-LABEL: fptoui_4f64_to_4i64_const:
1989 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
1991 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
; Constant folding: fptoui of the literal <2.0, 4.0, 6.0, 8.0> to <4 x i32>.
; Expected output is a single 128-bit constant load of [2,4,6,8].
; NOTE(review): the %a parameter is not referenced by the visible body.
1995 define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
1996 ; SSE-LABEL: fptoui_4f64_to_4i32_const:
1998 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
2001 ; AVX-LABEL: fptoui_4f64_to_4i32_const:
2003 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
2005 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
; Constant folding: fptosi of the literal <1.0, -1.0, 2.0, 3.0> (float) to
; <4 x i32>. Expected output is a single constant load; -1 is printed as
; 4294967295.
2009 define <4 x i32> @fptosi_4f32_to_4i32_const() {
2010 ; SSE-LABEL: fptosi_4f32_to_4i32_const:
2012 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2015 ; AVX-LABEL: fptosi_4f32_to_4i32_const:
2017 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2019 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
; Constant folding: fptosi of the literal <1.0, -1.0, 2.0, 3.0> (float) to
; <4 x i64>. The SSE2 expectation is two 128-bit constant loads; the AVX
; expectation is one 256-bit load.
2023 define <4 x i64> @fptosi_4f32_to_4i64_const() {
2024 ; SSE-LABEL: fptosi_4f32_to_4i64_const:
2026 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
2027 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
2030 ; AVX-LABEL: fptosi_4f32_to_4i64_const:
2032 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
2034 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
; Constant folding: fptosi of an 8-element float literal to <8 x i32>.
; The SSE2 expectation is two 128-bit constant loads; the AVX expectation is
; one 256-bit load. Negative lanes appear as unsigned 32-bit patterns
; (4294967295 = -1, 4294967288 = -8).
; NOTE(review): the %a parameter is not referenced by the visible body.
2038 define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
2039 ; SSE-LABEL: fptosi_8f32_to_8i32_const:
2041 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2042 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
2045 ; AVX-LABEL: fptosi_8f32_to_8i32_const:
2047 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
2049 %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
; Constant folding: fptoui of the literal <1.0, 2.0, 4.0, 6.0> (float) to
; <4 x i32>. Expected output is a single constant load of [1,2,4,6].
; NOTE(review): the %a parameter is not referenced by the visible body.
2053 define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
2054 ; SSE-LABEL: fptoui_4f32_to_4i32_const:
2056 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
2059 ; AVX-LABEL: fptoui_4f32_to_4i32_const:
2061 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
2063 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
; Constant folding: fptoui of the literal <1.0, 2.0, 4.0, 8.0> (float) to
; <4 x i64>. The SSE2 expectation is two 128-bit constant loads; the AVX
; expectation is one 256-bit load.
2067 define <4 x i64> @fptoui_4f32_to_4i64_const() {
2068 ; SSE-LABEL: fptoui_4f32_to_4i64_const:
2070 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2]
2071 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8]
2074 ; AVX-LABEL: fptoui_4f32_to_4i64_const:
2076 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
2078 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
; Constant folding: fptoui of an 8-element float literal to <8 x i32>.
; The SSE2 expectation is two 128-bit constant loads; the AVX expectation is
; one 256-bit load.
; NOTE(review): the %a parameter is not referenced by the visible body.
2082 define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
2083 ; SSE-LABEL: fptoui_8f32_to_8i32_const:
2085 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
2086 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
2089 ; AVX-LABEL: fptoui_8f32_to_8i32_const:
2091 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
2093 %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
; fptosi of <2 x half> to <2 x i32>, then concatenated with a zero vector so
; the result is <4 x i32> with lanes 2 and 3 equal to zero (shuffle mask
; 0,1,2,3 over %cvt and zeroinitializer).
; Expectations: the SSE2 and VEX sequences extend each half element through a
; call to __gnu_h2f_ieee and convert with (v)cvttss2si; the AVX512 sequence
; uses vcvtph2ps instead of the library call. All sequences end with a
; (v)movq that zeroes the upper 64 bits.
2101 define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
2102 ; SSE-LABEL: fptosi_2f16_to_4i32:
2104 ; SSE-NEXT: pushq %rbp
2105 ; SSE-NEXT: pushq %rbx
2106 ; SSE-NEXT: pushq %rax
2107 ; SSE-NEXT: movl %esi, %ebx
2108 ; SSE-NEXT: movzwl %di, %edi
2109 ; SSE-NEXT: callq __gnu_h2f_ieee@PLT
2110 ; SSE-NEXT: cvttss2si %xmm0, %ebp
2111 ; SSE-NEXT: movzwl %bx, %edi
2112 ; SSE-NEXT: callq __gnu_h2f_ieee@PLT
2113 ; SSE-NEXT: cvttss2si %xmm0, %eax
2114 ; SSE-NEXT: movd %eax, %xmm0
2115 ; SSE-NEXT: movd %ebp, %xmm1
2116 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2117 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2118 ; SSE-NEXT: addq $8, %rsp
2119 ; SSE-NEXT: popq %rbx
2120 ; SSE-NEXT: popq %rbp
2123 ; VEX-LABEL: fptosi_2f16_to_4i32:
2125 ; VEX-NEXT: pushq %rbp
2126 ; VEX-NEXT: pushq %rbx
2127 ; VEX-NEXT: pushq %rax
2128 ; VEX-NEXT: movl %esi, %ebx
2129 ; VEX-NEXT: movzwl %di, %edi
2130 ; VEX-NEXT: callq __gnu_h2f_ieee@PLT
2131 ; VEX-NEXT: vcvttss2si %xmm0, %ebp
2132 ; VEX-NEXT: movzwl %bx, %edi
2133 ; VEX-NEXT: callq __gnu_h2f_ieee@PLT
2134 ; VEX-NEXT: vcvttss2si %xmm0, %eax
2135 ; VEX-NEXT: vmovd %eax, %xmm0
2136 ; VEX-NEXT: vmovd %ebp, %xmm1
2137 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2138 ; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2139 ; VEX-NEXT: addq $8, %rsp
2140 ; VEX-NEXT: popq %rbx
2141 ; VEX-NEXT: popq %rbp
2144 ; AVX512-LABEL: fptosi_2f16_to_4i32:
2146 ; AVX512-NEXT: movzwl %di, %eax
2147 ; AVX512-NEXT: vmovd %eax, %xmm0
2148 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
2149 ; AVX512-NEXT: vcvttss2si %xmm0, %eax
2150 ; AVX512-NEXT: movzwl %si, %ecx
2151 ; AVX512-NEXT: vmovd %ecx, %xmm0
2152 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
2153 ; AVX512-NEXT: vcvttss2si %xmm0, %ecx
2154 ; AVX512-NEXT: vmovd %ecx, %xmm0
2155 ; AVX512-NEXT: vmovd %eax, %xmm1
2156 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2157 ; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2159 %cvt = fptosi <2 x half> %a to <2 x i32>
2160 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; fptosi of <2 x x86_fp80> to <2 x i32>, then concatenated with a zero vector
; so the result is <4 x i32> with lanes 2 and 3 equal to zero.
; Expectations: both operands are loaded from the stack with fldt. The SSE2
; sequence saves the x87 control word, ORs 0xC00 into it, and restores it
; around each fistpl; the AVX sequence uses fisttpl directly with no
; control-word manipulation.
2164 define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
2165 ; SSE-LABEL: fptosi_2f80_to_4i32:
2167 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
2168 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
2169 ; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
2170 ; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2171 ; SSE-NEXT: orl $3072, %eax # imm = 0xC00
2172 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2173 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2174 ; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
2175 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2176 ; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
2177 ; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2178 ; SSE-NEXT: orl $3072, %eax # imm = 0xC00
2179 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2180 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2181 ; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
2182 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2183 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2184 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2185 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2186 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2189 ; AVX-LABEL: fptosi_2f80_to_4i32:
2191 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
2192 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
2193 ; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
2194 ; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
2195 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2196 ; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2197 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2198 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2200 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
2201 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; fptosi of <2 x fp128> to <2 x i32>, then concatenated with a zero vector so
; the result is <4 x i32> with lanes 2 and 3 equal to zero.
; Expectations: each element is converted by a call to __fixtfsi. The second
; element (xmm1) is spilled to the stack before the first call and reloaded
; into xmm0 for the second call; the first result is kept in ebx meanwhile.
2205 define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
2206 ; SSE-LABEL: fptosi_2f128_to_4i32:
2208 ; SSE-NEXT: pushq %rbx
2209 ; SSE-NEXT: subq $16, %rsp
2210 ; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
2211 ; SSE-NEXT: callq __fixtfsi@PLT
2212 ; SSE-NEXT: movl %eax, %ebx
2213 ; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2214 ; SSE-NEXT: callq __fixtfsi@PLT
2215 ; SSE-NEXT: movd %eax, %xmm0
2216 ; SSE-NEXT: movd %ebx, %xmm1
2217 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2218 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2219 ; SSE-NEXT: addq $16, %rsp
2220 ; SSE-NEXT: popq %rbx
2223 ; AVX-LABEL: fptosi_2f128_to_4i32:
2225 ; AVX-NEXT: pushq %rbx
2226 ; AVX-NEXT: subq $16, %rsp
2227 ; AVX-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill
2228 ; AVX-NEXT: callq __fixtfsi@PLT
2229 ; AVX-NEXT: movl %eax, %ebx
2230 ; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
2231 ; AVX-NEXT: callq __fixtfsi@PLT
2232 ; AVX-NEXT: vmovd %eax, %xmm0
2233 ; AVX-NEXT: vmovd %ebx, %xmm1
2234 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2235 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2236 ; AVX-NEXT: addq $16, %rsp
2237 ; AVX-NEXT: popq %rbx
2239 %cvt = fptosi <2 x fp128> %a to <2 x i32>
2240 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; fptosi of <2 x float> to <2 x i8>.
; Expectations: every configuration converts with (v)cvttps2dq first. The
; narrowing then differs: configurations without avx512vl use signed packs
; ((v)packssdw followed by (v)packsswb), while the avx512vl configurations
; use a single vpmovdb.
2244 define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
2245 ; SSE-LABEL: fptosi_2f32_to_2i8:
2247 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2248 ; SSE-NEXT: packssdw %xmm0, %xmm0
2249 ; SSE-NEXT: packsswb %xmm0, %xmm0
2252 ; VEX-LABEL: fptosi_2f32_to_2i8:
2254 ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
2255 ; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2256 ; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2259 ; AVX512F-LABEL: fptosi_2f32_to_2i8:
2261 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2262 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2263 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2264 ; AVX512F-NEXT: retq
2266 ; AVX512VL-LABEL: fptosi_2f32_to_2i8:
2267 ; AVX512VL: # %bb.0:
2268 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2269 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2270 ; AVX512VL-NEXT: retq
2272 ; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
2273 ; AVX512DQ: # %bb.0:
2274 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2275 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2276 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2277 ; AVX512DQ-NEXT: retq
2279 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
2280 ; AVX512VLDQ: # %bb.0:
2281 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2282 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2283 ; AVX512VLDQ-NEXT: retq
2284 %cvt = fptosi <2 x float> %a to <2 x i8>
; fptosi of <2 x float> to <2 x i16>.
; Expectations: (v)cvttps2dq followed by one signed pack ((v)packssdw); the
; same two-instruction shape is expected for the SSE2 run and all AVX runs.
2288 define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
2289 ; SSE-LABEL: fptosi_2f32_to_2i16:
2291 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2292 ; SSE-NEXT: packssdw %xmm0, %xmm0
2295 ; AVX-LABEL: fptosi_2f32_to_2i16:
2297 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2298 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2300 %cvt = fptosi <2 x float> %a to <2 x i16>
; fptoui of <2 x float> to <2 x i8>.
; Expectations: every configuration converts with the signed (v)cvttps2dq
; first. The SSE2 run then narrows with two packuswb; the VEX, avx512f and
; avx512dq runs use vpackusdw followed by vpackuswb; the avx512vl runs use a
; single vpmovdb.
2304 define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
2305 ; SSE-LABEL: fptoui_2f32_to_2i8:
2307 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2308 ; SSE-NEXT: packuswb %xmm0, %xmm0
2309 ; SSE-NEXT: packuswb %xmm0, %xmm0
2312 ; VEX-LABEL: fptoui_2f32_to_2i8:
2314 ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
2315 ; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2316 ; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2319 ; AVX512F-LABEL: fptoui_2f32_to_2i8:
2321 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2322 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2323 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2324 ; AVX512F-NEXT: retq
2326 ; AVX512VL-LABEL: fptoui_2f32_to_2i8:
2327 ; AVX512VL: # %bb.0:
2328 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2329 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2330 ; AVX512VL-NEXT: retq
2332 ; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
2333 ; AVX512DQ: # %bb.0:
2334 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2335 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2336 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2337 ; AVX512DQ-NEXT: retq
2339 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
2340 ; AVX512VLDQ: # %bb.0:
2341 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2342 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2343 ; AVX512VLDQ-NEXT: retq
2344 %cvt = fptoui <2 x float> %a to <2 x i8>
; fptoui of <2 x float> to <2 x i16>.
; Expectations: (v)cvttps2dq, then the SSE2 run narrows with a pshuflw
; shuffle while the AVX runs use vpackusdw.
2348 define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
2349 ; SSE-LABEL: fptoui_2f32_to_2i16:
2351 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2352 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2355 ; AVX-LABEL: fptoui_2f32_to_2i16:
2357 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2358 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2360 %cvt = fptoui <2 x float> %a to <2 x i16>
; fptosi of <2 x double> to <2 x i8>.
; Expectations: every configuration converts with (v)cvttpd2dq first.
; Configurations without avx512vl then narrow with signed packs ((v)packssdw
; followed by (v)packsswb); the avx512vl configurations use a single vpmovdb.
2364 define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
2365 ; SSE-LABEL: fptosi_2f64_to_2i8:
2367 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2368 ; SSE-NEXT: packssdw %xmm0, %xmm0
2369 ; SSE-NEXT: packsswb %xmm0, %xmm0
2372 ; VEX-LABEL: fptosi_2f64_to_2i8:
2374 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
2375 ; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2376 ; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2379 ; AVX512F-LABEL: fptosi_2f64_to_2i8:
2381 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2382 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2383 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2384 ; AVX512F-NEXT: retq
2386 ; AVX512VL-LABEL: fptosi_2f64_to_2i8:
2387 ; AVX512VL: # %bb.0:
2388 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2389 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2390 ; AVX512VL-NEXT: retq
2392 ; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
2393 ; AVX512DQ: # %bb.0:
2394 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2395 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2396 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2397 ; AVX512DQ-NEXT: retq
2399 ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
2400 ; AVX512VLDQ: # %bb.0:
2401 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2402 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2403 ; AVX512VLDQ-NEXT: retq
2404 %cvt = fptosi <2 x double> %a to <2 x i8>
; fptosi of <2 x double> to <2 x i16>.
; Expectations: (v)cvttpd2dq followed by one signed pack ((v)packssdw), for
; the SSE2 run and all AVX runs alike.
2408 define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
2409 ; SSE-LABEL: fptosi_2f64_to_2i16:
2411 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2412 ; SSE-NEXT: packssdw %xmm0, %xmm0
2415 ; AVX-LABEL: fptosi_2f64_to_2i16:
2417 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2418 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2420 %cvt = fptosi <2 x double> %a to <2 x i16>
; fptoui of <2 x double> to <2 x i8>.
; Expectations: every configuration converts with the signed (v)cvttpd2dq
; first. The SSE2 run then narrows with two packuswb; the VEX, avx512f and
; avx512dq runs use vpackusdw followed by vpackuswb; the avx512vl runs use a
; single vpmovdb.
2424 define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
2425 ; SSE-LABEL: fptoui_2f64_to_2i8:
2427 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2428 ; SSE-NEXT: packuswb %xmm0, %xmm0
2429 ; SSE-NEXT: packuswb %xmm0, %xmm0
2432 ; VEX-LABEL: fptoui_2f64_to_2i8:
2434 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
2435 ; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2436 ; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2439 ; AVX512F-LABEL: fptoui_2f64_to_2i8:
2441 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2442 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2443 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2444 ; AVX512F-NEXT: retq
2446 ; AVX512VL-LABEL: fptoui_2f64_to_2i8:
2447 ; AVX512VL: # %bb.0:
2448 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2449 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2450 ; AVX512VL-NEXT: retq
2452 ; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
2453 ; AVX512DQ: # %bb.0:
2454 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2455 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2456 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2457 ; AVX512DQ-NEXT: retq
2459 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
2460 ; AVX512VLDQ: # %bb.0:
2461 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2462 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2463 ; AVX512VLDQ-NEXT: retq
2464 %cvt = fptoui <2 x double> %a to <2 x i8>
; fptoui of <2 x double> to <2 x i16>.
; Expectations: (v)cvttpd2dq, then the SSE2 run narrows with a pshuflw
; shuffle while the AVX runs use vpackusdw.
2468 define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
2469 ; SSE-LABEL: fptoui_2f64_to_2i16:
2471 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2472 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2475 ; AVX-LABEL: fptoui_2f64_to_2i16:
2477 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2478 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2480 %cvt = fptoui <2 x double> %a to <2 x i16>
; fptosi of <8 x double> to <8 x i16>.
; Expectations:
;  - SSE2 run: four 128-bit cvttpd2dq, paired with unpcklpd, then one packssdw.
;  - VEX runs: two 256-bit vcvttpd2dq, one vpackssdw, then vzeroupper.
;  - AVX512 runs: one 512-bit vcvttpd2dq followed by vpmovdw. Without
;    avx512vl the vpmovdw operates on zmm0 and is followed by a register
;    'kill' annotation; with avx512vl it operates on ymm0 directly.
2484 define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
2485 ; SSE-LABEL: fptosi_8f64_to_8i16:
2487 ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2488 ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2489 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2490 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2491 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2492 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2493 ; SSE-NEXT: packssdw %xmm2, %xmm0
2496 ; VEX-LABEL: fptosi_8f64_to_8i16:
2498 ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2499 ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2500 ; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
2501 ; VEX-NEXT: vzeroupper
2504 ; AVX512F-LABEL: fptosi_8f64_to_8i16:
2506 ; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
2507 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2508 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2509 ; AVX512F-NEXT: vzeroupper
2510 ; AVX512F-NEXT: retq
2512 ; AVX512VL-LABEL: fptosi_8f64_to_8i16:
2513 ; AVX512VL: # %bb.0:
2514 ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
2515 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2516 ; AVX512VL-NEXT: vzeroupper
2517 ; AVX512VL-NEXT: retq
2519 ; AVX512DQ-LABEL: fptosi_8f64_to_8i16:
2520 ; AVX512DQ: # %bb.0:
2521 ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2522 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2523 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2524 ; AVX512DQ-NEXT: vzeroupper
2525 ; AVX512DQ-NEXT: retq
2527 ; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16:
2528 ; AVX512VLDQ: # %bb.0:
2529 ; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2530 ; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
2531 ; AVX512VLDQ-NEXT: vzeroupper
2532 ; AVX512VLDQ-NEXT: retq
2533 %cvt = fptosi <8 x double> %a to <8 x i16>
; fptoui of <8 x double> to <8 x i16>.
; Expectations:
;  - SSE2 run: four 128-bit cvttpd2dq paired with unpcklpd; each half is then
;    narrowed with a pshuflw/pshufhw/pshufd shuffle sequence and the halves
;    are joined with punpcklqdq (no pack instruction is used).
;  - VEX runs: two 256-bit vcvttpd2dq, one vpackusdw, then vzeroupper.
;  - AVX512 runs: one 512-bit vcvttpd2dq followed by vpmovdw (zmm source
;    without avx512vl, ymm source with it).
2537 define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
2538 ; SSE-LABEL: fptoui_8f64_to_8i16:
2540 ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2541 ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2542 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2543 ; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
2544 ; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2545 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2546 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2547 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2548 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2549 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2550 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
2551 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2552 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2555 ; VEX-LABEL: fptoui_8f64_to_8i16:
2557 ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2558 ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2559 ; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2560 ; VEX-NEXT: vzeroupper
2563 ; AVX512F-LABEL: fptoui_8f64_to_8i16:
2565 ; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
2566 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2567 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2568 ; AVX512F-NEXT: vzeroupper
2569 ; AVX512F-NEXT: retq
2571 ; AVX512VL-LABEL: fptoui_8f64_to_8i16:
2572 ; AVX512VL: # %bb.0:
2573 ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
2574 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2575 ; AVX512VL-NEXT: vzeroupper
2576 ; AVX512VL-NEXT: retq
2578 ; AVX512DQ-LABEL: fptoui_8f64_to_8i16:
2579 ; AVX512DQ: # %bb.0:
2580 ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2581 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2582 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2583 ; AVX512DQ-NEXT: vzeroupper
2584 ; AVX512DQ-NEXT: retq
2586 ; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16:
2587 ; AVX512VLDQ: # %bb.0:
2588 ; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2589 ; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
2590 ; AVX512VLDQ-NEXT: vzeroupper
2591 ; AVX512VLDQ-NEXT: retq
2592 %cvt = fptoui <8 x double> %a to <8 x i16>
; fptosi of <16 x float> to <16 x i8>.
; Expectations:
;  - SSE2 run: four cvttps2dq, two packssdw, then a final signed packsswb.
;  - avx / avx2 runs: two 256-bit vcvttps2dq, each split with a 128-bit
;    extract (vextractf128 for avx, vextracti128 for avx2) and packed with
;    vpackssdw, then a final vpacksswb and vzeroupper.
;  - AVX512 runs: one 512-bit vcvttps2dq followed by vpmovdb.
2596 define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
2597 ; SSE-LABEL: fptosi_16f32_to_16i8:
2599 ; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2600 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2601 ; SSE-NEXT: packssdw %xmm3, %xmm2
2602 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2603 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2604 ; SSE-NEXT: packssdw %xmm1, %xmm0
2605 ; SSE-NEXT: packsswb %xmm2, %xmm0
2608 ; AVX1-LABEL: fptosi_16f32_to_16i8:
2610 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2611 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2612 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2613 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2614 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2615 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2616 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2617 ; AVX1-NEXT: vzeroupper
2620 ; AVX2-LABEL: fptosi_16f32_to_16i8:
2622 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
2623 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2624 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2625 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2626 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2627 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2628 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2629 ; AVX2-NEXT: vzeroupper
2632 ; AVX512-LABEL: fptosi_16f32_to_16i8:
2634 ; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
2635 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2636 ; AVX512-NEXT: vzeroupper
2638 %cvt = fptosi <16 x float> %a to <16 x i8>
; fptoui of <16 x float> to <16 x i8>.
; Expectations mirror the signed 16f32-to-16i8 case except for the last pack:
;  - SSE2 run: four cvttps2dq, two packssdw, then an unsigned packuswb.
;  - avx / avx2 runs: two 256-bit vcvttps2dq, 128-bit extracts (vextractf128
;    for avx, vextracti128 for avx2) with vpackssdw, then vpackuswb and
;    vzeroupper.
;  - AVX512 runs: one 512-bit vcvttps2dq followed by vpmovdb.
2642 define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
2643 ; SSE-LABEL: fptoui_16f32_to_16i8:
2645 ; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2646 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2647 ; SSE-NEXT: packssdw %xmm3, %xmm2
2648 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2649 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2650 ; SSE-NEXT: packssdw %xmm1, %xmm0
2651 ; SSE-NEXT: packuswb %xmm2, %xmm0
2654 ; AVX1-LABEL: fptoui_16f32_to_16i8:
2656 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2657 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2658 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2659 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2660 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2661 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2662 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2663 ; AVX1-NEXT: vzeroupper
2666 ; AVX2-LABEL: fptoui_16f32_to_16i8:
2668 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
2669 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2670 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2671 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2672 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2673 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2674 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2675 ; AVX2-NEXT: vzeroupper
2678 ; AVX512-LABEL: fptoui_16f32_to_16i8:
2680 ; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
2681 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2682 ; AVX512-NEXT: vzeroupper
2684 %cvt = fptoui <16 x float> %a to <16 x i8>
; Loads <2 x float> from the pointer %x and converts it with fptosi to
; <2 x i64>.
; Expectations:
;  - SSE2, VEX, avx512f and avx512vl runs: a 64-bit (v)movsd load, then each
;    lane is converted separately with (v)cvttss2si to a 64-bit GPR and the
;    two results are recombined with (v)punpcklqdq.
;  - avx512dq run (no avx512vl): the loaded value is converted with a widened
;    vcvttps2qq (ymm source, zmm destination), followed by vzeroupper.
;  - avx512dq+avx512vl run: the load is folded into the conversion, giving a
;    single vcvttps2qq with a memory operand.
2688 define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) {
2689 ; SSE-LABEL: fptosi_2f32_to_2i64_load:
2691 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2692 ; SSE-NEXT: cvttss2si %xmm1, %rax
2693 ; SSE-NEXT: movq %rax, %xmm0
2694 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2695 ; SSE-NEXT: cvttss2si %xmm1, %rax
2696 ; SSE-NEXT: movq %rax, %xmm1
2697 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2700 ; VEX-LABEL: fptosi_2f32_to_2i64_load:
2702 ; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2703 ; VEX-NEXT: vcvttss2si %xmm0, %rax
2704 ; VEX-NEXT: vmovq %rax, %xmm1
2705 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2706 ; VEX-NEXT: vcvttss2si %xmm0, %rax
2707 ; VEX-NEXT: vmovq %rax, %xmm0
2708 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2711 ; AVX512F-LABEL: fptosi_2f32_to_2i64_load:
2713 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2714 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
2715 ; AVX512F-NEXT: vmovq %rax, %xmm1
2716 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2717 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
2718 ; AVX512F-NEXT: vmovq %rax, %xmm0
2719 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2720 ; AVX512F-NEXT: retq
2722 ; AVX512VL-LABEL: fptosi_2f32_to_2i64_load:
2723 ; AVX512VL: # %bb.0:
2724 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2725 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
2726 ; AVX512VL-NEXT: vmovq %rax, %xmm1
2727 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2728 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
2729 ; AVX512VL-NEXT: vmovq %rax, %xmm0
2730 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2731 ; AVX512VL-NEXT: retq
2733 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load:
2734 ; AVX512DQ: # %bb.0:
2735 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2736 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
2737 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2738 ; AVX512DQ-NEXT: vzeroupper
2739 ; AVX512DQ-NEXT: retq
2741 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
2742 ; AVX512VLDQ: # %bb.0:
2743 ; AVX512VLDQ-NEXT: vcvttps2qq (%rdi), %xmm0
2744 ; AVX512VLDQ-NEXT: retq
2745 %a = load <2 x float>, <2 x float>* %x
2746 %b = fptosi <2 x float> %a to <2 x i64>
2750 define <2 x i64> @fptoui_2f32_to_2i64_load(<2 x float>* %x) {
2751 ; SSE-LABEL: fptoui_2f32_to_2i64_load:
2753 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2754 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2755 ; SSE-NEXT: movaps %xmm1, %xmm0
2756 ; SSE-NEXT: subss %xmm2, %xmm0
2757 ; SSE-NEXT: cvttss2si %xmm0, %rax
2758 ; SSE-NEXT: cvttss2si %xmm1, %rcx
2759 ; SSE-NEXT: movq %rcx, %rdx
2760 ; SSE-NEXT: sarq $63, %rdx
2761 ; SSE-NEXT: andq %rax, %rdx
2762 ; SSE-NEXT: orq %rcx, %rdx
2763 ; SSE-NEXT: movq %rdx, %xmm0
2764 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2765 ; SSE-NEXT: cvttss2si %xmm1, %rax
2766 ; SSE-NEXT: subss %xmm2, %xmm1
2767 ; SSE-NEXT: cvttss2si %xmm1, %rcx
2768 ; SSE-NEXT: movq %rax, %rdx
2769 ; SSE-NEXT: sarq $63, %rdx
2770 ; SSE-NEXT: andq %rcx, %rdx
2771 ; SSE-NEXT: orq %rax, %rdx
2772 ; SSE-NEXT: movq %rdx, %xmm1
2773 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2776 ; VEX-LABEL: fptoui_2f32_to_2i64_load:
2778 ; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2779 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2780 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
2781 ; VEX-NEXT: vcvttss2si %xmm2, %rax
2782 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
2783 ; VEX-NEXT: movq %rcx, %rdx
2784 ; VEX-NEXT: sarq $63, %rdx
2785 ; VEX-NEXT: andq %rax, %rdx
2786 ; VEX-NEXT: orq %rcx, %rdx
2787 ; VEX-NEXT: vmovq %rdx, %xmm2
2788 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2789 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm1
2790 ; VEX-NEXT: vcvttss2si %xmm1, %rax
2791 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
2792 ; VEX-NEXT: movq %rcx, %rdx
2793 ; VEX-NEXT: sarq $63, %rdx
2794 ; VEX-NEXT: andq %rax, %rdx
2795 ; VEX-NEXT: orq %rcx, %rdx
2796 ; VEX-NEXT: vmovq %rdx, %xmm0
2797 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2800 ; AVX512F-LABEL: fptoui_2f32_to_2i64_load:
2802 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2803 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
2804 ; AVX512F-NEXT: vmovq %rax, %xmm1
2805 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2806 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
2807 ; AVX512F-NEXT: vmovq %rax, %xmm0
2808 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2809 ; AVX512F-NEXT: retq
2811 ; AVX512VL-LABEL: fptoui_2f32_to_2i64_load:
2812 ; AVX512VL: # %bb.0:
2813 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2814 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
2815 ; AVX512VL-NEXT: vmovq %rax, %xmm1
2816 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2817 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
2818 ; AVX512VL-NEXT: vmovq %rax, %xmm0
2819 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2820 ; AVX512VL-NEXT: retq
2822 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load:
2823 ; AVX512DQ: # %bb.0:
2824 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2825 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
2826 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2827 ; AVX512DQ-NEXT: vzeroupper
2828 ; AVX512DQ-NEXT: retq
2830 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
2831 ; AVX512VLDQ: # %bb.0:
2832 ; AVX512VLDQ-NEXT: vcvttps2uqq (%rdi), %xmm0
2833 ; AVX512VLDQ-NEXT: retq
2834 %a = load <2 x float>, <2 x float>* %x
2835 %b = fptoui <2 x float> %a to <2 x i64>