1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ
10 ; 32-bit smoke tests - the i686 runs below only verify that llc completes; their output is not checked.
11 ; RUN: llc < %s -mtriple=i686-unknown-unknown
12 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
13 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
16 ; Double to Signed Integer
; Test: fptosi <2 x double> -> <2 x i64>.
; Per the checks below, the SSE, VEX, AVX512F and AVX512VL runs convert each
; lane with a scalar (v)cvttsd2si and repack the two 64-bit results, while the
; AVX512DQ run uses vcvttpd2qq on zmm0 and the AVX512VLDQ run uses it on xmm0.
19 define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
20 ; SSE-LABEL: fptosi_2f64_to_2i64:
22 ; SSE-NEXT: cvttsd2si %xmm0, %rax
23 ; SSE-NEXT: movq %rax, %xmm1
24 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
25 ; SSE-NEXT: cvttsd2si %xmm0, %rax
26 ; SSE-NEXT: movq %rax, %xmm0
27 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28 ; SSE-NEXT: movdqa %xmm1, %xmm0
31 ; VEX-LABEL: fptosi_2f64_to_2i64:
33 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
34 ; VEX-NEXT: vmovq %rax, %xmm1
35 ; VEX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
36 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
37 ; VEX-NEXT: vmovq %rax, %xmm0
38 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
41 ; AVX512F-LABEL: fptosi_2f64_to_2i64:
43 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
44 ; AVX512F-NEXT: vmovq %rax, %xmm1
45 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
46 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
47 ; AVX512F-NEXT: vmovq %rax, %xmm0
48 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
51 ; AVX512VL-LABEL: fptosi_2f64_to_2i64:
53 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
54 ; AVX512VL-NEXT: vmovq %rax, %xmm1
55 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
56 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
57 ; AVX512VL-NEXT: vmovq %rax, %xmm0
58 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
61 ; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
63 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
64 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
65 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
66 ; AVX512DQ-NEXT: vzeroupper
69 ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
70 ; AVX512VLDQ: # %bb.0:
71 ; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
72 ; AVX512VLDQ-NEXT: retq
73 %cvt = fptosi <2 x double> %a to <2 x i64>
; Test: fptosi <2 x double> -> <2 x i32>, then a shufflevector against
; zeroinitializer widens the result to <4 x i32> (lanes 2 and 3 come from the
; zero vector). Both the SSE and the AVX checks expect a single (v)cvttpd2dq
; on xmm0.
77 define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
78 ; SSE-LABEL: fptosi_2f64_to_4i32:
80 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
83 ; AVX-LABEL: fptosi_2f64_to_4i32:
85 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
87 %cvt = fptosi <2 x double> %a to <2 x i32>
88 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Test: fptosi <2 x double> -> <2 x i32> with no widening shuffle.
; Both the SSE and the AVX checks expect a single (v)cvttpd2dq on xmm0.
92 define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
93 ; SSE-LABEL: fptosi_2f64_to_2i32:
95 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
98 ; AVX-LABEL: fptosi_2f64_to_2i32:
100 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
102 %cvt = fptosi <2 x double> %a to <2 x i32>
; Test: the <2 x double> argument is first widened to <4 x double> by a
; shufflevector whose upper two lanes are undef, then converted with
; fptosi <4 x double> -> <4 x i32>.
; The SSE check expects one cvttpd2dq on xmm0; the AVX checks expect the
; ymm0 -> xmm0 form of vcvttpd2dq followed by vzeroupper.
106 define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
107 ; SSE-LABEL: fptosi_4f64_to_2i32:
109 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
112 ; AVX-LABEL: fptosi_4f64_to_2i32:
114 ; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
115 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
116 ; AVX-NEXT: vzeroupper
118 %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
119 %cvt = fptosi <4 x double> %ext to <4 x i32>
; Test: fptosi <4 x double> -> <4 x i64>.
; Per the checks below, the SSE, AVX1, AVX2, AVX512F and AVX512VL runs convert
; all four lanes one at a time with scalar (v)cvttsd2si and rebuild the vector
; (the AVX-family runs split ymm0 with vextractf128 and rejoin the halves with
; vinsertf128 / vinserti128). The AVX512DQ run uses vcvttpd2qq on zmm0 and the
; AVX512VLDQ run uses it on ymm0.
123 define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
124 ; SSE-LABEL: fptosi_4f64_to_4i64:
126 ; SSE-NEXT: cvttsd2si %xmm0, %rax
127 ; SSE-NEXT: movq %rax, %xmm2
128 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
129 ; SSE-NEXT: cvttsd2si %xmm0, %rax
130 ; SSE-NEXT: movq %rax, %xmm0
131 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
132 ; SSE-NEXT: cvttsd2si %xmm1, %rax
133 ; SSE-NEXT: movq %rax, %xmm3
134 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
135 ; SSE-NEXT: cvttsd2si %xmm1, %rax
136 ; SSE-NEXT: movq %rax, %xmm0
137 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
138 ; SSE-NEXT: movdqa %xmm2, %xmm0
139 ; SSE-NEXT: movdqa %xmm3, %xmm1
142 ; AVX1-LABEL: fptosi_4f64_to_4i64:
144 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
145 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
146 ; AVX1-NEXT: vmovq %rax, %xmm2
147 ; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
148 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
149 ; AVX1-NEXT: vmovq %rax, %xmm1
150 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
151 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
152 ; AVX1-NEXT: vmovq %rax, %xmm2
153 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
154 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
155 ; AVX1-NEXT: vmovq %rax, %xmm0
156 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
157 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
160 ; AVX2-LABEL: fptosi_4f64_to_4i64:
162 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
163 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
164 ; AVX2-NEXT: vmovq %rax, %xmm2
165 ; AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
166 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
167 ; AVX2-NEXT: vmovq %rax, %xmm1
168 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
169 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
170 ; AVX2-NEXT: vmovq %rax, %xmm2
171 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
172 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
173 ; AVX2-NEXT: vmovq %rax, %xmm0
174 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
175 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
178 ; AVX512F-LABEL: fptosi_4f64_to_4i64:
180 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
181 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
182 ; AVX512F-NEXT: vmovq %rax, %xmm2
183 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
184 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
185 ; AVX512F-NEXT: vmovq %rax, %xmm1
186 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
187 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
188 ; AVX512F-NEXT: vmovq %rax, %xmm2
189 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
190 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
191 ; AVX512F-NEXT: vmovq %rax, %xmm0
192 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
193 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
196 ; AVX512VL-LABEL: fptosi_4f64_to_4i64:
198 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
199 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
200 ; AVX512VL-NEXT: vmovq %rax, %xmm2
201 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
202 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
203 ; AVX512VL-NEXT: vmovq %rax, %xmm1
204 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
205 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
206 ; AVX512VL-NEXT: vmovq %rax, %xmm2
207 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
208 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
209 ; AVX512VL-NEXT: vmovq %rax, %xmm0
210 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
211 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
212 ; AVX512VL-NEXT: retq
214 ; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
216 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
217 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
218 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
219 ; AVX512DQ-NEXT: retq
221 ; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64:
222 ; AVX512VLDQ: # %bb.0:
223 ; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
224 ; AVX512VLDQ-NEXT: retq
225 %cvt = fptosi <4 x double> %a to <4 x i64>
; Test: fptosi <4 x double> -> <4 x i32>.
; The SSE checks expect one cvttpd2dq per input register (xmm1, xmm0) joined
; by unpcklpd; the AVX checks expect a single ymm0 -> xmm0 vcvttpd2dq followed
; by vzeroupper.
229 define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
230 ; SSE-LABEL: fptosi_4f64_to_4i32:
232 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
233 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
234 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
237 ; AVX-LABEL: fptosi_4f64_to_4i32:
239 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
240 ; AVX-NEXT: vzeroupper
242 %cvt = fptosi <4 x double> %a to <4 x i32>
247 ; Double to Unsigned Integer
; Test: fptoui <2 x double> -> <2 x i64>.
; Per the checks below, the SSE and VEX runs emulate the unsigned conversion
; per lane: they run the signed (v)cvttsd2si on both the input and the input
; minus 9.2233720368547758E+18 (2^63), then merge the two results with
; sarq $63 / andq / orq. The AVX512F and AVX512VL runs use scalar vcvttsd2usi
; per lane, the AVX512DQ run uses vcvttpd2uqq on zmm0, and the AVX512VLDQ run
; uses it on xmm0.
250 define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
251 ; SSE-LABEL: fptoui_2f64_to_2i64:
253 ; SSE-NEXT: movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
254 ; SSE-NEXT: movapd %xmm0, %xmm1
255 ; SSE-NEXT: subsd %xmm2, %xmm1
256 ; SSE-NEXT: cvttsd2si %xmm1, %rax
257 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
258 ; SSE-NEXT: movq %rcx, %rdx
259 ; SSE-NEXT: sarq $63, %rdx
260 ; SSE-NEXT: andq %rax, %rdx
261 ; SSE-NEXT: orq %rcx, %rdx
262 ; SSE-NEXT: movq %rdx, %xmm1
263 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
264 ; SSE-NEXT: cvttsd2si %xmm0, %rax
265 ; SSE-NEXT: subsd %xmm2, %xmm0
266 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
267 ; SSE-NEXT: movq %rax, %rdx
268 ; SSE-NEXT: sarq $63, %rdx
269 ; SSE-NEXT: andq %rcx, %rdx
270 ; SSE-NEXT: orq %rax, %rdx
271 ; SSE-NEXT: movq %rdx, %xmm0
272 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
273 ; SSE-NEXT: movdqa %xmm1, %xmm0
276 ; VEX-LABEL: fptoui_2f64_to_2i64:
278 ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
279 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
280 ; VEX-NEXT: vcvttsd2si %xmm2, %rax
281 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
282 ; VEX-NEXT: movq %rcx, %rdx
283 ; VEX-NEXT: sarq $63, %rdx
284 ; VEX-NEXT: andq %rax, %rdx
285 ; VEX-NEXT: orq %rcx, %rdx
286 ; VEX-NEXT: vmovq %rdx, %xmm2
287 ; VEX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
288 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm1
289 ; VEX-NEXT: vcvttsd2si %xmm1, %rax
290 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
291 ; VEX-NEXT: movq %rcx, %rdx
292 ; VEX-NEXT: sarq $63, %rdx
293 ; VEX-NEXT: andq %rax, %rdx
294 ; VEX-NEXT: orq %rcx, %rdx
295 ; VEX-NEXT: vmovq %rdx, %xmm0
296 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
299 ; AVX512F-LABEL: fptoui_2f64_to_2i64:
301 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
302 ; AVX512F-NEXT: vmovq %rax, %xmm1
303 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
304 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
305 ; AVX512F-NEXT: vmovq %rax, %xmm0
306 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
309 ; AVX512VL-LABEL: fptoui_2f64_to_2i64:
311 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
312 ; AVX512VL-NEXT: vmovq %rax, %xmm1
313 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
314 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
315 ; AVX512VL-NEXT: vmovq %rax, %xmm0
316 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
317 ; AVX512VL-NEXT: retq
319 ; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
321 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
322 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
323 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
324 ; AVX512DQ-NEXT: vzeroupper
325 ; AVX512DQ-NEXT: retq
327 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
328 ; AVX512VLDQ: # %bb.0:
329 ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
330 ; AVX512VLDQ-NEXT: retq
331 %cvt = fptoui <2 x double> %a to <2 x i64>
; Test: fptoui <2 x double> -> <2 x i32>, then a shufflevector against
; zeroinitializer widens the result to <4 x i32> (lanes 2 and 3 come from the
; zero vector).
; Per the checks below, the SSE and VEX runs emulate the unsigned conversion
; with two signed (v)cvttpd2dq (one on the input, one after adding a
; constant-pool operand) merged via psrad $31 / and / or. The AVX512F and
; AVX512DQ runs use vcvttpd2udq on zmm0 and then a vmovq that keeps lane 0..1
; and zeroes the rest; the AVX512VL and AVX512VLDQ runs need only
; vcvttpd2udq on xmm0.
335 define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
336 ; SSE-LABEL: fptoui_2f64_to_4i32:
338 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
339 ; SSE-NEXT: movapd %xmm1, %xmm2
340 ; SSE-NEXT: psrad $31, %xmm2
341 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
342 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
343 ; SSE-NEXT: andpd %xmm2, %xmm0
344 ; SSE-NEXT: orpd %xmm1, %xmm0
347 ; VEX-LABEL: fptoui_2f64_to_4i32:
349 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1
350 ; VEX-NEXT: vpsrad $31, %xmm1, %xmm2
351 ; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
352 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
353 ; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0
354 ; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0
357 ; AVX512F-LABEL: fptoui_2f64_to_4i32:
359 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
360 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
361 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
362 ; AVX512F-NEXT: vzeroupper
365 ; AVX512VL-LABEL: fptoui_2f64_to_4i32:
367 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
368 ; AVX512VL-NEXT: retq
370 ; AVX512DQ-LABEL: fptoui_2f64_to_4i32:
372 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
373 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
374 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
375 ; AVX512DQ-NEXT: vzeroupper
376 ; AVX512DQ-NEXT: retq
378 ; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
379 ; AVX512VLDQ: # %bb.0:
380 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
381 ; AVX512VLDQ-NEXT: retq
382 %cvt = fptoui <2 x double> %a to <2 x i32>
383 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Test: fptoui <2 x double> -> <2 x i32>, then a shufflevector widens the
; result to <4 x i32> with the upper two lanes undef (no zeroing required).
; Per the checks below, the SSE and VEX runs use the same two-cvttpd2dq
; emulation merged via psrad $31 / and / or; the AVX512F and AVX512DQ runs use
; vcvttpd2udq on zmm0 with no follow-up vmovq; the AVX512VL and AVX512VLDQ
; runs use vcvttpd2udq on xmm0.
387 define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
388 ; SSE-LABEL: fptoui_2f64_to_2i32:
390 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
391 ; SSE-NEXT: movapd %xmm1, %xmm2
392 ; SSE-NEXT: psrad $31, %xmm2
393 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
394 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
395 ; SSE-NEXT: andpd %xmm2, %xmm0
396 ; SSE-NEXT: orpd %xmm1, %xmm0
399 ; VEX-LABEL: fptoui_2f64_to_2i32:
401 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1
402 ; VEX-NEXT: vpsrad $31, %xmm1, %xmm2
403 ; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
404 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
405 ; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0
406 ; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0
409 ; AVX512F-LABEL: fptoui_2f64_to_2i32:
411 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
412 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
413 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
414 ; AVX512F-NEXT: vzeroupper
417 ; AVX512VL-LABEL: fptoui_2f64_to_2i32:
419 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
420 ; AVX512VL-NEXT: retq
422 ; AVX512DQ-LABEL: fptoui_2f64_to_2i32:
424 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
425 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
426 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
427 ; AVX512DQ-NEXT: vzeroupper
428 ; AVX512DQ-NEXT: retq
430 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
431 ; AVX512VLDQ: # %bb.0:
432 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
433 ; AVX512VLDQ-NEXT: retq
434 %cvt = fptoui <2 x double> %a to <2 x i32>
435 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Test: the <2 x double> argument is first widened to <4 x double> by a
; shufflevector against zeroinitializer (upper two lanes are zero), then
; converted with fptoui <4 x double> -> <4 x i32>.
; Per the checks below, the SSE run uses the two-cvttpd2dq emulation on xmm0
; only. The AVX1 and AVX2 runs begin with vmovapd %xmm0, %xmm0 and then apply
; the emulation on ymm0 (two vcvttpd2dq plus a vsubpd, merged with
; vpsrad $31 / vandpd / vorpd). All AVX512 runs begin with
; vmovaps %xmm0, %xmm0 and then use vcvttpd2udq (zmm0 source without VL,
; ymm0 source with VL).
; NOTE(review): the leading xmm-to-xmm move presumably implements the zeroed
; upper lanes of the shuffle - confirm against the x86 VEX/EVEX move semantics.
439 define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
440 ; SSE-LABEL: fptoui_4f64_to_2i32:
442 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
443 ; SSE-NEXT: movapd %xmm1, %xmm2
444 ; SSE-NEXT: psrad $31, %xmm2
445 ; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
446 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
447 ; SSE-NEXT: andpd %xmm2, %xmm0
448 ; SSE-NEXT: orpd %xmm1, %xmm0
451 ; AVX1-LABEL: fptoui_4f64_to_2i32:
453 ; AVX1-NEXT: vmovapd %xmm0, %xmm0
454 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1
455 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
456 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
457 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
458 ; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
459 ; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0
460 ; AVX1-NEXT: vzeroupper
463 ; AVX2-LABEL: fptoui_4f64_to_2i32:
465 ; AVX2-NEXT: vmovapd %xmm0, %xmm0
466 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
467 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
468 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
469 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
470 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
471 ; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1
472 ; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0
473 ; AVX2-NEXT: vzeroupper
476 ; AVX512F-LABEL: fptoui_4f64_to_2i32:
478 ; AVX512F-NEXT: vmovaps %xmm0, %xmm0
479 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
480 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
481 ; AVX512F-NEXT: vzeroupper
484 ; AVX512VL-LABEL: fptoui_4f64_to_2i32:
486 ; AVX512VL-NEXT: vmovaps %xmm0, %xmm0
487 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
488 ; AVX512VL-NEXT: vzeroupper
489 ; AVX512VL-NEXT: retq
491 ; AVX512DQ-LABEL: fptoui_4f64_to_2i32:
493 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
494 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
495 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
496 ; AVX512DQ-NEXT: vzeroupper
497 ; AVX512DQ-NEXT: retq
499 ; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32:
500 ; AVX512VLDQ: # %bb.0:
501 ; AVX512VLDQ-NEXT: vmovaps %xmm0, %xmm0
502 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
503 ; AVX512VLDQ-NEXT: vzeroupper
504 ; AVX512VLDQ-NEXT: retq
505 %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
506 %cvt = fptoui <4 x double> %ext to <4 x i32>
; Test: fptoui <4 x double> -> <4 x i64>.
; Per the checks below, the SSE, AVX1 and AVX2 runs emulate the unsigned
; conversion one lane at a time: each lane is converted with the signed
; (v)cvttsd2si both as-is and after subtracting 9.2233720368547758E+18 (2^63),
; and the two results are merged with sarq $63 / andq / orq. The AVX512F and
; AVX512VL runs use scalar vcvttsd2usi per lane, the AVX512DQ run uses
; vcvttpd2uqq on zmm0, and the AVX512VLDQ run uses it on ymm0.
510 define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
511 ; SSE-LABEL: fptoui_4f64_to_4i64:
513 ; SSE-NEXT: movapd %xmm0, %xmm2
514 ; SSE-NEXT: movsd {{.*#+}} xmm3 = [9.2233720368547758E+18,0.0E+0]
515 ; SSE-NEXT: subsd %xmm3, %xmm0
516 ; SSE-NEXT: cvttsd2si %xmm0, %rax
517 ; SSE-NEXT: cvttsd2si %xmm2, %rcx
518 ; SSE-NEXT: movq %rcx, %rdx
519 ; SSE-NEXT: sarq $63, %rdx
520 ; SSE-NEXT: andq %rax, %rdx
521 ; SSE-NEXT: orq %rcx, %rdx
522 ; SSE-NEXT: movq %rdx, %xmm0
523 ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
524 ; SSE-NEXT: cvttsd2si %xmm2, %rax
525 ; SSE-NEXT: subsd %xmm3, %xmm2
526 ; SSE-NEXT: cvttsd2si %xmm2, %rcx
527 ; SSE-NEXT: movq %rax, %rdx
528 ; SSE-NEXT: sarq $63, %rdx
529 ; SSE-NEXT: andq %rcx, %rdx
530 ; SSE-NEXT: orq %rax, %rdx
531 ; SSE-NEXT: movq %rdx, %xmm2
532 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
533 ; SSE-NEXT: movapd %xmm1, %xmm2
534 ; SSE-NEXT: subsd %xmm3, %xmm2
535 ; SSE-NEXT: cvttsd2si %xmm2, %rax
536 ; SSE-NEXT: cvttsd2si %xmm1, %rcx
537 ; SSE-NEXT: movq %rcx, %rdx
538 ; SSE-NEXT: sarq $63, %rdx
539 ; SSE-NEXT: andq %rax, %rdx
540 ; SSE-NEXT: orq %rcx, %rdx
541 ; SSE-NEXT: movq %rdx, %xmm2
542 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
543 ; SSE-NEXT: cvttsd2si %xmm1, %rax
544 ; SSE-NEXT: subsd %xmm3, %xmm1
545 ; SSE-NEXT: cvttsd2si %xmm1, %rcx
546 ; SSE-NEXT: movq %rax, %rdx
547 ; SSE-NEXT: sarq $63, %rdx
548 ; SSE-NEXT: andq %rcx, %rdx
549 ; SSE-NEXT: orq %rax, %rdx
550 ; SSE-NEXT: movq %rdx, %xmm1
551 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
552 ; SSE-NEXT: movdqa %xmm2, %xmm1
555 ; AVX1-LABEL: fptoui_4f64_to_4i64:
557 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
558 ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
559 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
560 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
561 ; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
562 ; AVX1-NEXT: movq %rcx, %rdx
563 ; AVX1-NEXT: sarq $63, %rdx
564 ; AVX1-NEXT: andq %rax, %rdx
565 ; AVX1-NEXT: orq %rcx, %rdx
566 ; AVX1-NEXT: vmovq %rdx, %xmm3
567 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
568 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4
569 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
570 ; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
571 ; AVX1-NEXT: movq %rcx, %rdx
572 ; AVX1-NEXT: sarq $63, %rdx
573 ; AVX1-NEXT: andq %rax, %rdx
574 ; AVX1-NEXT: orq %rcx, %rdx
575 ; AVX1-NEXT: vmovq %rdx, %xmm2
576 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
577 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3
578 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
579 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
580 ; AVX1-NEXT: movq %rcx, %rdx
581 ; AVX1-NEXT: sarq $63, %rdx
582 ; AVX1-NEXT: andq %rax, %rdx
583 ; AVX1-NEXT: orq %rcx, %rdx
584 ; AVX1-NEXT: vmovq %rdx, %xmm3
585 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
586 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm1
587 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
588 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
589 ; AVX1-NEXT: movq %rcx, %rdx
590 ; AVX1-NEXT: sarq $63, %rdx
591 ; AVX1-NEXT: andq %rax, %rdx
592 ; AVX1-NEXT: orq %rcx, %rdx
593 ; AVX1-NEXT: vmovq %rdx, %xmm0
594 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
595 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
598 ; AVX2-LABEL: fptoui_4f64_to_4i64:
600 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
601 ; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
602 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3
603 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
604 ; AVX2-NEXT: vcvttsd2si %xmm2, %rcx
605 ; AVX2-NEXT: movq %rcx, %rdx
606 ; AVX2-NEXT: sarq $63, %rdx
607 ; AVX2-NEXT: andq %rax, %rdx
608 ; AVX2-NEXT: orq %rcx, %rdx
609 ; AVX2-NEXT: vmovq %rdx, %xmm3
610 ; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
611 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4
612 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
613 ; AVX2-NEXT: vcvttsd2si %xmm2, %rcx
614 ; AVX2-NEXT: movq %rcx, %rdx
615 ; AVX2-NEXT: sarq $63, %rdx
616 ; AVX2-NEXT: andq %rax, %rdx
617 ; AVX2-NEXT: orq %rcx, %rdx
618 ; AVX2-NEXT: vmovq %rdx, %xmm2
619 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
620 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3
621 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
622 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
623 ; AVX2-NEXT: movq %rcx, %rdx
624 ; AVX2-NEXT: sarq $63, %rdx
625 ; AVX2-NEXT: andq %rax, %rdx
626 ; AVX2-NEXT: orq %rcx, %rdx
627 ; AVX2-NEXT: vmovq %rdx, %xmm3
628 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
629 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm1
630 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
631 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
632 ; AVX2-NEXT: movq %rcx, %rdx
633 ; AVX2-NEXT: sarq $63, %rdx
634 ; AVX2-NEXT: andq %rax, %rdx
635 ; AVX2-NEXT: orq %rcx, %rdx
636 ; AVX2-NEXT: vmovq %rdx, %xmm0
637 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
638 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
641 ; AVX512F-LABEL: fptoui_4f64_to_4i64:
643 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
644 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
645 ; AVX512F-NEXT: vmovq %rax, %xmm2
646 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
647 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
648 ; AVX512F-NEXT: vmovq %rax, %xmm1
649 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
650 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
651 ; AVX512F-NEXT: vmovq %rax, %xmm2
652 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
653 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
654 ; AVX512F-NEXT: vmovq %rax, %xmm0
655 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
656 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
659 ; AVX512VL-LABEL: fptoui_4f64_to_4i64:
661 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
662 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
663 ; AVX512VL-NEXT: vmovq %rax, %xmm2
664 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
665 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
666 ; AVX512VL-NEXT: vmovq %rax, %xmm1
667 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
668 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
669 ; AVX512VL-NEXT: vmovq %rax, %xmm2
670 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
671 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
672 ; AVX512VL-NEXT: vmovq %rax, %xmm0
673 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
674 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
675 ; AVX512VL-NEXT: retq
677 ; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
679 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
680 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
681 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
682 ; AVX512DQ-NEXT: retq
684 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64:
685 ; AVX512VLDQ: # %bb.0:
686 ; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0
687 ; AVX512VLDQ-NEXT: retq
688 %cvt = fptoui <4 x double> %a to <4 x i64>
; Test: fptoui <4 x double> -> <4 x i32>.
; Per the checks below, the SSE, AVX1 and AVX2 runs emulate the unsigned
; conversion with signed (v)cvttpd2dq applied to both the input and the input
; minus a constant (2.147483648E+9 in the SSE and AVX2 checks, a constant-pool
; operand in the AVX1 checks), merged with psrad $31 / and / or. The SSE run
; does this once per input register and joins the halves with punpcklqdq.
; All AVX512 runs use vcvttpd2udq (zmm0 source without VL, ymm0 source with
; VL).
692 define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
693 ; SSE-LABEL: fptoui_4f64_to_4i32:
695 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [2.147483648E+9,2.147483648E+9]
696 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm3
697 ; SSE-NEXT: subpd %xmm2, %xmm1
698 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
699 ; SSE-NEXT: movapd %xmm3, %xmm4
700 ; SSE-NEXT: psrad $31, %xmm4
701 ; SSE-NEXT: pand %xmm1, %xmm4
702 ; SSE-NEXT: por %xmm3, %xmm4
703 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
704 ; SSE-NEXT: subpd %xmm2, %xmm0
705 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm2
706 ; SSE-NEXT: movapd %xmm1, %xmm0
707 ; SSE-NEXT: psrad $31, %xmm0
708 ; SSE-NEXT: pand %xmm2, %xmm0
709 ; SSE-NEXT: por %xmm1, %xmm0
710 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
713 ; AVX1-LABEL: fptoui_4f64_to_4i32:
715 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1
716 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
717 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
718 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
719 ; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
720 ; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0
721 ; AVX1-NEXT: vzeroupper
724 ; AVX2-LABEL: fptoui_4f64_to_4i32:
726 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
727 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
728 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
729 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
730 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
731 ; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1
732 ; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0
733 ; AVX2-NEXT: vzeroupper
736 ; AVX512F-LABEL: fptoui_4f64_to_4i32:
738 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
739 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
740 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
741 ; AVX512F-NEXT: vzeroupper
744 ; AVX512VL-LABEL: fptoui_4f64_to_4i32:
746 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
747 ; AVX512VL-NEXT: vzeroupper
748 ; AVX512VL-NEXT: retq
750 ; AVX512DQ-LABEL: fptoui_4f64_to_4i32:
752 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
753 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
754 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
755 ; AVX512DQ-NEXT: vzeroupper
756 ; AVX512DQ-NEXT: retq
758 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32:
759 ; AVX512VLDQ: # %bb.0:
760 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
761 ; AVX512VLDQ-NEXT: vzeroupper
762 ; AVX512VLDQ-NEXT: retq
763 %cvt = fptoui <4 x double> %a to <4 x i32>
768 ; Float to Signed Integer
; Test: fptosi <2 x float> -> <2 x i32>.
; Both the SSE and the AVX checks expect a single (v)cvttps2dq on xmm0.
771 define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
772 ; SSE-LABEL: fptosi_2f32_to_2i32:
774 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
777 ; AVX-LABEL: fptosi_2f32_to_2i32:
779 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
781 %cvt = fptosi <2 x float> %a to <2 x i32>
; Test: fptosi <4 x float> -> <4 x i32>.
; Both the SSE and the AVX checks expect a single (v)cvttps2dq on xmm0.
785 define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
786 ; SSE-LABEL: fptosi_4f32_to_4i32:
788 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
791 ; AVX-LABEL: fptosi_4f32_to_4i32:
793 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
795 %cvt = fptosi <4 x float> %a to <4 x i32>
; Test: a shufflevector extracts lanes 0 and 1 of the <4 x float> argument,
; then fptosi <2 x float> -> <2 x i64> converts them (narrow first, then
; convert).
; Per the checks below, the SSE, VEX, AVX512F and AVX512VL runs convert each
; lane with a scalar (v)cvttss2si and repack the two 64-bit results. The
; AVX512DQ run uses vcvttps2qq with a ymm0 source and zmm0 destination; the
; AVX512VLDQ run uses vcvttps2qq on xmm0.
799 define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
800 ; SSE-LABEL: fptosi_2f32_to_2i64:
802 ; SSE-NEXT: cvttss2si %xmm0, %rax
803 ; SSE-NEXT: movq %rax, %xmm1
804 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
805 ; SSE-NEXT: cvttss2si %xmm0, %rax
806 ; SSE-NEXT: movq %rax, %xmm0
807 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
808 ; SSE-NEXT: movdqa %xmm1, %xmm0
811 ; VEX-LABEL: fptosi_2f32_to_2i64:
813 ; VEX-NEXT: vcvttss2si %xmm0, %rax
814 ; VEX-NEXT: vmovq %rax, %xmm1
815 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
816 ; VEX-NEXT: vcvttss2si %xmm0, %rax
817 ; VEX-NEXT: vmovq %rax, %xmm0
818 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
821 ; AVX512F-LABEL: fptosi_2f32_to_2i64:
823 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
824 ; AVX512F-NEXT: vmovq %rax, %xmm1
825 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
826 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
827 ; AVX512F-NEXT: vmovq %rax, %xmm0
828 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
831 ; AVX512VL-LABEL: fptosi_2f32_to_2i64:
833 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
834 ; AVX512VL-NEXT: vmovq %rax, %xmm1
835 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
836 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
837 ; AVX512VL-NEXT: vmovq %rax, %xmm0
838 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
839 ; AVX512VL-NEXT: retq
841 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
843 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
844 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
845 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
846 ; AVX512DQ-NEXT: vzeroupper
847 ; AVX512DQ-NEXT: retq
849 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
850 ; AVX512VLDQ: # %bb.0:
851 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
852 ; AVX512VLDQ-NEXT: retq
853 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
854 %cvt = fptosi <2 x float> %shuf to <2 x i64>
; Test: fptosi <4 x float> -> <4 x i64> converts all four lanes, then a
; shufflevector keeps only lanes 0 and 1 (convert first, then narrow).
; Per the checks below, the SSE, VEX, AVX512F and AVX512VL runs convert only
; two lanes, each with a scalar (v)cvttss2si. The AVX512DQ run uses
; vcvttps2qq with a ymm0 source and zmm0 destination; the AVX512VLDQ run uses
; the xmm0 -> ymm0 form of vcvttps2qq and therefore ends with vzeroupper.
858 define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
859 ; SSE-LABEL: fptosi_4f32_to_2i64:
861 ; SSE-NEXT: cvttss2si %xmm0, %rax
862 ; SSE-NEXT: movq %rax, %xmm1
863 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
864 ; SSE-NEXT: cvttss2si %xmm0, %rax
865 ; SSE-NEXT: movq %rax, %xmm0
866 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
867 ; SSE-NEXT: movdqa %xmm1, %xmm0
870 ; VEX-LABEL: fptosi_4f32_to_2i64:
872 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
873 ; VEX-NEXT: vcvttss2si %xmm1, %rax
874 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
875 ; VEX-NEXT: vmovq %rcx, %xmm0
876 ; VEX-NEXT: vmovq %rax, %xmm1
877 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
880 ; AVX512F-LABEL: fptosi_4f32_to_2i64:
882 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
883 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
884 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
885 ; AVX512F-NEXT: vmovq %rcx, %xmm0
886 ; AVX512F-NEXT: vmovq %rax, %xmm1
887 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
890 ; AVX512VL-LABEL: fptosi_4f32_to_2i64:
892 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
893 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
894 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
895 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
896 ; AVX512VL-NEXT: vmovq %rax, %xmm1
897 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
898 ; AVX512VL-NEXT: retq
900 ; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
902 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
903 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
904 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
905 ; AVX512DQ-NEXT: vzeroupper
906 ; AVX512DQ-NEXT: retq
908 ; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
909 ; AVX512VLDQ: # %bb.0:
910 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
911 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
912 ; AVX512VLDQ-NEXT: vzeroupper
913 ; AVX512VLDQ-NEXT: retq
914 %cvt = fptosi <4 x float> %a to <4 x i64>
915 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
; Test: fptosi <8 x float> -> <8 x i32>.
; The SSE checks expect one cvttps2dq per 128-bit half (xmm0 and xmm1); the
; AVX checks expect a single vcvttps2dq on ymm0.
919 define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
920 ; SSE-LABEL: fptosi_8f32_to_8i32:
922 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
923 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
926 ; AVX-LABEL: fptosi_8f32_to_8i32:
928 ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
930 %cvt = fptosi <8 x float> %a to <8 x i32>
; Signed conversion of the low four floats of an <8 x float> argument to
; <4 x i64>. The shuffle selects elements 0-3 first and only those are
; converted.
; Expected lowering per the assertions below:
;  - runs without AVX512DQ convert each element with a scalar (v)cvttss2si
;    and re-pack the 64-bit results with (v)punpcklqdq (plus a 128-bit
;    insert on the 256-bit capable targets);
;  - the AVX512DQ run uses a ymm-to-zmm vcvttps2qq, and the run that also
;    has VL uses the xmm-to-ymm form.
934 define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
935 ; SSE-LABEL: fptosi_4f32_to_4i64:
937 ; SSE-NEXT: cvttss2si %xmm0, %rax
938 ; SSE-NEXT: movq %rax, %xmm2
939 ; SSE-NEXT: movaps %xmm0, %xmm1
940 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
941 ; SSE-NEXT: cvttss2si %xmm1, %rax
942 ; SSE-NEXT: movq %rax, %xmm1
943 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
944 ; SSE-NEXT: movaps %xmm0, %xmm1
945 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
946 ; SSE-NEXT: cvttss2si %xmm1, %rax
947 ; SSE-NEXT: movq %rax, %xmm3
948 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
949 ; SSE-NEXT: cvttss2si %xmm0, %rax
950 ; SSE-NEXT: movq %rax, %xmm1
951 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
952 ; SSE-NEXT: movdqa %xmm2, %xmm0
955 ; AVX1-LABEL: fptosi_4f32_to_4i64:
957 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
958 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
959 ; AVX1-NEXT: vmovq %rax, %xmm1
960 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
961 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
962 ; AVX1-NEXT: vmovq %rax, %xmm2
963 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
964 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
965 ; AVX1-NEXT: vmovq %rax, %xmm2
966 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
967 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
968 ; AVX1-NEXT: vmovq %rax, %xmm0
969 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
970 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
973 ; AVX2-LABEL: fptosi_4f32_to_4i64:
975 ; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
976 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
977 ; AVX2-NEXT: vmovq %rax, %xmm1
978 ; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
979 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
980 ; AVX2-NEXT: vmovq %rax, %xmm2
981 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
982 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
983 ; AVX2-NEXT: vmovq %rax, %xmm2
984 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
985 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
986 ; AVX2-NEXT: vmovq %rax, %xmm0
987 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
988 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
991 ; AVX512F-LABEL: fptosi_4f32_to_4i64:
993 ; AVX512F-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
994 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
995 ; AVX512F-NEXT: vmovq %rax, %xmm1
996 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
997 ; AVX512F-NEXT: vcvttss2si %xmm2, %rax
998 ; AVX512F-NEXT: vmovq %rax, %xmm2
999 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1000 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1001 ; AVX512F-NEXT: vmovq %rax, %xmm2
1002 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1003 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1004 ; AVX512F-NEXT: vmovq %rax, %xmm0
1005 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1006 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1007 ; AVX512F-NEXT: retq
1009 ; AVX512VL-LABEL: fptosi_4f32_to_4i64:
1010 ; AVX512VL: # %bb.0:
1011 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1012 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1013 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1014 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1015 ; AVX512VL-NEXT: vcvttss2si %xmm2, %rax
1016 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1017 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1018 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1019 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1020 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1021 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1022 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1023 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1024 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1025 ; AVX512VL-NEXT: retq
1027 ; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
1028 ; AVX512DQ: # %bb.0:
1029 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1030 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1031 ; AVX512DQ-NEXT: retq
1033 ; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
1034 ; AVX512VLDQ: # %bb.0:
1035 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
1036 ; AVX512VLDQ-NEXT: retq
1037 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1038 %cvt = fptosi <4 x float> %shuf to <4 x i64>
; Signed conversion of all eight floats to <8 x i64>, after which only result
; elements 0-3 are kept by the shuffle (convert first, then truncate the
; vector).
; Expected lowering per the assertions below:
;  - runs without AVX512DQ contain exactly four scalar (v)cvttss2si
;    conversions, re-packed with (v)punpcklqdq;
;  - both runs with AVX512DQ use a ymm-to-zmm vcvttps2qq and then narrow the
;    result register to ymm0.
1042 define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
1043 ; SSE-LABEL: fptosi_8f32_to_4i64:
1045 ; SSE-NEXT: cvttss2si %xmm0, %rax
1046 ; SSE-NEXT: movq %rax, %xmm2
1047 ; SSE-NEXT: movaps %xmm0, %xmm1
1048 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
1049 ; SSE-NEXT: cvttss2si %xmm1, %rax
1050 ; SSE-NEXT: movq %rax, %xmm1
1051 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1052 ; SSE-NEXT: movaps %xmm0, %xmm1
1053 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
1054 ; SSE-NEXT: cvttss2si %xmm1, %rax
1055 ; SSE-NEXT: movq %rax, %xmm3
1056 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1057 ; SSE-NEXT: cvttss2si %xmm0, %rax
1058 ; SSE-NEXT: movq %rax, %xmm1
1059 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1060 ; SSE-NEXT: movdqa %xmm2, %xmm0
1063 ; AVX1-LABEL: fptosi_8f32_to_4i64:
1065 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1066 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1067 ; AVX1-NEXT: vmovq %rax, %xmm1
1068 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1069 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
1070 ; AVX1-NEXT: vmovq %rax, %xmm2
1071 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1072 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1073 ; AVX1-NEXT: vmovq %rax, %xmm2
1074 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1075 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1076 ; AVX1-NEXT: vmovq %rax, %xmm0
1077 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1078 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1081 ; AVX2-LABEL: fptosi_8f32_to_4i64:
1083 ; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1084 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1085 ; AVX2-NEXT: vmovq %rax, %xmm1
1086 ; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1087 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
1088 ; AVX2-NEXT: vmovq %rax, %xmm2
1089 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1090 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1091 ; AVX2-NEXT: vmovq %rax, %xmm2
1092 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1093 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1094 ; AVX2-NEXT: vmovq %rax, %xmm0
1095 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1096 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1099 ; AVX512F-LABEL: fptosi_8f32_to_4i64:
1101 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1102 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
1103 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
1104 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1105 ; AVX512F-NEXT: vcvttss2si %xmm1, %rdx
1106 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1107 ; AVX512F-NEXT: vcvttss2si %xmm0, %rsi
1108 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1109 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1110 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1111 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1112 ; AVX512F-NEXT: vmovq %rax, %xmm2
1113 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1114 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1115 ; AVX512F-NEXT: retq
1117 ; AVX512VL-LABEL: fptosi_8f32_to_4i64:
1118 ; AVX512VL: # %bb.0:
1119 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1120 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1121 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
1122 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1123 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx
1124 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1125 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi
1126 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1127 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1128 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1129 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1130 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1131 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1132 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1133 ; AVX512VL-NEXT: retq
1135 ; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
1136 ; AVX512DQ: # %bb.0:
1137 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1138 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1139 ; AVX512DQ-NEXT: retq
1141 ; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
1142 ; AVX512VLDQ: # %bb.0:
1143 ; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0
1144 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1145 ; AVX512VLDQ-NEXT: retq
1146 %cvt = fptosi <8 x float> %a to <8 x i64>
1147 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1152 ; Float to Unsigned Integer
; Unsigned conversion of <2 x float> to <2 x i32>.
; Expected lowering per the assertions below:
;  - SSE, AVX1 and AVX2 runs have no unsigned convert, so the signed
;    (v)cvttps2dq is applied twice, once to the input and once to the input
;    minus a constant (printed as 2.14748365E+9 in the AVX2 checks), and the
;    two results are merged with psrad $31 / pand / por;
;  - AVX512 runs use vcvttps2udq directly; the runs without VL widen the
;    operand to zmm0 and therefore also expect a vzeroupper.
1155 define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
1156 ; SSE-LABEL: fptoui_2f32_to_2i32:
1158 ; SSE-NEXT: cvttps2dq %xmm0, %xmm1
1159 ; SSE-NEXT: movdqa %xmm1, %xmm2
1160 ; SSE-NEXT: psrad $31, %xmm2
1161 ; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1162 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1163 ; SSE-NEXT: pand %xmm2, %xmm0
1164 ; SSE-NEXT: por %xmm1, %xmm0
1167 ; AVX1-LABEL: fptoui_2f32_to_2i32:
1169 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
1170 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
1171 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1172 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1173 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1174 ; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
1177 ; AVX2-LABEL: fptoui_2f32_to_2i32:
1179 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1180 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1181 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1182 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1183 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
1184 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
1185 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1188 ; AVX512F-LABEL: fptoui_2f32_to_2i32:
1190 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1191 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1192 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1193 ; AVX512F-NEXT: vzeroupper
1194 ; AVX512F-NEXT: retq
1196 ; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1197 ; AVX512VL: # %bb.0:
1198 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1199 ; AVX512VL-NEXT: retq
1201 ; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
1202 ; AVX512DQ: # %bb.0:
1203 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1204 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1205 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1206 ; AVX512DQ-NEXT: vzeroupper
1207 ; AVX512DQ-NEXT: retq
1209 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1210 ; AVX512VLDQ: # %bb.0:
1211 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1212 ; AVX512VLDQ-NEXT: retq
1213 %cvt = fptoui <2 x float> %a to <2 x i32>
; Unsigned conversion of <4 x float> to <4 x i32>.
; Expected lowering per the assertions below:
;  - SSE, AVX1 and AVX2 runs apply the signed (v)cvttps2dq to both the input
;    and the input minus a constant (printed as 2.14748365E+9 in the AVX2
;    checks) and merge the two results with psrad $31 / pand / por;
;  - AVX512 runs use vcvttps2udq directly; the runs without VL widen the
;    operand to zmm0 and therefore also expect a vzeroupper.
1217 define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
1218 ; SSE-LABEL: fptoui_4f32_to_4i32:
1220 ; SSE-NEXT: cvttps2dq %xmm0, %xmm1
1221 ; SSE-NEXT: movdqa %xmm1, %xmm2
1222 ; SSE-NEXT: psrad $31, %xmm2
1223 ; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1224 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1225 ; SSE-NEXT: pand %xmm2, %xmm0
1226 ; SSE-NEXT: por %xmm1, %xmm0
1229 ; AVX1-LABEL: fptoui_4f32_to_4i32:
1231 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
1232 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
1233 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1234 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1235 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1236 ; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
1239 ; AVX2-LABEL: fptoui_4f32_to_4i32:
1241 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1242 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1243 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1244 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1245 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
1246 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
1247 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1250 ; AVX512F-LABEL: fptoui_4f32_to_4i32:
1252 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1253 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1254 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1255 ; AVX512F-NEXT: vzeroupper
1256 ; AVX512F-NEXT: retq
1258 ; AVX512VL-LABEL: fptoui_4f32_to_4i32:
1259 ; AVX512VL: # %bb.0:
1260 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1261 ; AVX512VL-NEXT: retq
1263 ; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
1264 ; AVX512DQ: # %bb.0:
1265 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1266 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1267 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1268 ; AVX512DQ-NEXT: vzeroupper
1269 ; AVX512DQ-NEXT: retq
1271 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
1272 ; AVX512VLDQ: # %bb.0:
1273 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1274 ; AVX512VLDQ-NEXT: retq
1275 %cvt = fptoui <4 x float> %a to <4 x i32>
; Unsigned conversion of the low two floats of a <4 x float> argument to
; <2 x i64>. The shuffle selects elements 0-1 first and only those are
; converted.
; Expected lowering per the assertions below:
;  - SSE and VEX-prefixed runs convert each element twice with the signed
;    (v)cvttss2si, once as-is and once after subtracting the constant printed
;    as 9.22337203E+18, and merge the two integers with sarq $63 / andq / orq;
;  - AVX512F and AVX512VL runs use the scalar vcvttss2usi per element;
;  - runs with AVX512DQ use a single packed vcvttps2uqq (widened to
;    ymm-to-zmm, followed by vzeroupper, when VL is absent).
1279 define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
1280 ; SSE-LABEL: fptoui_2f32_to_2i64:
1282 ; SSE-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1283 ; SSE-NEXT: movaps %xmm0, %xmm1
1284 ; SSE-NEXT: subss %xmm2, %xmm1
1285 ; SSE-NEXT: cvttss2si %xmm1, %rax
1286 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1287 ; SSE-NEXT: movq %rcx, %rdx
1288 ; SSE-NEXT: sarq $63, %rdx
1289 ; SSE-NEXT: andq %rax, %rdx
1290 ; SSE-NEXT: orq %rcx, %rdx
1291 ; SSE-NEXT: movq %rdx, %xmm1
1292 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1293 ; SSE-NEXT: cvttss2si %xmm0, %rax
1294 ; SSE-NEXT: subss %xmm2, %xmm0
1295 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1296 ; SSE-NEXT: movq %rax, %rdx
1297 ; SSE-NEXT: sarq $63, %rdx
1298 ; SSE-NEXT: andq %rcx, %rdx
1299 ; SSE-NEXT: orq %rax, %rdx
1300 ; SSE-NEXT: movq %rdx, %xmm0
1301 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1302 ; SSE-NEXT: movdqa %xmm1, %xmm0
1305 ; VEX-LABEL: fptoui_2f32_to_2i64:
1307 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1308 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
1309 ; VEX-NEXT: vcvttss2si %xmm2, %rax
1310 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1311 ; VEX-NEXT: movq %rcx, %rdx
1312 ; VEX-NEXT: sarq $63, %rdx
1313 ; VEX-NEXT: andq %rax, %rdx
1314 ; VEX-NEXT: orq %rcx, %rdx
1315 ; VEX-NEXT: vmovq %rdx, %xmm2
1316 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1317 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm1
1318 ; VEX-NEXT: vcvttss2si %xmm1, %rax
1319 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1320 ; VEX-NEXT: movq %rcx, %rdx
1321 ; VEX-NEXT: sarq $63, %rdx
1322 ; VEX-NEXT: andq %rax, %rdx
1323 ; VEX-NEXT: orq %rcx, %rdx
1324 ; VEX-NEXT: vmovq %rdx, %xmm0
1325 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1328 ; AVX512F-LABEL: fptoui_2f32_to_2i64:
1330 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1331 ; AVX512F-NEXT: vmovq %rax, %xmm1
1332 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1333 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1334 ; AVX512F-NEXT: vmovq %rax, %xmm0
1335 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1336 ; AVX512F-NEXT: retq
1338 ; AVX512VL-LABEL: fptoui_2f32_to_2i64:
1339 ; AVX512VL: # %bb.0:
1340 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1341 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1342 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1343 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1344 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1345 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1346 ; AVX512VL-NEXT: retq
1348 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
1349 ; AVX512DQ: # %bb.0:
1350 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1351 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1352 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1353 ; AVX512DQ-NEXT: vzeroupper
1354 ; AVX512DQ-NEXT: retq
1356 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
1357 ; AVX512VLDQ: # %bb.0:
1358 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
1359 ; AVX512VLDQ-NEXT: retq
1360 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1361 %cvt = fptoui <2 x float> %shuf to <2 x i64>
; Unsigned conversion of all four floats to <4 x i64>, after which only
; result elements 0-1 are kept by the shuffle (convert first, then truncate
; the vector).
; Expected lowering per the assertions below:
;  - SSE and VEX-prefixed runs contain two element conversions, each done
;    with a pair of signed (v)cvttss2si (input, and input minus the constant
;    printed as 9.22337203E+18) merged with sarq $63 / andq / orq;
;  - AVX512F and AVX512VL runs use two scalar vcvttss2usi;
;  - runs with AVX512DQ use a packed vcvttps2uqq into a wider register,
;    narrow the result to xmm0 and emit vzeroupper.
1365 define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
1366 ; SSE-LABEL: fptoui_4f32_to_2i64:
1368 ; SSE-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1369 ; SSE-NEXT: movaps %xmm0, %xmm1
1370 ; SSE-NEXT: subss %xmm2, %xmm1
1371 ; SSE-NEXT: cvttss2si %xmm1, %rax
1372 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1373 ; SSE-NEXT: movq %rcx, %rdx
1374 ; SSE-NEXT: sarq $63, %rdx
1375 ; SSE-NEXT: andq %rax, %rdx
1376 ; SSE-NEXT: orq %rcx, %rdx
1377 ; SSE-NEXT: movq %rdx, %xmm1
1378 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1379 ; SSE-NEXT: cvttss2si %xmm0, %rax
1380 ; SSE-NEXT: subss %xmm2, %xmm0
1381 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1382 ; SSE-NEXT: movq %rax, %rdx
1383 ; SSE-NEXT: sarq $63, %rdx
1384 ; SSE-NEXT: andq %rcx, %rdx
1385 ; SSE-NEXT: orq %rax, %rdx
1386 ; SSE-NEXT: movq %rdx, %xmm0
1387 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1388 ; SSE-NEXT: movdqa %xmm1, %xmm0
1391 ; VEX-LABEL: fptoui_4f32_to_2i64:
1393 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1394 ; VEX-NEXT: vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1395 ; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3
1396 ; VEX-NEXT: vcvttss2si %xmm3, %rax
1397 ; VEX-NEXT: vcvttss2si %xmm1, %rcx
1398 ; VEX-NEXT: movq %rcx, %rdx
1399 ; VEX-NEXT: sarq $63, %rdx
1400 ; VEX-NEXT: andq %rax, %rdx
1401 ; VEX-NEXT: orq %rcx, %rdx
1402 ; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1
1403 ; VEX-NEXT: vcvttss2si %xmm1, %rax
1404 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1405 ; VEX-NEXT: movq %rcx, %rsi
1406 ; VEX-NEXT: sarq $63, %rsi
1407 ; VEX-NEXT: andq %rax, %rsi
1408 ; VEX-NEXT: orq %rcx, %rsi
1409 ; VEX-NEXT: vmovq %rsi, %xmm0
1410 ; VEX-NEXT: vmovq %rdx, %xmm1
1411 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1414 ; AVX512F-LABEL: fptoui_4f32_to_2i64:
1416 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1417 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1418 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1419 ; AVX512F-NEXT: vmovq %rcx, %xmm0
1420 ; AVX512F-NEXT: vmovq %rax, %xmm1
1421 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1422 ; AVX512F-NEXT: retq
1424 ; AVX512VL-LABEL: fptoui_4f32_to_2i64:
1425 ; AVX512VL: # %bb.0:
1426 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1427 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1428 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1429 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
1430 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1431 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1432 ; AVX512VL-NEXT: retq
1434 ; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
1435 ; AVX512DQ: # %bb.0:
1436 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1437 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1438 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1439 ; AVX512DQ-NEXT: vzeroupper
1440 ; AVX512DQ-NEXT: retq
1442 ; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
1443 ; AVX512VLDQ: # %bb.0:
1444 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1445 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1446 ; AVX512VLDQ-NEXT: vzeroupper
1447 ; AVX512VLDQ-NEXT: retq
1448 %cvt = fptoui <4 x float> %a to <4 x i64>
1449 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
; Unsigned conversion of a full <8 x float> vector to <8 x i32>.
; Expected lowering per the assertions below:
;  - the SSE run handles the two 128-bit halves separately, each with two
;    cvttps2dq (input, and input minus the constant printed as 2.14748365E+9)
;    merged with psrad $31 / pand / por;
;  - the AVX1 run does the two 256-bit vcvttps2dq and merges them with
;    vorps + vblendvps instead of an integer shift/and/or sequence;
;  - the AVX2 run uses the 256-bit vpsrad / vpand / vpor merge;
;  - AVX512 runs use vcvttps2udq directly (on zmm0 when VL is absent).
1453 define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
1454 ; SSE-LABEL: fptoui_8f32_to_8i32:
1456 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1457 ; SSE-NEXT: cvttps2dq %xmm0, %xmm3
1458 ; SSE-NEXT: subps %xmm2, %xmm0
1459 ; SSE-NEXT: cvttps2dq %xmm0, %xmm4
1460 ; SSE-NEXT: movdqa %xmm3, %xmm0
1461 ; SSE-NEXT: psrad $31, %xmm0
1462 ; SSE-NEXT: pand %xmm4, %xmm0
1463 ; SSE-NEXT: por %xmm3, %xmm0
1464 ; SSE-NEXT: cvttps2dq %xmm1, %xmm3
1465 ; SSE-NEXT: subps %xmm2, %xmm1
1466 ; SSE-NEXT: cvttps2dq %xmm1, %xmm2
1467 ; SSE-NEXT: movdqa %xmm3, %xmm1
1468 ; SSE-NEXT: psrad $31, %xmm1
1469 ; SSE-NEXT: pand %xmm2, %xmm1
1470 ; SSE-NEXT: por %xmm3, %xmm1
1473 ; AVX1-LABEL: fptoui_8f32_to_8i32:
1475 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm1
1476 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1477 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
1478 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
1479 ; AVX1-NEXT: vblendvps %ymm1, %ymm0, %ymm1, %ymm0
1482 ; AVX2-LABEL: fptoui_8f32_to_8i32:
1484 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1485 ; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm1
1486 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
1487 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
1488 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm2
1489 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
1490 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1493 ; AVX512F-LABEL: fptoui_8f32_to_8i32:
1495 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1496 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1497 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1498 ; AVX512F-NEXT: retq
1500 ; AVX512VL-LABEL: fptoui_8f32_to_8i32:
1501 ; AVX512VL: # %bb.0:
1502 ; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
1503 ; AVX512VL-NEXT: retq
1505 ; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
1506 ; AVX512DQ: # %bb.0:
1507 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1508 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1509 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1510 ; AVX512DQ-NEXT: retq
1512 ; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
1513 ; AVX512VLDQ: # %bb.0:
1514 ; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0
1515 ; AVX512VLDQ-NEXT: retq
1516 %cvt = fptoui <8 x float> %a to <8 x i32>
; Unsigned conversion of the low four floats of an <8 x float> argument to
; <4 x i64>. The shuffle selects elements 0-3 first and only those are
; converted.
; Expected lowering per the assertions below:
;  - SSE, AVX1 and AVX2 runs convert each of the four elements with a pair
;    of signed (v)cvttss2si (input, and input minus the constant printed as
;    9.22337203E+18) merged with sarq $63 / andq / orq, then re-pack the
;    64-bit results;
;  - AVX512F and AVX512VL runs use four scalar vcvttss2usi;
;  - the AVX512DQ run uses a ymm-to-zmm vcvttps2uqq, and the run that also
;    has VL uses the xmm-to-ymm form.
1520 define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
1521 ; SSE-LABEL: fptoui_4f32_to_4i64:
1523 ; SSE-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1524 ; SSE-NEXT: movaps %xmm0, %xmm2
1525 ; SSE-NEXT: subss %xmm1, %xmm2
1526 ; SSE-NEXT: cvttss2si %xmm2, %rax
1527 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1528 ; SSE-NEXT: movq %rcx, %rdx
1529 ; SSE-NEXT: sarq $63, %rdx
1530 ; SSE-NEXT: andq %rax, %rdx
1531 ; SSE-NEXT: orq %rcx, %rdx
1532 ; SSE-NEXT: movq %rdx, %xmm2
1533 ; SSE-NEXT: movaps %xmm0, %xmm3
1534 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1535 ; SSE-NEXT: cvttss2si %xmm3, %rax
1536 ; SSE-NEXT: subss %xmm1, %xmm3
1537 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1538 ; SSE-NEXT: movq %rax, %rdx
1539 ; SSE-NEXT: sarq $63, %rdx
1540 ; SSE-NEXT: andq %rcx, %rdx
1541 ; SSE-NEXT: orq %rax, %rdx
1542 ; SSE-NEXT: movq %rdx, %xmm3
1543 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1544 ; SSE-NEXT: movaps %xmm0, %xmm3
1545 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1546 ; SSE-NEXT: cvttss2si %xmm3, %rax
1547 ; SSE-NEXT: subss %xmm1, %xmm3
1548 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1549 ; SSE-NEXT: movq %rax, %rdx
1550 ; SSE-NEXT: sarq $63, %rdx
1551 ; SSE-NEXT: andq %rcx, %rdx
1552 ; SSE-NEXT: orq %rax, %rdx
1553 ; SSE-NEXT: movq %rdx, %xmm3
1554 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1555 ; SSE-NEXT: cvttss2si %xmm0, %rax
1556 ; SSE-NEXT: subss %xmm1, %xmm0
1557 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1558 ; SSE-NEXT: movq %rax, %rdx
1559 ; SSE-NEXT: sarq $63, %rdx
1560 ; SSE-NEXT: andq %rcx, %rdx
1561 ; SSE-NEXT: orq %rax, %rdx
1562 ; SSE-NEXT: movq %rdx, %xmm1
1563 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1564 ; SSE-NEXT: movdqa %xmm2, %xmm0
1567 ; AVX1-LABEL: fptoui_4f32_to_4i64:
1569 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1570 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1571 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
1572 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1573 ; AVX1-NEXT: vcvttss2si %xmm2, %rcx
1574 ; AVX1-NEXT: movq %rcx, %rdx
1575 ; AVX1-NEXT: sarq $63, %rdx
1576 ; AVX1-NEXT: andq %rax, %rdx
1577 ; AVX1-NEXT: orq %rcx, %rdx
1578 ; AVX1-NEXT: vmovq %rdx, %xmm2
1579 ; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1580 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
1581 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1582 ; AVX1-NEXT: vcvttss2si %xmm3, %rcx
1583 ; AVX1-NEXT: movq %rcx, %rdx
1584 ; AVX1-NEXT: sarq $63, %rdx
1585 ; AVX1-NEXT: andq %rax, %rdx
1586 ; AVX1-NEXT: orq %rcx, %rdx
1587 ; AVX1-NEXT: vmovq %rdx, %xmm3
1588 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1589 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
1590 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1591 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1592 ; AVX1-NEXT: movq %rcx, %rdx
1593 ; AVX1-NEXT: sarq $63, %rdx
1594 ; AVX1-NEXT: andq %rax, %rdx
1595 ; AVX1-NEXT: orq %rcx, %rdx
1596 ; AVX1-NEXT: vmovq %rdx, %xmm3
1597 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1598 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm1
1599 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1600 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1601 ; AVX1-NEXT: movq %rcx, %rdx
1602 ; AVX1-NEXT: sarq $63, %rdx
1603 ; AVX1-NEXT: andq %rax, %rdx
1604 ; AVX1-NEXT: orq %rcx, %rdx
1605 ; AVX1-NEXT: vmovq %rdx, %xmm0
1606 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1607 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1610 ; AVX2-LABEL: fptoui_4f32_to_4i64:
1612 ; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1613 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1614 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
1615 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1616 ; AVX2-NEXT: vcvttss2si %xmm2, %rcx
1617 ; AVX2-NEXT: movq %rcx, %rdx
1618 ; AVX2-NEXT: sarq $63, %rdx
1619 ; AVX2-NEXT: andq %rax, %rdx
1620 ; AVX2-NEXT: orq %rcx, %rdx
1621 ; AVX2-NEXT: vmovq %rdx, %xmm2
1622 ; AVX2-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1623 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
1624 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1625 ; AVX2-NEXT: vcvttss2si %xmm3, %rcx
1626 ; AVX2-NEXT: movq %rcx, %rdx
1627 ; AVX2-NEXT: sarq $63, %rdx
1628 ; AVX2-NEXT: andq %rax, %rdx
1629 ; AVX2-NEXT: orq %rcx, %rdx
1630 ; AVX2-NEXT: vmovq %rdx, %xmm3
1631 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1632 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
1633 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1634 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1635 ; AVX2-NEXT: movq %rcx, %rdx
1636 ; AVX2-NEXT: sarq $63, %rdx
1637 ; AVX2-NEXT: andq %rax, %rdx
1638 ; AVX2-NEXT: orq %rcx, %rdx
1639 ; AVX2-NEXT: vmovq %rdx, %xmm3
1640 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1641 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
1642 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1643 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1644 ; AVX2-NEXT: movq %rcx, %rdx
1645 ; AVX2-NEXT: sarq $63, %rdx
1646 ; AVX2-NEXT: andq %rax, %rdx
1647 ; AVX2-NEXT: orq %rcx, %rdx
1648 ; AVX2-NEXT: vmovq %rdx, %xmm0
1649 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1650 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1653 ; AVX512F-LABEL: fptoui_4f32_to_4i64:
1655 ; AVX512F-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1656 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1657 ; AVX512F-NEXT: vmovq %rax, %xmm1
1658 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1659 ; AVX512F-NEXT: vcvttss2usi %xmm2, %rax
1660 ; AVX512F-NEXT: vmovq %rax, %xmm2
1661 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1662 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1663 ; AVX512F-NEXT: vmovq %rax, %xmm2
1664 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1665 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1666 ; AVX512F-NEXT: vmovq %rax, %xmm0
1667 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1668 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1669 ; AVX512F-NEXT: retq
1671 ; AVX512VL-LABEL: fptoui_4f32_to_4i64:
1672 ; AVX512VL: # %bb.0:
1673 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1674 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1675 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1676 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1677 ; AVX512VL-NEXT: vcvttss2usi %xmm2, %rax
1678 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1679 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1680 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1681 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1682 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1683 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1684 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1685 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1686 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1687 ; AVX512VL-NEXT: retq
1689 ; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
1690 ; AVX512DQ: # %bb.0:
1691 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1692 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1693 ; AVX512DQ-NEXT: retq
1695 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
1696 ; AVX512VLDQ: # %bb.0:
1697 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1698 ; AVX512VLDQ-NEXT: retq
1699 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1700 %cvt = fptoui <4 x float> %shuf to <4 x i64>
; Unsigned conversion of all eight floats to <8 x i64>, after which only
; result elements 0-3 are kept by the shuffle (convert first, then truncate
; the vector).
; Expected lowering per the assertions below:
;  - SSE, AVX1 and AVX2 runs contain four element conversions, each done
;    with a pair of signed (v)cvttss2si (input, and input minus the constant
;    printed as 9.22337203E+18) merged with sarq $63 / andq / orq;
;  - AVX512F and AVX512VL runs use four scalar vcvttss2usi;
;  - both runs with AVX512DQ use a ymm-to-zmm vcvttps2uqq and then narrow
;    the result register to ymm0.
1704 define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
1705 ; SSE-LABEL: fptoui_8f32_to_4i64:
1707 ; SSE-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1708 ; SSE-NEXT: movaps %xmm0, %xmm2
1709 ; SSE-NEXT: subss %xmm1, %xmm2
1710 ; SSE-NEXT: cvttss2si %xmm2, %rax
1711 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1712 ; SSE-NEXT: movq %rcx, %rdx
1713 ; SSE-NEXT: sarq $63, %rdx
1714 ; SSE-NEXT: andq %rax, %rdx
1715 ; SSE-NEXT: orq %rcx, %rdx
1716 ; SSE-NEXT: movq %rdx, %xmm2
1717 ; SSE-NEXT: movaps %xmm0, %xmm3
1718 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1719 ; SSE-NEXT: cvttss2si %xmm3, %rax
1720 ; SSE-NEXT: subss %xmm1, %xmm3
1721 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1722 ; SSE-NEXT: movq %rax, %rdx
1723 ; SSE-NEXT: sarq $63, %rdx
1724 ; SSE-NEXT: andq %rcx, %rdx
1725 ; SSE-NEXT: orq %rax, %rdx
1726 ; SSE-NEXT: movq %rdx, %xmm3
1727 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1728 ; SSE-NEXT: movaps %xmm0, %xmm3
1729 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1730 ; SSE-NEXT: cvttss2si %xmm3, %rax
1731 ; SSE-NEXT: subss %xmm1, %xmm3
1732 ; SSE-NEXT: cvttss2si %xmm3, %rcx
1733 ; SSE-NEXT: movq %rax, %rdx
1734 ; SSE-NEXT: sarq $63, %rdx
1735 ; SSE-NEXT: andq %rcx, %rdx
1736 ; SSE-NEXT: orq %rax, %rdx
1737 ; SSE-NEXT: movq %rdx, %xmm3
1738 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1739 ; SSE-NEXT: cvttss2si %xmm0, %rax
1740 ; SSE-NEXT: subss %xmm1, %xmm0
1741 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1742 ; SSE-NEXT: movq %rax, %rdx
1743 ; SSE-NEXT: sarq $63, %rdx
1744 ; SSE-NEXT: andq %rcx, %rdx
1745 ; SSE-NEXT: orq %rax, %rdx
1746 ; SSE-NEXT: movq %rdx, %xmm1
1747 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1748 ; SSE-NEXT: movdqa %xmm2, %xmm0
1751 ; AVX1-LABEL: fptoui_8f32_to_4i64:
1753 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1754 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1755 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
1756 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1757 ; AVX1-NEXT: vcvttss2si %xmm2, %rcx
1758 ; AVX1-NEXT: movq %rcx, %rdx
1759 ; AVX1-NEXT: sarq $63, %rdx
1760 ; AVX1-NEXT: andq %rax, %rdx
1761 ; AVX1-NEXT: orq %rcx, %rdx
1762 ; AVX1-NEXT: vmovq %rdx, %xmm2
1763 ; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1764 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
1765 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1766 ; AVX1-NEXT: vcvttss2si %xmm3, %rcx
1767 ; AVX1-NEXT: movq %rcx, %rdx
1768 ; AVX1-NEXT: sarq $63, %rdx
1769 ; AVX1-NEXT: andq %rax, %rdx
1770 ; AVX1-NEXT: orq %rcx, %rdx
1771 ; AVX1-NEXT: vmovq %rdx, %xmm3
1772 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1773 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
1774 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1775 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1776 ; AVX1-NEXT: movq %rcx, %rdx
1777 ; AVX1-NEXT: sarq $63, %rdx
1778 ; AVX1-NEXT: andq %rax, %rdx
1779 ; AVX1-NEXT: orq %rcx, %rdx
1780 ; AVX1-NEXT: vmovq %rdx, %xmm3
1781 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1782 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm1
1783 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1784 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1785 ; AVX1-NEXT: movq %rcx, %rdx
1786 ; AVX1-NEXT: sarq $63, %rdx
1787 ; AVX1-NEXT: andq %rax, %rdx
1788 ; AVX1-NEXT: orq %rcx, %rdx
1789 ; AVX1-NEXT: vmovq %rdx, %xmm0
1790 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1791 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1794 ; AVX2-LABEL: fptoui_8f32_to_4i64:
1796 ; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1797 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1798 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
1799 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1800 ; AVX2-NEXT: vcvttss2si %xmm2, %rcx
1801 ; AVX2-NEXT: movq %rcx, %rdx
1802 ; AVX2-NEXT: sarq $63, %rdx
1803 ; AVX2-NEXT: andq %rax, %rdx
1804 ; AVX2-NEXT: orq %rcx, %rdx
1805 ; AVX2-NEXT: vmovq %rdx, %xmm2
1806 ; AVX2-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1807 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
1808 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1809 ; AVX2-NEXT: vcvttss2si %xmm3, %rcx
1810 ; AVX2-NEXT: movq %rcx, %rdx
1811 ; AVX2-NEXT: sarq $63, %rdx
1812 ; AVX2-NEXT: andq %rax, %rdx
1813 ; AVX2-NEXT: orq %rcx, %rdx
1814 ; AVX2-NEXT: vmovq %rdx, %xmm3
1815 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1816 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
1817 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1818 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1819 ; AVX2-NEXT: movq %rcx, %rdx
1820 ; AVX2-NEXT: sarq $63, %rdx
1821 ; AVX2-NEXT: andq %rax, %rdx
1822 ; AVX2-NEXT: orq %rcx, %rdx
1823 ; AVX2-NEXT: vmovq %rdx, %xmm3
1824 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1825 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
1826 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1827 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1828 ; AVX2-NEXT: movq %rcx, %rdx
1829 ; AVX2-NEXT: sarq $63, %rdx
1830 ; AVX2-NEXT: andq %rax, %rdx
1831 ; AVX2-NEXT: orq %rcx, %rdx
1832 ; AVX2-NEXT: vmovq %rdx, %xmm0
1833 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1834 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1837 ; AVX512F-LABEL: fptoui_8f32_to_4i64:
1839 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1840 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1841 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1842 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1843 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx
1844 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1845 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi
1846 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1847 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1848 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1849 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1850 ; AVX512F-NEXT: vmovq %rax, %xmm2
1851 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1852 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1853 ; AVX512F-NEXT: retq
1855 ; AVX512VL-LABEL: fptoui_8f32_to_4i64:
1856 ; AVX512VL: # %bb.0:
1857 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1858 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1859 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1860 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1861 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx
1862 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1863 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi
1864 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1865 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1866 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1867 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1868 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1869 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1870 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1871 ; AVX512VL-NEXT: retq
1873 ; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
1874 ; AVX512DQ: # %bb.0:
1875 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1876 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1877 ; AVX512DQ-NEXT: retq
1879 ; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
1880 ; AVX512VLDQ: # %bb.0:
1881 ; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1882 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1883 ; AVX512VLDQ-NEXT: retq
1884 %cvt = fptoui <8 x float> %a to <8 x i64>
1885 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Constant operand test. fptosi of <1.0, -1.0> to <2 x i64>. The visible
; checks expect a plain constant vector load of [1,18446744073709551615]
; (the second lane is -1 printed as an unsigned 64-bit value).
1893 define <2 x i64> @fptosi_2f64_to_2i64_const() {
1894 ; SSE-LABEL: fptosi_2f64_to_2i64_const:
1896 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1899 ; AVX-LABEL: fptosi_2f64_to_2i64_const:
1901 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
1903 %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
; Constant operand test. fptosi of <-1.0, 1.0> to <2 x i32>, then widened to
; <4 x i32> by a shufflevector whose upper two lanes are undef. The visible
; checks expect a movsd/vmovsd constant load of [4294967295,1,0,0]
; (lane 0 is -1 printed as an unsigned 32-bit value).
1907 define <4 x i32> @fptosi_2f64_to_2i32_const() {
1908 ; SSE-LABEL: fptosi_2f64_to_2i32_const:
1910 ; SSE-NEXT: movsd {{.*#+}} xmm0 = [4294967295,1,0,0]
1913 ; AVX-LABEL: fptosi_2f64_to_2i32_const:
1915 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4294967295,1,0,0]
1917 %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
1918 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Constant operand test. fptosi of <1.0, -1.0, 2.0, -3.0> to <4 x i64>.
; The SSE checks expect the result as two 128-bit constant loads (xmm0 and
; xmm1); the AVX checks expect a single 256-bit constant load into ymm0.
; Negative lanes appear as unsigned 64-bit values (-1 and -3).
1922 define <4 x i64> @fptosi_4f64_to_4i64_const() {
1923 ; SSE-LABEL: fptosi_4f64_to_4i64_const:
1925 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1926 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
1929 ; AVX-LABEL: fptosi_4f64_to_4i64_const:
1931 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
1933 %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
; Constant operand test. fptosi of <-1.0, 1.0, -2.0, 3.0> to <4 x i32>.
; The visible checks expect one 128-bit constant load; the negative lanes are
; printed as unsigned 32-bit values (4294967295 is -1, 4294967294 is -2).
1937 define <4 x i32> @fptosi_4f64_to_4i32_const() {
1938 ; SSE-LABEL: fptosi_4f64_to_4i32_const:
1940 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1943 ; AVX-LABEL: fptosi_4f64_to_4i32_const:
1945 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1947 %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
; Constant operand test. fptoui of <2.0, 4.0> to <2 x i64>. The visible
; checks expect a plain constant vector load of [2,4].
1951 define <2 x i64> @fptoui_2f64_to_2i64_const() {
1952 ; SSE-LABEL: fptoui_2f64_to_2i64_const:
1954 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
1957 ; AVX-LABEL: fptoui_2f64_to_2i64_const:
1959 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
1961 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
; Constant operand test. fptoui of <2.0, 4.0> to <2 x i32>, then widened to
; <4 x i32> by a shufflevector whose upper two lanes are undef. The visible
; checks expect a movsd/vmovsd constant load of [2,4,0,0].
; The %a parameter is declared but not referenced by the visible instructions.
1965 define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
1966 ; SSE-LABEL: fptoui_2f64_to_2i32_const:
1968 ; SSE-NEXT: movsd {{.*#+}} xmm0 = [2,4,0,0]
1971 ; AVX-LABEL: fptoui_2f64_to_2i32_const:
1973 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [2,4,0,0]
1975 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
1976 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Constant operand test. fptoui of <2.0, 4.0, 6.0, 8.0> to <4 x i64>.
; The SSE checks expect two 128-bit constant loads (xmm0 and xmm1); the AVX
; checks expect a single 256-bit constant load into ymm0.
; The %a parameter is declared but not referenced by the visible instructions.
1980 define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
1981 ; SSE-LABEL: fptoui_4f64_to_4i64_const:
1983 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
1984 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8]
1987 ; AVX-LABEL: fptoui_4f64_to_4i64_const:
1989 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
1991 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
; Constant operand test. fptoui of <2.0, 4.0, 6.0, 8.0> to <4 x i32>.
; The visible checks expect one 128-bit constant load of [2,4,6,8].
; The %a parameter is declared but not referenced by the visible instructions.
1995 define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
1996 ; SSE-LABEL: fptoui_4f64_to_4i32_const:
1998 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
2001 ; AVX-LABEL: fptoui_4f64_to_4i32_const:
2003 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
2005 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
; Constant operand test. fptosi of <1.0, -1.0, 2.0, 3.0> (float) to <4 x i32>.
; The visible checks expect one 128-bit constant load; lane 1 is -1 printed
; as the unsigned 32-bit value 4294967295.
2009 define <4 x i32> @fptosi_4f32_to_4i32_const() {
2010 ; SSE-LABEL: fptosi_4f32_to_4i32_const:
2012 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2015 ; AVX-LABEL: fptosi_4f32_to_4i32_const:
2017 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2019 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
; Constant operand test. fptosi of <1.0, -1.0, 2.0, 3.0> (float) to <4 x i64>.
; The SSE checks expect two 128-bit constant loads (xmm0 and xmm1); the AVX
; checks expect a single 256-bit constant load into ymm0. Lane 1 is -1
; printed as an unsigned 64-bit value.
2023 define <4 x i64> @fptosi_4f32_to_4i64_const() {
2024 ; SSE-LABEL: fptosi_4f32_to_4i64_const:
2026 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
2027 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
2030 ; AVX-LABEL: fptosi_4f32_to_4i64_const:
2032 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
2034 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
; Constant operand test. fptosi of an 8-lane float constant to <8 x i32>.
; The SSE checks expect two 128-bit constant loads (xmm0 and xmm1); the AVX
; checks expect a single 256-bit constant load into ymm0. Negative lanes are
; printed as unsigned 32-bit values (4294967295 is -1, 4294967288 is -8).
; The %a parameter is declared but not referenced by the visible instructions.
2038 define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
2039 ; SSE-LABEL: fptosi_8f32_to_8i32_const:
2041 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2042 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
2045 ; AVX-LABEL: fptosi_8f32_to_8i32_const:
2047 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
2049 %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
; Constant operand test. fptoui of <1.0, 2.0, 4.0, 6.0> (float) to <4 x i32>.
; The visible checks expect one 128-bit constant load of [1,2,4,6].
; The %a parameter is declared but not referenced by the visible instructions.
2053 define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
2054 ; SSE-LABEL: fptoui_4f32_to_4i32_const:
2056 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
2059 ; AVX-LABEL: fptoui_4f32_to_4i32_const:
2061 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
2063 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
; Constant operand test. fptoui of <1.0, 2.0, 4.0, 8.0> (float) to <4 x i64>.
; The SSE checks expect two 128-bit constant loads (xmm0 and xmm1); the AVX
; checks expect a single 256-bit constant load into ymm0.
2067 define <4 x i64> @fptoui_4f32_to_4i64_const() {
2068 ; SSE-LABEL: fptoui_4f32_to_4i64_const:
2070 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2]
2071 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8]
2074 ; AVX-LABEL: fptoui_4f32_to_4i64_const:
2076 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
2078 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
; Constant operand test. fptoui of an 8-lane float constant to <8 x i32>.
; The SSE checks expect two 128-bit constant loads (xmm0 and xmm1); the AVX
; checks expect a single 256-bit constant load into ymm0.
; The %a parameter is declared but not referenced by the visible instructions.
2082 define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
2083 ; SSE-LABEL: fptoui_8f32_to_8i32_const:
2085 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
2086 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
2089 ; AVX-LABEL: fptoui_8f32_to_8i32_const:
2091 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
2093 %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
; fptosi of <2 x x86_fp80> to <2 x i32>, widened to <4 x i32> by a
; shufflevector that takes lanes 2 and 3 from zeroinitializer (so the upper
; half of the result is zero, matching the final movq/vmovq "xmm[0],zero").
; Both operands are loaded from the stack with fldt and converted on the x87
; stack. The SSE checks wrap each fistpl in a save / OR 0xC00 / reload of the
; x87 control word followed by a restore (presumably to force truncation;
; confirm against the x87 control word layout). The AVX checks use fisttpl
; directly with no control word changes.
2097 define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
2098 ; SSE-LABEL: fptosi_2f80_to_4i32:
2100 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
2101 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
2102 ; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
2103 ; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2104 ; SSE-NEXT: orl $3072, %eax # imm = 0xC00
2105 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2106 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2107 ; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
2108 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2109 ; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
2110 ; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2111 ; SSE-NEXT: orl $3072, %eax # imm = 0xC00
2112 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2113 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2114 ; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
2115 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2116 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2117 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2118 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2119 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2122 ; AVX-LABEL: fptosi_2f80_to_4i32:
2124 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
2125 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
2126 ; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
2127 ; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
2128 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2129 ; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2130 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2131 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2133 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
2134 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; fptosi of <2 x fp128> to <2 x i32>, widened to <4 x i32> by a shufflevector
; that takes lanes 2 and 3 from zeroinitializer (upper half of the result is
; zero, matching the final movq/vmovq "xmm[0],zero").
; The checks expect one call to __fixtfsi@PLT per lane. The second operand
; (xmm1) is spilled to the stack across the first call and reloaded into xmm0
; for the second; the first result is held in %ebx, which is pushed/popped
; around the body.
2138 define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
2139 ; SSE-LABEL: fptosi_2f128_to_4i32:
2141 ; SSE-NEXT: pushq %rbx
2142 ; SSE-NEXT: subq $16, %rsp
2143 ; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
2144 ; SSE-NEXT: callq __fixtfsi@PLT
2145 ; SSE-NEXT: movl %eax, %ebx
2146 ; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2147 ; SSE-NEXT: callq __fixtfsi@PLT
2148 ; SSE-NEXT: movd %eax, %xmm0
2149 ; SSE-NEXT: movd %ebx, %xmm1
2150 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2151 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2152 ; SSE-NEXT: addq $16, %rsp
2153 ; SSE-NEXT: popq %rbx
2156 ; AVX-LABEL: fptosi_2f128_to_4i32:
2158 ; AVX-NEXT: pushq %rbx
2159 ; AVX-NEXT: subq $16, %rsp
2160 ; AVX-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill
2161 ; AVX-NEXT: callq __fixtfsi@PLT
2162 ; AVX-NEXT: movl %eax, %ebx
2163 ; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
2164 ; AVX-NEXT: callq __fixtfsi@PLT
2165 ; AVX-NEXT: vmovd %eax, %xmm0
2166 ; AVX-NEXT: vmovd %ebx, %xmm1
2167 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2168 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2169 ; AVX-NEXT: addq $16, %rsp
2170 ; AVX-NEXT: popq %rbx
2172 %cvt = fptosi <2 x fp128> %a to <2 x i32>
2173 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; fptosi of <2 x float> to <2 x i8>. Every configuration first converts to
; 32-bit lanes with (v)cvttps2dq and then narrows. The SSE, VEX, AVX512F and
; AVX512DQ checks narrow with packssdw followed by packsswb; the AVX512VL and
; AVX512VLDQ checks narrow with a single vpmovdb.
2177 define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
2178 ; SSE-LABEL: fptosi_2f32_to_2i8:
2180 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2181 ; SSE-NEXT: packssdw %xmm0, %xmm0
2182 ; SSE-NEXT: packsswb %xmm0, %xmm0
2185 ; VEX-LABEL: fptosi_2f32_to_2i8:
2187 ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
2188 ; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2189 ; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2192 ; AVX512F-LABEL: fptosi_2f32_to_2i8:
2194 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2195 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2196 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2197 ; AVX512F-NEXT: retq
2199 ; AVX512VL-LABEL: fptosi_2f32_to_2i8:
2200 ; AVX512VL: # %bb.0:
2201 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2202 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2203 ; AVX512VL-NEXT: retq
2205 ; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
2206 ; AVX512DQ: # %bb.0:
2207 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2208 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2209 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2210 ; AVX512DQ-NEXT: retq
2212 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
2213 ; AVX512VLDQ: # %bb.0:
2214 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2215 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2216 ; AVX512VLDQ-NEXT: retq
2217 %cvt = fptosi <2 x float> %a to <2 x i8>
; fptosi of <2 x float> to <2 x i16>. The checks expect (v)cvttps2dq to
; 32-bit lanes followed by a single (v)packssdw to narrow to 16 bits.
2221 define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
2222 ; SSE-LABEL: fptosi_2f32_to_2i16:
2224 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2225 ; SSE-NEXT: packssdw %xmm0, %xmm0
2228 ; AVX-LABEL: fptosi_2f32_to_2i16:
2230 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2231 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2233 %cvt = fptosi <2 x float> %a to <2 x i16>
; fptoui of <2 x float> to <2 x i8>. Every configuration converts with the
; signed (v)cvttps2dq and then narrows. The SSE checks narrow with packuswb
; applied twice; the VEX, AVX512F and AVX512DQ checks use vpackusdw then
; vpackuswb; the AVX512VL and AVX512VLDQ checks use a single vpmovdb.
2237 define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
2238 ; SSE-LABEL: fptoui_2f32_to_2i8:
2240 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2241 ; SSE-NEXT: packuswb %xmm0, %xmm0
2242 ; SSE-NEXT: packuswb %xmm0, %xmm0
2245 ; VEX-LABEL: fptoui_2f32_to_2i8:
2247 ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
2248 ; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2249 ; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2252 ; AVX512F-LABEL: fptoui_2f32_to_2i8:
2254 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2255 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2256 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2257 ; AVX512F-NEXT: retq
2259 ; AVX512VL-LABEL: fptoui_2f32_to_2i8:
2260 ; AVX512VL: # %bb.0:
2261 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2262 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2263 ; AVX512VL-NEXT: retq
2265 ; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
2266 ; AVX512DQ: # %bb.0:
2267 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2268 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2269 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2270 ; AVX512DQ-NEXT: retq
2272 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
2273 ; AVX512VLDQ: # %bb.0:
2274 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2275 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2276 ; AVX512VLDQ-NEXT: retq
2277 %cvt = fptoui <2 x float> %a to <2 x i8>
; fptoui of <2 x float> to <2 x i16>. Both configurations convert with the
; signed (v)cvttps2dq. The SSE checks then pick the low 16-bit halves with a
; pshuflw shuffle; the AVX checks narrow with vpackusdw instead.
2281 define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
2282 ; SSE-LABEL: fptoui_2f32_to_2i16:
2284 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2285 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2288 ; AVX-LABEL: fptoui_2f32_to_2i16:
2290 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2291 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2293 %cvt = fptoui <2 x float> %a to <2 x i16>
; fptosi of <2 x double> to <2 x i8>. Every configuration first converts to
; 32-bit lanes with (v)cvttpd2dq and then narrows. The SSE, VEX, AVX512F and
; AVX512DQ checks narrow with packssdw followed by packsswb; the AVX512VL and
; AVX512VLDQ checks narrow with a single vpmovdb.
2297 define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
2298 ; SSE-LABEL: fptosi_2f64_to_2i8:
2300 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2301 ; SSE-NEXT: packssdw %xmm0, %xmm0
2302 ; SSE-NEXT: packsswb %xmm0, %xmm0
2305 ; VEX-LABEL: fptosi_2f64_to_2i8:
2307 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
2308 ; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2309 ; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2312 ; AVX512F-LABEL: fptosi_2f64_to_2i8:
2314 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2315 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2316 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2317 ; AVX512F-NEXT: retq
2319 ; AVX512VL-LABEL: fptosi_2f64_to_2i8:
2320 ; AVX512VL: # %bb.0:
2321 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2322 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2323 ; AVX512VL-NEXT: retq
2325 ; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
2326 ; AVX512DQ: # %bb.0:
2327 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2328 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2329 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2330 ; AVX512DQ-NEXT: retq
2332 ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
2333 ; AVX512VLDQ: # %bb.0:
2334 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2335 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2336 ; AVX512VLDQ-NEXT: retq
2337 %cvt = fptosi <2 x double> %a to <2 x i8>
; fptosi of <2 x double> to <2 x i16>. The checks expect (v)cvttpd2dq to
; 32-bit lanes followed by a single (v)packssdw to narrow to 16 bits.
2341 define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
2342 ; SSE-LABEL: fptosi_2f64_to_2i16:
2344 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2345 ; SSE-NEXT: packssdw %xmm0, %xmm0
2348 ; AVX-LABEL: fptosi_2f64_to_2i16:
2350 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2351 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2353 %cvt = fptosi <2 x double> %a to <2 x i16>
; fptoui of <2 x double> to <2 x i8>. Every configuration converts with the
; signed (v)cvttpd2dq and then narrows. The SSE checks narrow with packuswb
; applied twice; the VEX, AVX512F and AVX512DQ checks use vpackusdw then
; vpackuswb; the AVX512VL and AVX512VLDQ checks use a single vpmovdb.
2357 define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
2358 ; SSE-LABEL: fptoui_2f64_to_2i8:
2360 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2361 ; SSE-NEXT: packuswb %xmm0, %xmm0
2362 ; SSE-NEXT: packuswb %xmm0, %xmm0
2365 ; VEX-LABEL: fptoui_2f64_to_2i8:
2367 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
2368 ; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2369 ; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2372 ; AVX512F-LABEL: fptoui_2f64_to_2i8:
2374 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2375 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2376 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2377 ; AVX512F-NEXT: retq
2379 ; AVX512VL-LABEL: fptoui_2f64_to_2i8:
2380 ; AVX512VL: # %bb.0:
2381 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2382 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2383 ; AVX512VL-NEXT: retq
2385 ; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
2386 ; AVX512DQ: # %bb.0:
2387 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2388 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2389 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2390 ; AVX512DQ-NEXT: retq
2392 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
2393 ; AVX512VLDQ: # %bb.0:
2394 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2395 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2396 ; AVX512VLDQ-NEXT: retq
2397 %cvt = fptoui <2 x double> %a to <2 x i8>
; fptoui of <2 x double> to <2 x i16>. Both configurations convert with the
; signed (v)cvttpd2dq. The SSE checks then pick the low 16-bit halves with a
; pshuflw shuffle; the AVX checks narrow with vpackusdw instead.
2401 define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
2402 ; SSE-LABEL: fptoui_2f64_to_2i16:
2404 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2405 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2408 ; AVX-LABEL: fptoui_2f64_to_2i16:
2410 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2411 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2413 %cvt = fptoui <2 x double> %a to <2 x i16>
; fptosi of <8 x double> to <8 x i16>.
; The SSE checks convert each of the four input registers with cvttpd2dq,
; merge pairs with unpcklpd, and narrow with one packssdw.
; The VEX checks convert the two ymm inputs and narrow with vpackssdw.
; All AVX512 checks convert zmm0 to ymm0 with one vcvttpd2dq and narrow with
; vpmovdw. Without VL the zmm-to-ymm form is used and the result is taken
; from xmm0 (see the kill comment); with VL the ymm-to-xmm form is used.
2417 define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
2418 ; SSE-LABEL: fptosi_8f64_to_8i16:
2420 ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2421 ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2422 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2423 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2424 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2425 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2426 ; SSE-NEXT: packssdw %xmm2, %xmm0
2429 ; VEX-LABEL: fptosi_8f64_to_8i16:
2431 ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2432 ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2433 ; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
2434 ; VEX-NEXT: vzeroupper
2437 ; AVX512F-LABEL: fptosi_8f64_to_8i16:
2439 ; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
2440 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2441 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2442 ; AVX512F-NEXT: vzeroupper
2443 ; AVX512F-NEXT: retq
2445 ; AVX512VL-LABEL: fptosi_8f64_to_8i16:
2446 ; AVX512VL: # %bb.0:
2447 ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
2448 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2449 ; AVX512VL-NEXT: vzeroupper
2450 ; AVX512VL-NEXT: retq
2452 ; AVX512DQ-LABEL: fptosi_8f64_to_8i16:
2453 ; AVX512DQ: # %bb.0:
2454 ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2455 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2456 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2457 ; AVX512DQ-NEXT: vzeroupper
2458 ; AVX512DQ-NEXT: retq
2460 ; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16:
2461 ; AVX512VLDQ: # %bb.0:
2462 ; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2463 ; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
2464 ; AVX512VLDQ-NEXT: vzeroupper
2465 ; AVX512VLDQ-NEXT: retq
2466 %cvt = fptosi <8 x double> %a to <8 x i16>
; fptoui of <8 x double> to <8 x i16>.
; The SSE checks follow the same shape as the signed variant but insert a
; pslld 16 / psrad 16 pair on each half before the final packssdw.
; The VEX checks convert the two ymm inputs and narrow with vpackusdw.
; All AVX512 checks convert zmm0 to ymm0 with one vcvttpd2dq and narrow with
; vpmovdw. Without VL the zmm-to-ymm form is used and the result is taken
; from xmm0 (see the kill comment); with VL the ymm-to-xmm form is used.
2470 define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
2471 ; SSE-LABEL: fptoui_8f64_to_8i16:
2473 ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2474 ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2475 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2476 ; SSE-NEXT: pslld $16, %xmm2
2477 ; SSE-NEXT: psrad $16, %xmm2
2478 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2479 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2480 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2481 ; SSE-NEXT: pslld $16, %xmm0
2482 ; SSE-NEXT: psrad $16, %xmm0
2483 ; SSE-NEXT: packssdw %xmm2, %xmm0
2486 ; VEX-LABEL: fptoui_8f64_to_8i16:
2488 ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2489 ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2490 ; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2491 ; VEX-NEXT: vzeroupper
2494 ; AVX512F-LABEL: fptoui_8f64_to_8i16:
2496 ; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
2497 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2498 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2499 ; AVX512F-NEXT: vzeroupper
2500 ; AVX512F-NEXT: retq
2502 ; AVX512VL-LABEL: fptoui_8f64_to_8i16:
2503 ; AVX512VL: # %bb.0:
2504 ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
2505 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2506 ; AVX512VL-NEXT: vzeroupper
2507 ; AVX512VL-NEXT: retq
2509 ; AVX512DQ-LABEL: fptoui_8f64_to_8i16:
2510 ; AVX512DQ: # %bb.0:
2511 ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2512 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2513 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2514 ; AVX512DQ-NEXT: vzeroupper
2515 ; AVX512DQ-NEXT: retq
2517 ; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16:
2518 ; AVX512VLDQ: # %bb.0:
2519 ; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2520 ; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
2521 ; AVX512VLDQ-NEXT: vzeroupper
2522 ; AVX512VLDQ-NEXT: retq
2523 %cvt = fptoui <8 x double> %a to <8 x i16>
; fptosi of <16 x float> to <16 x i8>.
; The SSE checks convert four xmm inputs with cvttps2dq, narrow pairs with
; packssdw, and finish with packsswb.
; The AVX1 and AVX2 checks convert two ymm inputs, split each with a 128-bit
; extract (vextractf128 on AVX1, vextracti128 on AVX2), then narrow with
; vpackssdw and vpacksswb.
; The AVX512 checks use one zmm vcvttps2dq followed by vpmovdb.
2527 define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
2528 ; SSE-LABEL: fptosi_16f32_to_16i8:
2530 ; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2531 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2532 ; SSE-NEXT: packssdw %xmm3, %xmm2
2533 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2534 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2535 ; SSE-NEXT: packssdw %xmm1, %xmm0
2536 ; SSE-NEXT: packsswb %xmm2, %xmm0
2539 ; AVX1-LABEL: fptosi_16f32_to_16i8:
2541 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2542 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2543 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2544 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2545 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2546 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2547 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2548 ; AVX1-NEXT: vzeroupper
2551 ; AVX2-LABEL: fptosi_16f32_to_16i8:
2553 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
2554 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2555 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2556 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2557 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2558 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2559 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2560 ; AVX2-NEXT: vzeroupper
2563 ; AVX512-LABEL: fptosi_16f32_to_16i8:
2565 ; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
2566 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2567 ; AVX512-NEXT: vzeroupper
2569 %cvt = fptosi <16 x float> %a to <16 x i8>
; fptoui of <16 x float> to <16 x i8>.
; The SSE, AVX1 and AVX2 checks match the signed variant except that the
; final byte narrowing uses (v)packuswb instead of (v)packsswb; the
; intermediate 32-to-16 step still uses (v)packssdw.
; The AVX512 checks use one zmm vcvttps2dq followed by vpmovdb.
2573 define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
2574 ; SSE-LABEL: fptoui_16f32_to_16i8:
2576 ; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2577 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2578 ; SSE-NEXT: packssdw %xmm3, %xmm2
2579 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2580 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2581 ; SSE-NEXT: packssdw %xmm1, %xmm0
2582 ; SSE-NEXT: packuswb %xmm2, %xmm0
2585 ; AVX1-LABEL: fptoui_16f32_to_16i8:
2587 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2588 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2589 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2590 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2591 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2592 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2593 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2594 ; AVX1-NEXT: vzeroupper
2597 ; AVX2-LABEL: fptoui_16f32_to_16i8:
2599 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
2600 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2601 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2602 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2603 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2604 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2605 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2606 ; AVX2-NEXT: vzeroupper
2609 ; AVX512-LABEL: fptoui_16f32_to_16i8:
2611 ; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
2612 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2613 ; AVX512-NEXT: vzeroupper
2615 %cvt = fptoui <16 x float> %a to <16 x i8>
; Loads <2 x float> from %x and converts it with fptosi to <2 x i64>.
; The SSE, VEX, AVX512F and AVX512VL checks load both floats with one
; (v)movsd, convert each lane with scalar (v)cvttss2si into %rax, and
; rebuild the vector with (v)punpcklqdq.
; The AVX512DQ checks keep the separate vmovsd load and use the packed
; vcvttps2qq on ymm0/zmm0, taking the result from xmm0.
; The AVX512VLDQ checks fold the load into vcvttps2qq (%rdi), %xmm0.
2619 define <2 x i64> @fptosi_2f32_to_2i64_load(ptr %x) {
2620 ; SSE-LABEL: fptosi_2f32_to_2i64_load:
2622 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2623 ; SSE-NEXT: cvttss2si %xmm1, %rax
2624 ; SSE-NEXT: movq %rax, %xmm0
2625 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2626 ; SSE-NEXT: cvttss2si %xmm1, %rax
2627 ; SSE-NEXT: movq %rax, %xmm1
2628 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2631 ; VEX-LABEL: fptosi_2f32_to_2i64_load:
2633 ; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2634 ; VEX-NEXT: vcvttss2si %xmm0, %rax
2635 ; VEX-NEXT: vmovq %rax, %xmm1
2636 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2637 ; VEX-NEXT: vcvttss2si %xmm0, %rax
2638 ; VEX-NEXT: vmovq %rax, %xmm0
2639 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2642 ; AVX512F-LABEL: fptosi_2f32_to_2i64_load:
2644 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2645 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
2646 ; AVX512F-NEXT: vmovq %rax, %xmm1
2647 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2648 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
2649 ; AVX512F-NEXT: vmovq %rax, %xmm0
2650 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2651 ; AVX512F-NEXT: retq
2653 ; AVX512VL-LABEL: fptosi_2f32_to_2i64_load:
2654 ; AVX512VL: # %bb.0:
2655 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2656 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
2657 ; AVX512VL-NEXT: vmovq %rax, %xmm1
2658 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2659 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
2660 ; AVX512VL-NEXT: vmovq %rax, %xmm0
2661 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2662 ; AVX512VL-NEXT: retq
2664 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load:
2665 ; AVX512DQ: # %bb.0:
2666 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2667 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
2668 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2669 ; AVX512DQ-NEXT: vzeroupper
2670 ; AVX512DQ-NEXT: retq
2672 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
2673 ; AVX512VLDQ: # %bb.0:
2674 ; AVX512VLDQ-NEXT: vcvttps2qq (%rdi), %xmm0
2675 ; AVX512VLDQ-NEXT: retq
2676 %a = load <2 x float>, ptr %x
2677 %b = fptosi <2 x float> %a to <2 x i64>
; Loads <2 x float> from %x and converts it with fptoui to <2 x i64>.
; The SSE and VEX checks build each unsigned result from two signed
; (v)cvttss2si conversions, one of the lane itself and one of the lane minus
; the constant 9.22337203E+18, and combine them with a sarq 63 / andq / orq
; sequence before repacking with (v)punpcklqdq.
; The AVX512F and AVX512VL checks use scalar vcvttss2usi per lane.
; The AVX512DQ checks keep the separate vmovsd load and use the packed
; vcvttps2uqq on ymm0/zmm0, taking the result from xmm0.
; The AVX512VLDQ checks fold the load into vcvttps2uqq (%rdi), %xmm0.
2681 define <2 x i64> @fptoui_2f32_to_2i64_load(ptr %x) {
2682 ; SSE-LABEL: fptoui_2f32_to_2i64_load:
2684 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2685 ; SSE-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
2686 ; SSE-NEXT: movaps %xmm1, %xmm0
2687 ; SSE-NEXT: subss %xmm2, %xmm0
2688 ; SSE-NEXT: cvttss2si %xmm0, %rax
2689 ; SSE-NEXT: cvttss2si %xmm1, %rcx
2690 ; SSE-NEXT: movq %rcx, %rdx
2691 ; SSE-NEXT: sarq $63, %rdx
2692 ; SSE-NEXT: andq %rax, %rdx
2693 ; SSE-NEXT: orq %rcx, %rdx
2694 ; SSE-NEXT: movq %rdx, %xmm0
2695 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2696 ; SSE-NEXT: cvttss2si %xmm1, %rax
2697 ; SSE-NEXT: subss %xmm2, %xmm1
2698 ; SSE-NEXT: cvttss2si %xmm1, %rcx
2699 ; SSE-NEXT: movq %rax, %rdx
2700 ; SSE-NEXT: sarq $63, %rdx
2701 ; SSE-NEXT: andq %rcx, %rdx
2702 ; SSE-NEXT: orq %rax, %rdx
2703 ; SSE-NEXT: movq %rdx, %xmm1
2704 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2707 ; VEX-LABEL: fptoui_2f32_to_2i64_load:
2709 ; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2710 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
2711 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
2712 ; VEX-NEXT: vcvttss2si %xmm2, %rax
2713 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
2714 ; VEX-NEXT: movq %rcx, %rdx
2715 ; VEX-NEXT: sarq $63, %rdx
2716 ; VEX-NEXT: andq %rax, %rdx
2717 ; VEX-NEXT: orq %rcx, %rdx
2718 ; VEX-NEXT: vmovq %rdx, %xmm2
2719 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2720 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm1
2721 ; VEX-NEXT: vcvttss2si %xmm1, %rax
2722 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
2723 ; VEX-NEXT: movq %rcx, %rdx
2724 ; VEX-NEXT: sarq $63, %rdx
2725 ; VEX-NEXT: andq %rax, %rdx
2726 ; VEX-NEXT: orq %rcx, %rdx
2727 ; VEX-NEXT: vmovq %rdx, %xmm0
2728 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2731 ; AVX512F-LABEL: fptoui_2f32_to_2i64_load:
2733 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2734 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
2735 ; AVX512F-NEXT: vmovq %rax, %xmm1
2736 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2737 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
2738 ; AVX512F-NEXT: vmovq %rax, %xmm0
2739 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2740 ; AVX512F-NEXT: retq
2742 ; AVX512VL-LABEL: fptoui_2f32_to_2i64_load:
2743 ; AVX512VL: # %bb.0:
2744 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2745 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
2746 ; AVX512VL-NEXT: vmovq %rax, %xmm1
2747 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2748 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
2749 ; AVX512VL-NEXT: vmovq %rax, %xmm0
2750 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2751 ; AVX512VL-NEXT: retq
2753 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load:
2754 ; AVX512DQ: # %bb.0:
2755 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2756 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
2757 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2758 ; AVX512DQ-NEXT: vzeroupper
2759 ; AVX512DQ-NEXT: retq
2761 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
2762 ; AVX512VLDQ: # %bb.0:
2763 ; AVX512VLDQ-NEXT: vcvttps2uqq (%rdi), %xmm0
2764 ; AVX512VLDQ-NEXT: retq
2765 %a = load <2 x float>, ptr %x
2766 %b = fptoui <2 x float> %a to <2 x i64>