1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1
4 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2
5 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
6 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
7 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
8 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VLDQ
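; The shared check prefixes let one block of assertions cover several runs:
; SSE/SSE2 for the SSE2 run, VEX for the AVX1 and AVX2 runs, AVX512 for the
; four AVX512 runs, AVX for every AVX-encoded run, and ALL for everything.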
10 ; 32-bit tests to make sure we're not doing anything stupid.
11 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown
12 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse
13 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse2
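; (The three i686 RUN lines above are not piped to FileCheck; they pass as long
; as llc exits cleanly, so they act purely as crash/assert sanity checks.)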
16 ; Double to Signed Integer
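; Note: before AVX512DQ there is no packed double->i64 conversion, so the i64
; cases below convert each element with scalar cvttsd2si and rebuild the vector
; with movq/punpcklqdq (plus vinsert{f,i}128 for 256-bit results); AVX512DQ
; uses vcvttpd2qq, widening to zmm when VL is unavailable. double->i32 results
; use packed cvttpd2dq on every subtarget.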
19 define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
20 ; SSE-LABEL: fptosi_2f64_to_2i64:
22 ; SSE-NEXT: cvttsd2si %xmm0, %rax
23 ; SSE-NEXT: movq %rax, %xmm1
24 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
25 ; SSE-NEXT: cvttsd2si %xmm0, %rax
26 ; SSE-NEXT: movq %rax, %xmm0
27 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28 ; SSE-NEXT: movdqa %xmm1, %xmm0
31 ; VEX-LABEL: fptosi_2f64_to_2i64:
33 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
34 ; VEX-NEXT: vmovq %rax, %xmm1
35 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
36 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
37 ; VEX-NEXT: vmovq %rax, %xmm0
38 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
41 ; AVX512F-LABEL: fptosi_2f64_to_2i64:
43 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
44 ; AVX512F-NEXT: vmovq %rax, %xmm1
45 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
46 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
47 ; AVX512F-NEXT: vmovq %rax, %xmm0
48 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
51 ; AVX512VL-LABEL: fptosi_2f64_to_2i64:
53 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
54 ; AVX512VL-NEXT: vmovq %rax, %xmm1
55 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
56 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
57 ; AVX512VL-NEXT: vmovq %rax, %xmm0
58 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
61 ; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
63 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
64 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
65 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
66 ; AVX512DQ-NEXT: vzeroupper
69 ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
70 ; AVX512VLDQ: # %bb.0:
71 ; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
72 ; AVX512VLDQ-NEXT: retq
73 %cvt = fptosi <2 x double> %a to <2 x i64>
77 define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
78 ; SSE-LABEL: fptosi_2f64_to_4i32:
80 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
83 ; AVX-LABEL: fptosi_2f64_to_4i32:
85 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
87 %cvt = fptosi <2 x double> %a to <2 x i32>
88 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
92 define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
93 ; SSE-LABEL: fptosi_2f64_to_2i32:
95 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
98 ; AVX-LABEL: fptosi_2f64_to_2i32:
100 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
102 %cvt = fptosi <2 x double> %a to <2 x i32>
106 define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
107 ; SSE-LABEL: fptosi_4f64_to_2i32:
109 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
112 ; AVX-LABEL: fptosi_4f64_to_2i32:
114 ; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
115 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
116 ; AVX-NEXT: vzeroupper
118 %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
119 %cvt = fptosi <4 x double> %ext to <4 x i32>
123 define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
124 ; SSE-LABEL: fptosi_4f64_to_4i64:
126 ; SSE-NEXT: cvttsd2si %xmm0, %rax
127 ; SSE-NEXT: movq %rax, %xmm2
128 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
129 ; SSE-NEXT: cvttsd2si %xmm0, %rax
130 ; SSE-NEXT: movq %rax, %xmm0
131 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
132 ; SSE-NEXT: cvttsd2si %xmm1, %rax
133 ; SSE-NEXT: movq %rax, %xmm3
134 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
135 ; SSE-NEXT: cvttsd2si %xmm1, %rax
136 ; SSE-NEXT: movq %rax, %xmm0
137 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
138 ; SSE-NEXT: movdqa %xmm2, %xmm0
139 ; SSE-NEXT: movdqa %xmm3, %xmm1
142 ; AVX1-LABEL: fptosi_4f64_to_4i64:
144 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
145 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
146 ; AVX1-NEXT: vmovq %rax, %xmm2
147 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
148 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
149 ; AVX1-NEXT: vmovq %rax, %xmm1
150 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
151 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
152 ; AVX1-NEXT: vmovq %rax, %xmm2
153 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
154 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
155 ; AVX1-NEXT: vmovq %rax, %xmm0
156 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
157 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
160 ; AVX2-LABEL: fptosi_4f64_to_4i64:
162 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
163 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
164 ; AVX2-NEXT: vmovq %rax, %xmm2
165 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
166 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
167 ; AVX2-NEXT: vmovq %rax, %xmm1
168 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
169 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
170 ; AVX2-NEXT: vmovq %rax, %xmm2
171 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
172 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
173 ; AVX2-NEXT: vmovq %rax, %xmm0
174 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
175 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
178 ; AVX512F-LABEL: fptosi_4f64_to_4i64:
180 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
181 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
182 ; AVX512F-NEXT: vmovq %rax, %xmm2
183 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
184 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
185 ; AVX512F-NEXT: vmovq %rax, %xmm1
186 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
187 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
188 ; AVX512F-NEXT: vmovq %rax, %xmm2
189 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
190 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
191 ; AVX512F-NEXT: vmovq %rax, %xmm0
192 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
193 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
196 ; AVX512VL-LABEL: fptosi_4f64_to_4i64:
198 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
199 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
200 ; AVX512VL-NEXT: vmovq %rax, %xmm2
201 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
202 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
203 ; AVX512VL-NEXT: vmovq %rax, %xmm1
204 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
205 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
206 ; AVX512VL-NEXT: vmovq %rax, %xmm2
207 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
208 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
209 ; AVX512VL-NEXT: vmovq %rax, %xmm0
210 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
211 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
212 ; AVX512VL-NEXT: retq
214 ; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
216 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
217 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
218 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
219 ; AVX512DQ-NEXT: retq
221 ; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64:
222 ; AVX512VLDQ: # %bb.0:
223 ; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
224 ; AVX512VLDQ-NEXT: retq
225 %cvt = fptosi <4 x double> %a to <4 x i64>
229 define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
230 ; SSE-LABEL: fptosi_4f64_to_4i32:
232 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
233 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
234 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
237 ; AVX-LABEL: fptosi_4f64_to_4i32:
239 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
240 ; AVX-NEXT: vzeroupper
242 %cvt = fptosi <4 x double> %a to <4 x i32>
247 ; Double to Unsigned Integer
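; Note: pre-AVX512 x86 has no fp->unsigned conversion instruction. For i64
; results the expansion below converts both x and x-2^63 with the signed
; cvttsd2si, XORs the sign bit into the adjusted result, and selects with
; cmovae/blendv after comparing against 2^63; i32 results either reuse the
; 64-bit signed convert (SSE) or apply the same trick with 2^31 (AVX). AVX512F
; provides vcvttsd2usi/vcvttpd2udq, and AVX512DQ adds vcvttpd2uqq for i64.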
250 define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
251 ; SSE-LABEL: fptoui_2f64_to_2i64:
253 ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
254 ; SSE-NEXT: movapd %xmm0, %xmm1
255 ; SSE-NEXT: subsd %xmm2, %xmm1
256 ; SSE-NEXT: cvttsd2si %xmm1, %rax
257 ; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
258 ; SSE-NEXT: xorq %rcx, %rax
259 ; SSE-NEXT: cvttsd2si %xmm0, %rdx
260 ; SSE-NEXT: ucomisd %xmm2, %xmm0
261 ; SSE-NEXT: cmovaeq %rax, %rdx
262 ; SSE-NEXT: movq %rdx, %xmm1
263 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
264 ; SSE-NEXT: movapd %xmm0, %xmm3
265 ; SSE-NEXT: subsd %xmm2, %xmm3
266 ; SSE-NEXT: cvttsd2si %xmm3, %rax
267 ; SSE-NEXT: xorq %rcx, %rax
268 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
269 ; SSE-NEXT: ucomisd %xmm2, %xmm0
270 ; SSE-NEXT: cmovaeq %rax, %rcx
271 ; SSE-NEXT: movq %rcx, %xmm0
272 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
273 ; SSE-NEXT: movdqa %xmm1, %xmm0
276 ; VEX-LABEL: fptoui_2f64_to_2i64:
278 ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
279 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
280 ; VEX-NEXT: vcvttsd2si %xmm2, %rax
281 ; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
282 ; VEX-NEXT: xorq %rcx, %rax
283 ; VEX-NEXT: vcvttsd2si %xmm0, %rdx
284 ; VEX-NEXT: vucomisd %xmm1, %xmm0
285 ; VEX-NEXT: cmovaeq %rax, %rdx
286 ; VEX-NEXT: vmovq %rdx, %xmm2
287 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
288 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
289 ; VEX-NEXT: vcvttsd2si %xmm3, %rax
290 ; VEX-NEXT: xorq %rcx, %rax
291 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
292 ; VEX-NEXT: vucomisd %xmm1, %xmm0
293 ; VEX-NEXT: cmovaeq %rax, %rcx
294 ; VEX-NEXT: vmovq %rcx, %xmm0
295 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
298 ; AVX512F-LABEL: fptoui_2f64_to_2i64:
300 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
301 ; AVX512F-NEXT: vmovq %rax, %xmm1
302 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
303 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
304 ; AVX512F-NEXT: vmovq %rax, %xmm0
305 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
308 ; AVX512VL-LABEL: fptoui_2f64_to_2i64:
310 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
311 ; AVX512VL-NEXT: vmovq %rax, %xmm1
312 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
313 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
314 ; AVX512VL-NEXT: vmovq %rax, %xmm0
315 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
316 ; AVX512VL-NEXT: retq
318 ; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
320 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
321 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
322 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
323 ; AVX512DQ-NEXT: vzeroupper
324 ; AVX512DQ-NEXT: retq
326 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
327 ; AVX512VLDQ: # %bb.0:
328 ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
329 ; AVX512VLDQ-NEXT: retq
330 %cvt = fptoui <2 x double> %a to <2 x i64>
334 define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
335 ; SSE-LABEL: fptoui_2f64_to_4i32:
337 ; SSE-NEXT: cvttsd2si %xmm0, %rax
338 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
339 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
340 ; SSE-NEXT: movd %eax, %xmm0
341 ; SSE-NEXT: movd %ecx, %xmm1
342 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
343 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
346 ; AVX1-LABEL: fptoui_2f64_to_4i32:
348 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
349 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
350 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
351 ; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm2
352 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3
353 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
354 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
355 ; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
356 ; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
357 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
358 ; AVX1-NEXT: vzeroupper
361 ; AVX2-LABEL: fptoui_2f64_to_4i32:
363 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
364 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
365 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
366 ; AVX2-NEXT: vpackssdw %xmm0, %xmm2, %xmm2
367 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
368 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
369 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
370 ; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1
371 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
372 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
373 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
374 ; AVX2-NEXT: vzeroupper
377 ; AVX512F-LABEL: fptoui_2f64_to_4i32:
379 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
380 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
381 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
382 ; AVX512F-NEXT: vzeroupper
385 ; AVX512VL-LABEL: fptoui_2f64_to_4i32:
387 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
388 ; AVX512VL-NEXT: retq
390 ; AVX512DQ-LABEL: fptoui_2f64_to_4i32:
392 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
393 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
394 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
395 ; AVX512DQ-NEXT: vzeroupper
396 ; AVX512DQ-NEXT: retq
398 ; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
399 ; AVX512VLDQ: # %bb.0:
400 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
401 ; AVX512VLDQ-NEXT: retq
402 %cvt = fptoui <2 x double> %a to <2 x i32>
403 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
407 define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
408 ; SSE-LABEL: fptoui_2f64_to_2i32:
410 ; SSE-NEXT: cvttsd2si %xmm0, %rax
411 ; SSE-NEXT: movd %eax, %xmm1
412 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
413 ; SSE-NEXT: cvttsd2si %xmm0, %rax
414 ; SSE-NEXT: movd %eax, %xmm0
415 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
416 ; SSE-NEXT: movdqa %xmm1, %xmm0
419 ; AVX1-LABEL: fptoui_2f64_to_2i32:
421 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
422 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
423 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
424 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
425 ; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
426 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3
427 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
428 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
429 ; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
430 ; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
431 ; AVX1-NEXT: vzeroupper
434 ; AVX2-LABEL: fptoui_2f64_to_2i32:
436 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
437 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
438 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
439 ; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
440 ; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
441 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
442 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
443 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
444 ; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1
445 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
446 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
447 ; AVX2-NEXT: vzeroupper
450 ; AVX512F-LABEL: fptoui_2f64_to_2i32:
452 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
453 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
454 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
455 ; AVX512F-NEXT: vzeroupper
458 ; AVX512VL-LABEL: fptoui_2f64_to_2i32:
460 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
461 ; AVX512VL-NEXT: retq
463 ; AVX512DQ-LABEL: fptoui_2f64_to_2i32:
465 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
466 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
467 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
468 ; AVX512DQ-NEXT: vzeroupper
469 ; AVX512DQ-NEXT: retq
471 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
472 ; AVX512VLDQ: # %bb.0:
473 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
474 ; AVX512VLDQ-NEXT: retq
475 %cvt = fptoui <2 x double> %a to <2 x i32>
476 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
480 define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
481 ; SSE-LABEL: fptoui_4f64_to_2i32:
483 ; SSE-NEXT: cvttsd2si %xmm0, %rax
484 ; SSE-NEXT: movd %eax, %xmm1
485 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
486 ; SSE-NEXT: cvttsd2si %xmm0, %rax
487 ; SSE-NEXT: movd %eax, %xmm0
488 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
489 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
492 ; AVX1-LABEL: fptoui_4f64_to_2i32:
494 ; AVX1-NEXT: vmovapd %xmm0, %xmm0
495 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
496 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
497 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
498 ; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
499 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm1
500 ; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
501 ; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
502 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
503 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
504 ; AVX1-NEXT: vzeroupper
507 ; AVX2-LABEL: fptoui_4f64_to_2i32:
509 ; AVX2-NEXT: vmovapd %xmm0, %xmm0
510 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
511 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
512 ; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
513 ; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
514 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
515 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
516 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
517 ; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1
518 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
519 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
520 ; AVX2-NEXT: vzeroupper
523 ; AVX512F-LABEL: fptoui_4f64_to_2i32:
525 ; AVX512F-NEXT: vmovaps %xmm0, %xmm0
526 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
527 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
528 ; AVX512F-NEXT: vzeroupper
531 ; AVX512VL-LABEL: fptoui_4f64_to_2i32:
533 ; AVX512VL-NEXT: vmovaps %xmm0, %xmm0
534 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
535 ; AVX512VL-NEXT: vzeroupper
536 ; AVX512VL-NEXT: retq
538 ; AVX512DQ-LABEL: fptoui_4f64_to_2i32:
540 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
541 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
542 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
543 ; AVX512DQ-NEXT: vzeroupper
544 ; AVX512DQ-NEXT: retq
546 ; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32:
547 ; AVX512VLDQ: # %bb.0:
548 ; AVX512VLDQ-NEXT: vmovaps %xmm0, %xmm0
549 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
550 ; AVX512VLDQ-NEXT: vzeroupper
551 ; AVX512VLDQ-NEXT: retq
552 %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
553 %cvt = fptoui <4 x double> %ext to <4 x i32>
557 define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
558 ; SSE-LABEL: fptoui_4f64_to_4i64:
560 ; SSE-NEXT: movapd %xmm0, %xmm2
561 ; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
562 ; SSE-NEXT: subsd %xmm3, %xmm0
563 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
564 ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
565 ; SSE-NEXT: xorq %rax, %rcx
566 ; SSE-NEXT: cvttsd2si %xmm2, %rdx
567 ; SSE-NEXT: ucomisd %xmm3, %xmm2
568 ; SSE-NEXT: cmovaeq %rcx, %rdx
569 ; SSE-NEXT: movq %rdx, %xmm0
570 ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
571 ; SSE-NEXT: movapd %xmm2, %xmm4
572 ; SSE-NEXT: subsd %xmm3, %xmm4
573 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
574 ; SSE-NEXT: xorq %rax, %rcx
575 ; SSE-NEXT: cvttsd2si %xmm2, %rdx
576 ; SSE-NEXT: ucomisd %xmm3, %xmm2
577 ; SSE-NEXT: cmovaeq %rcx, %rdx
578 ; SSE-NEXT: movq %rdx, %xmm2
579 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
580 ; SSE-NEXT: movapd %xmm1, %xmm2
581 ; SSE-NEXT: subsd %xmm3, %xmm2
582 ; SSE-NEXT: cvttsd2si %xmm2, %rcx
583 ; SSE-NEXT: xorq %rax, %rcx
584 ; SSE-NEXT: cvttsd2si %xmm1, %rdx
585 ; SSE-NEXT: ucomisd %xmm3, %xmm1
586 ; SSE-NEXT: cmovaeq %rcx, %rdx
587 ; SSE-NEXT: movq %rdx, %xmm2
588 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
589 ; SSE-NEXT: movapd %xmm1, %xmm4
590 ; SSE-NEXT: subsd %xmm3, %xmm4
591 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
592 ; SSE-NEXT: xorq %rax, %rcx
593 ; SSE-NEXT: cvttsd2si %xmm1, %rax
594 ; SSE-NEXT: ucomisd %xmm3, %xmm1
595 ; SSE-NEXT: cmovaeq %rcx, %rax
596 ; SSE-NEXT: movq %rax, %xmm1
597 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
598 ; SSE-NEXT: movdqa %xmm2, %xmm1
601 ; AVX1-LABEL: fptoui_4f64_to_4i64:
603 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
604 ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
605 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
606 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
607 ; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
608 ; AVX1-NEXT: xorq %rcx, %rax
609 ; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
610 ; AVX1-NEXT: vucomisd %xmm1, %xmm2
611 ; AVX1-NEXT: cmovaeq %rax, %rdx
612 ; AVX1-NEXT: vmovq %rdx, %xmm3
613 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
614 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4
615 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
616 ; AVX1-NEXT: xorq %rcx, %rax
617 ; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
618 ; AVX1-NEXT: vucomisd %xmm1, %xmm2
619 ; AVX1-NEXT: cmovaeq %rax, %rdx
620 ; AVX1-NEXT: vmovq %rdx, %xmm2
621 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
622 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3
623 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
624 ; AVX1-NEXT: xorq %rcx, %rax
625 ; AVX1-NEXT: vcvttsd2si %xmm0, %rdx
626 ; AVX1-NEXT: vucomisd %xmm1, %xmm0
627 ; AVX1-NEXT: cmovaeq %rax, %rdx
628 ; AVX1-NEXT: vmovq %rdx, %xmm3
629 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
630 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm4
631 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
632 ; AVX1-NEXT: xorq %rcx, %rax
633 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
634 ; AVX1-NEXT: vucomisd %xmm1, %xmm0
635 ; AVX1-NEXT: cmovaeq %rax, %rcx
636 ; AVX1-NEXT: vmovq %rcx, %xmm0
637 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
638 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
641 ; AVX2-LABEL: fptoui_4f64_to_4i64:
643 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
644 ; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
645 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3
646 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
647 ; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
648 ; AVX2-NEXT: xorq %rcx, %rax
649 ; AVX2-NEXT: vcvttsd2si %xmm2, %rdx
650 ; AVX2-NEXT: vucomisd %xmm1, %xmm2
651 ; AVX2-NEXT: cmovaeq %rax, %rdx
652 ; AVX2-NEXT: vmovq %rdx, %xmm3
653 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
654 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4
655 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
656 ; AVX2-NEXT: xorq %rcx, %rax
657 ; AVX2-NEXT: vcvttsd2si %xmm2, %rdx
658 ; AVX2-NEXT: vucomisd %xmm1, %xmm2
659 ; AVX2-NEXT: cmovaeq %rax, %rdx
660 ; AVX2-NEXT: vmovq %rdx, %xmm2
661 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
662 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3
663 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
664 ; AVX2-NEXT: xorq %rcx, %rax
665 ; AVX2-NEXT: vcvttsd2si %xmm0, %rdx
666 ; AVX2-NEXT: vucomisd %xmm1, %xmm0
667 ; AVX2-NEXT: cmovaeq %rax, %rdx
668 ; AVX2-NEXT: vmovq %rdx, %xmm3
669 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
670 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm4
671 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
672 ; AVX2-NEXT: xorq %rcx, %rax
673 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
674 ; AVX2-NEXT: vucomisd %xmm1, %xmm0
675 ; AVX2-NEXT: cmovaeq %rax, %rcx
676 ; AVX2-NEXT: vmovq %rcx, %xmm0
677 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
678 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
681 ; AVX512F-LABEL: fptoui_4f64_to_4i64:
683 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
684 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
685 ; AVX512F-NEXT: vmovq %rax, %xmm2
686 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
687 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
688 ; AVX512F-NEXT: vmovq %rax, %xmm1
689 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
690 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
691 ; AVX512F-NEXT: vmovq %rax, %xmm2
692 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
693 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
694 ; AVX512F-NEXT: vmovq %rax, %xmm0
695 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
696 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
699 ; AVX512VL-LABEL: fptoui_4f64_to_4i64:
701 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
702 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
703 ; AVX512VL-NEXT: vmovq %rax, %xmm2
704 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
705 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
706 ; AVX512VL-NEXT: vmovq %rax, %xmm1
707 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
708 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
709 ; AVX512VL-NEXT: vmovq %rax, %xmm2
710 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
711 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
712 ; AVX512VL-NEXT: vmovq %rax, %xmm0
713 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
714 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
715 ; AVX512VL-NEXT: retq
717 ; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
719 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
720 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
721 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
722 ; AVX512DQ-NEXT: retq
724 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64:
725 ; AVX512VLDQ: # %bb.0:
726 ; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0
727 ; AVX512VLDQ-NEXT: retq
728 %cvt = fptoui <4 x double> %a to <4 x i64>
732 define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
733 ; SSE-LABEL: fptoui_4f64_to_4i32:
735 ; SSE-NEXT: cvttsd2si %xmm1, %rax
736 ; SSE-NEXT: movd %eax, %xmm2
737 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
738 ; SSE-NEXT: cvttsd2si %xmm1, %rax
739 ; SSE-NEXT: movd %eax, %xmm1
740 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
741 ; SSE-NEXT: cvttsd2si %xmm0, %rax
742 ; SSE-NEXT: movd %eax, %xmm1
743 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
744 ; SSE-NEXT: cvttsd2si %xmm0, %rax
745 ; SSE-NEXT: movd %eax, %xmm0
746 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
747 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
748 ; SSE-NEXT: movdqa %xmm1, %xmm0
751 ; AVX1-LABEL: fptoui_4f64_to_4i32:
753 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
754 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
755 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
756 ; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
757 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm1
758 ; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
759 ; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
760 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
761 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
762 ; AVX1-NEXT: vzeroupper
765 ; AVX2-LABEL: fptoui_4f64_to_4i32:
767 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
768 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
769 ; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
770 ; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
771 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
772 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
773 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
774 ; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1
775 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
776 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
777 ; AVX2-NEXT: vzeroupper
780 ; AVX512F-LABEL: fptoui_4f64_to_4i32:
782 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
783 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
784 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
785 ; AVX512F-NEXT: vzeroupper
788 ; AVX512VL-LABEL: fptoui_4f64_to_4i32:
790 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
791 ; AVX512VL-NEXT: vzeroupper
792 ; AVX512VL-NEXT: retq
794 ; AVX512DQ-LABEL: fptoui_4f64_to_4i32:
796 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
797 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
798 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
799 ; AVX512DQ-NEXT: vzeroupper
800 ; AVX512DQ-NEXT: retq
802 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32:
803 ; AVX512VLDQ: # %bb.0:
804 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
805 ; AVX512VLDQ-NEXT: vzeroupper
806 ; AVX512VLDQ-NEXT: retq
807 %cvt = fptoui <4 x double> %a to <4 x i32>
812 ; Float to Signed Integer
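; Note: float->i32 maps directly onto (v)cvttps2dq. float->i64 has no packed
; instruction before AVX512DQ, so each element goes through scalar cvttss2si
; and is reassembled; AVX512DQ targets use vcvttps2qq instead.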
815 define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
816 ; SSE-LABEL: fptosi_2f32_to_2i32:
818 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
821 ; AVX-LABEL: fptosi_2f32_to_2i32:
823 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
825 %cvt = fptosi <2 x float> %a to <2 x i32>
829 define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
830 ; SSE-LABEL: fptosi_4f32_to_4i32:
832 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
835 ; AVX-LABEL: fptosi_4f32_to_4i32:
837 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
839 %cvt = fptosi <4 x float> %a to <4 x i32>
843 define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
844 ; SSE-LABEL: fptosi_2f32_to_2i64:
846 ; SSE-NEXT: cvttss2si %xmm0, %rax
847 ; SSE-NEXT: movq %rax, %xmm1
848 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
849 ; SSE-NEXT: cvttss2si %xmm0, %rax
850 ; SSE-NEXT: movq %rax, %xmm0
851 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
852 ; SSE-NEXT: movdqa %xmm1, %xmm0
855 ; VEX-LABEL: fptosi_2f32_to_2i64:
857 ; VEX-NEXT: vcvttss2si %xmm0, %rax
858 ; VEX-NEXT: vmovq %rax, %xmm1
859 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
860 ; VEX-NEXT: vcvttss2si %xmm0, %rax
861 ; VEX-NEXT: vmovq %rax, %xmm0
862 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
865 ; AVX512F-LABEL: fptosi_2f32_to_2i64:
867 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
868 ; AVX512F-NEXT: vmovq %rax, %xmm1
869 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
870 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
871 ; AVX512F-NEXT: vmovq %rax, %xmm0
872 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
875 ; AVX512VL-LABEL: fptosi_2f32_to_2i64:
877 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
878 ; AVX512VL-NEXT: vmovq %rax, %xmm1
879 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
880 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
881 ; AVX512VL-NEXT: vmovq %rax, %xmm0
882 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
883 ; AVX512VL-NEXT: retq
885 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
887 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
888 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
889 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
890 ; AVX512DQ-NEXT: vzeroupper
891 ; AVX512DQ-NEXT: retq
893 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
894 ; AVX512VLDQ: # %bb.0:
895 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
896 ; AVX512VLDQ-NEXT: retq
897 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
898 %cvt = fptosi <2 x float> %shuf to <2 x i64>
902 define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
903 ; SSE-LABEL: fptosi_4f32_to_2i64:
905 ; SSE-NEXT: cvttss2si %xmm0, %rax
906 ; SSE-NEXT: movq %rax, %xmm1
907 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
908 ; SSE-NEXT: cvttss2si %xmm0, %rax
909 ; SSE-NEXT: movq %rax, %xmm0
910 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
911 ; SSE-NEXT: movdqa %xmm1, %xmm0
914 ; VEX-LABEL: fptosi_4f32_to_2i64:
916 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
917 ; VEX-NEXT: vcvttss2si %xmm1, %rax
918 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
919 ; VEX-NEXT: vmovq %rcx, %xmm0
920 ; VEX-NEXT: vmovq %rax, %xmm1
921 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
924 ; AVX512F-LABEL: fptosi_4f32_to_2i64:
926 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
927 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
928 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
929 ; AVX512F-NEXT: vmovq %rcx, %xmm0
930 ; AVX512F-NEXT: vmovq %rax, %xmm1
931 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
934 ; AVX512VL-LABEL: fptosi_4f32_to_2i64:
936 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
937 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
938 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
939 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
940 ; AVX512VL-NEXT: vmovq %rax, %xmm1
941 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
942 ; AVX512VL-NEXT: retq
944 ; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
946 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
947 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
948 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
949 ; AVX512DQ-NEXT: vzeroupper
950 ; AVX512DQ-NEXT: retq
952 ; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
953 ; AVX512VLDQ: # %bb.0:
954 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
955 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
956 ; AVX512VLDQ-NEXT: vzeroupper
957 ; AVX512VLDQ-NEXT: retq
958 %cvt = fptosi <4 x float> %a to <4 x i64>
959 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
963 define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
964 ; SSE-LABEL: fptosi_8f32_to_8i32:
966 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
967 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
970 ; AVX-LABEL: fptosi_8f32_to_8i32:
972 ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
974 %cvt = fptosi <8 x float> %a to <8 x i32>
978 define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
979 ; SSE-LABEL: fptosi_4f32_to_4i64:
981 ; SSE-NEXT: cvttss2si %xmm0, %rax
982 ; SSE-NEXT: movq %rax, %xmm2
983 ; SSE-NEXT: movaps %xmm0, %xmm1
984 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
985 ; SSE-NEXT: cvttss2si %xmm1, %rax
986 ; SSE-NEXT: movq %rax, %xmm1
987 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
988 ; SSE-NEXT: movaps %xmm0, %xmm1
989 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
990 ; SSE-NEXT: cvttss2si %xmm1, %rax
991 ; SSE-NEXT: movq %rax, %xmm3
992 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
993 ; SSE-NEXT: cvttss2si %xmm0, %rax
994 ; SSE-NEXT: movq %rax, %xmm1
995 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
996 ; SSE-NEXT: movdqa %xmm2, %xmm0
999 ; AVX1-LABEL: fptosi_4f32_to_4i64:
1001 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1002 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1003 ; AVX1-NEXT: vmovq %rax, %xmm1
1004 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1005 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
1006 ; AVX1-NEXT: vmovq %rax, %xmm2
1007 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1008 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1009 ; AVX1-NEXT: vmovq %rax, %xmm2
1010 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1011 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1012 ; AVX1-NEXT: vmovq %rax, %xmm0
1013 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1014 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1017 ; AVX2-LABEL: fptosi_4f32_to_4i64:
1019 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1020 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1021 ; AVX2-NEXT: vmovq %rax, %xmm1
1022 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1023 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
1024 ; AVX2-NEXT: vmovq %rax, %xmm2
1025 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1026 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1027 ; AVX2-NEXT: vmovq %rax, %xmm2
1028 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1029 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1030 ; AVX2-NEXT: vmovq %rax, %xmm0
1031 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1032 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1035 ; AVX512F-LABEL: fptosi_4f32_to_4i64:
1037 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1038 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
1039 ; AVX512F-NEXT: vmovq %rax, %xmm1
1040 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1041 ; AVX512F-NEXT: vcvttss2si %xmm2, %rax
1042 ; AVX512F-NEXT: vmovq %rax, %xmm2
1043 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1044 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1045 ; AVX512F-NEXT: vmovq %rax, %xmm2
1046 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1047 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1048 ; AVX512F-NEXT: vmovq %rax, %xmm0
1049 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1050 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1051 ; AVX512F-NEXT: retq
1053 ; AVX512VL-LABEL: fptosi_4f32_to_4i64:
1054 ; AVX512VL: # %bb.0:
1055 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1056 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1057 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1058 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1059 ; AVX512VL-NEXT: vcvttss2si %xmm2, %rax
1060 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1061 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1062 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1063 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1064 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1065 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1066 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1067 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1068 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1069 ; AVX512VL-NEXT: retq
1071 ; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
1072 ; AVX512DQ: # %bb.0:
1073 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1074 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1075 ; AVX512DQ-NEXT: retq
1077 ; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
1078 ; AVX512VLDQ: # %bb.0:
1079 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
1080 ; AVX512VLDQ-NEXT: retq
1081 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1082 %cvt = fptosi <4 x float> %shuf to <4 x i64>
1086 define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
1087 ; SSE-LABEL: fptosi_8f32_to_4i64:
1089 ; SSE-NEXT: cvttss2si %xmm0, %rax
1090 ; SSE-NEXT: movq %rax, %xmm2
1091 ; SSE-NEXT: movaps %xmm0, %xmm1
1092 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
1093 ; SSE-NEXT: cvttss2si %xmm1, %rax
1094 ; SSE-NEXT: movq %rax, %xmm1
1095 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1096 ; SSE-NEXT: movaps %xmm0, %xmm1
1097 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
1098 ; SSE-NEXT: cvttss2si %xmm1, %rax
1099 ; SSE-NEXT: movq %rax, %xmm3
1100 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1101 ; SSE-NEXT: cvttss2si %xmm0, %rax
1102 ; SSE-NEXT: movq %rax, %xmm1
1103 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1104 ; SSE-NEXT: movdqa %xmm2, %xmm0
1107 ; AVX1-LABEL: fptosi_8f32_to_4i64:
1109 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1110 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1111 ; AVX1-NEXT: vmovq %rax, %xmm1
1112 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1113 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
1114 ; AVX1-NEXT: vmovq %rax, %xmm2
1115 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1116 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1117 ; AVX1-NEXT: vmovq %rax, %xmm2
1118 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1119 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1120 ; AVX1-NEXT: vmovq %rax, %xmm0
1121 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1122 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1125 ; AVX2-LABEL: fptosi_8f32_to_4i64:
1127 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1128 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1129 ; AVX2-NEXT: vmovq %rax, %xmm1
1130 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1131 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
1132 ; AVX2-NEXT: vmovq %rax, %xmm2
1133 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1134 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1135 ; AVX2-NEXT: vmovq %rax, %xmm2
1136 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1137 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1138 ; AVX2-NEXT: vmovq %rax, %xmm0
1139 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1140 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1143 ; AVX512F-LABEL: fptosi_8f32_to_4i64:
1145 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1146 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
1147 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
1148 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1149 ; AVX512F-NEXT: vcvttss2si %xmm1, %rdx
1150 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
1151 ; AVX512F-NEXT: vcvttss2si %xmm0, %rsi
1152 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1153 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1154 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1155 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1156 ; AVX512F-NEXT: vmovq %rax, %xmm2
1157 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1158 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1159 ; AVX512F-NEXT: retq
1161 ; AVX512VL-LABEL: fptosi_8f32_to_4i64:
1162 ; AVX512VL: # %bb.0:
1163 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1164 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1165 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
1166 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1167 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx
1168 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
1169 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi
1170 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1171 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1172 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1173 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1174 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1175 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1176 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1177 ; AVX512VL-NEXT: retq
1179 ; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
1180 ; AVX512DQ: # %bb.0:
1181 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1182 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1183 ; AVX512DQ-NEXT: retq
1185 ; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
1186 ; AVX512VLDQ: # %bb.0:
1187 ; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0
1188 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1189 ; AVX512VLDQ-NEXT: retq
1190 %cvt = fptosi <8 x float> %a to <8 x i64>
1191 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1196 ; Float to Unsigned Integer
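; Note: same expansion as the unsigned double section: without AVX512 the code
; below compares against 2^31/2^63 (the 2.14748365E+9 constants), converts both
; x and the adjusted x, XORs in the sign bit, and blends; AVX512F provides
; vcvttps2udq/vcvttss2usi and AVX512DQ adds vcvttps2uqq.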
1199 define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
1200 ; SSE-LABEL: fptoui_2f32_to_2i32:
1202 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1203 ; SSE-NEXT: movaps %xmm0, %xmm1
1204 ; SSE-NEXT: cmpltps %xmm2, %xmm1
1205 ; SSE-NEXT: cvttps2dq %xmm0, %xmm3
1206 ; SSE-NEXT: subps %xmm2, %xmm0
1207 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1208 ; SSE-NEXT: xorps {{.*}}(%rip), %xmm0
1209 ; SSE-NEXT: andps %xmm1, %xmm3
1210 ; SSE-NEXT: andnps %xmm0, %xmm1
1211 ; SSE-NEXT: orps %xmm3, %xmm1
1212 ; SSE-NEXT: movaps %xmm1, %xmm0
1215 ; AVX1-LABEL: fptoui_2f32_to_2i32:
1217 ; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1218 ; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
1219 ; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1
1220 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
1221 ; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
1222 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1223 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1226 ; AVX2-LABEL: fptoui_2f32_to_2i32:
1228 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1229 ; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
1230 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1231 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1232 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
1233 ; AVX2-NEXT: vxorps %xmm3, %xmm1, %xmm1
1234 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1235 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1238 ; AVX512F-LABEL: fptoui_2f32_to_2i32:
1240 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1241 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1242 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1243 ; AVX512F-NEXT: vzeroupper
1244 ; AVX512F-NEXT: retq
1246 ; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1247 ; AVX512VL: # %bb.0:
1248 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1249 ; AVX512VL-NEXT: retq
1251 ; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
1252 ; AVX512DQ: # %bb.0:
1253 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1254 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1255 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1256 ; AVX512DQ-NEXT: vzeroupper
1257 ; AVX512DQ-NEXT: retq
1259 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1260 ; AVX512VLDQ: # %bb.0:
1261 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1262 ; AVX512VLDQ-NEXT: retq
1263 %cvt = fptoui <2 x float> %a to <2 x i32>
1267 define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
1268 ; SSE-LABEL: fptoui_4f32_to_4i32:
1270 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1271 ; SSE-NEXT: movaps %xmm0, %xmm1
1272 ; SSE-NEXT: cmpltps %xmm2, %xmm1
1273 ; SSE-NEXT: cvttps2dq %xmm0, %xmm3
1274 ; SSE-NEXT: subps %xmm2, %xmm0
1275 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1276 ; SSE-NEXT: xorps {{.*}}(%rip), %xmm0
1277 ; SSE-NEXT: andps %xmm1, %xmm3
1278 ; SSE-NEXT: andnps %xmm0, %xmm1
1279 ; SSE-NEXT: orps %xmm3, %xmm1
1280 ; SSE-NEXT: movaps %xmm1, %xmm0
1283 ; AVX1-LABEL: fptoui_4f32_to_4i32:
1285 ; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1286 ; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
1287 ; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1
1288 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
1289 ; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
1290 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1291 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1294 ; AVX2-LABEL: fptoui_4f32_to_4i32:
1296 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1297 ; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
1298 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1299 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1300 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
1301 ; AVX2-NEXT: vxorps %xmm3, %xmm1, %xmm1
1302 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1303 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1306 ; AVX512F-LABEL: fptoui_4f32_to_4i32:
1308 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1309 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1310 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1311 ; AVX512F-NEXT: vzeroupper
1312 ; AVX512F-NEXT: retq
1314 ; AVX512VL-LABEL: fptoui_4f32_to_4i32:
1315 ; AVX512VL: # %bb.0:
1316 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1317 ; AVX512VL-NEXT: retq
1319 ; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
1320 ; AVX512DQ: # %bb.0:
1321 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1322 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1323 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1324 ; AVX512DQ-NEXT: vzeroupper
1325 ; AVX512DQ-NEXT: retq
1327 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
1328 ; AVX512VLDQ: # %bb.0:
1329 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1330 ; AVX512VLDQ-NEXT: retq
1331 %cvt = fptoui <4 x float> %a to <4 x i32>
1335 define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
1336 ; SSE-LABEL: fptoui_2f32_to_2i64:
1338 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1339 ; SSE-NEXT: movaps %xmm0, %xmm1
1340 ; SSE-NEXT: subss %xmm2, %xmm1
1341 ; SSE-NEXT: cvttss2si %xmm1, %rax
1342 ; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1343 ; SSE-NEXT: xorq %rcx, %rax
1344 ; SSE-NEXT: cvttss2si %xmm0, %rdx
1345 ; SSE-NEXT: ucomiss %xmm2, %xmm0
1346 ; SSE-NEXT: cmovaeq %rax, %rdx
1347 ; SSE-NEXT: movq %rdx, %xmm1
1348 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
1349 ; SSE-NEXT: movaps %xmm0, %xmm3
1350 ; SSE-NEXT: subss %xmm2, %xmm3
1351 ; SSE-NEXT: cvttss2si %xmm3, %rax
1352 ; SSE-NEXT: xorq %rcx, %rax
1353 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1354 ; SSE-NEXT: ucomiss %xmm2, %xmm0
1355 ; SSE-NEXT: cmovaeq %rax, %rcx
1356 ; SSE-NEXT: movq %rcx, %xmm0
1357 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1358 ; SSE-NEXT: movdqa %xmm1, %xmm0
1361 ; VEX-LABEL: fptoui_2f32_to_2i64:
1363 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1364 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
1365 ; VEX-NEXT: vcvttss2si %xmm2, %rax
1366 ; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1367 ; VEX-NEXT: xorq %rcx, %rax
1368 ; VEX-NEXT: vcvttss2si %xmm0, %rdx
1369 ; VEX-NEXT: vucomiss %xmm1, %xmm0
1370 ; VEX-NEXT: cmovaeq %rax, %rdx
1371 ; VEX-NEXT: vmovq %rdx, %xmm2
1372 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1373 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3
1374 ; VEX-NEXT: vcvttss2si %xmm3, %rax
1375 ; VEX-NEXT: xorq %rcx, %rax
1376 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1377 ; VEX-NEXT: vucomiss %xmm1, %xmm0
1378 ; VEX-NEXT: cmovaeq %rax, %rcx
1379 ; VEX-NEXT: vmovq %rcx, %xmm0
1380 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1383 ; AVX512F-LABEL: fptoui_2f32_to_2i64:
1385 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1386 ; AVX512F-NEXT: vmovq %rax, %xmm1
1387 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1388 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1389 ; AVX512F-NEXT: vmovq %rax, %xmm0
1390 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1391 ; AVX512F-NEXT: retq
1393 ; AVX512VL-LABEL: fptoui_2f32_to_2i64:
1394 ; AVX512VL: # %bb.0:
1395 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1396 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1397 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1398 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1399 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1400 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1401 ; AVX512VL-NEXT: retq
1403 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
1404 ; AVX512DQ: # %bb.0:
1405 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1406 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1407 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1408 ; AVX512DQ-NEXT: vzeroupper
1409 ; AVX512DQ-NEXT: retq
1411 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
1412 ; AVX512VLDQ: # %bb.0:
1413 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
1414 ; AVX512VLDQ-NEXT: retq
1415 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1416 %cvt = fptoui <2 x float> %shuf to <2 x i64>
1420 define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
1421 ; SSE-LABEL: fptoui_4f32_to_2i64:
1423 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1424 ; SSE-NEXT: movaps %xmm0, %xmm1
1425 ; SSE-NEXT: subss %xmm2, %xmm1
1426 ; SSE-NEXT: cvttss2si %xmm1, %rax
1427 ; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1428 ; SSE-NEXT: xorq %rcx, %rax
1429 ; SSE-NEXT: cvttss2si %xmm0, %rdx
1430 ; SSE-NEXT: ucomiss %xmm2, %xmm0
1431 ; SSE-NEXT: cmovaeq %rax, %rdx
1432 ; SSE-NEXT: movq %rdx, %xmm1
1433 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
1434 ; SSE-NEXT: movaps %xmm0, %xmm3
1435 ; SSE-NEXT: subss %xmm2, %xmm3
1436 ; SSE-NEXT: cvttss2si %xmm3, %rax
1437 ; SSE-NEXT: xorq %rcx, %rax
1438 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1439 ; SSE-NEXT: ucomiss %xmm2, %xmm0
1440 ; SSE-NEXT: cmovaeq %rax, %rcx
1441 ; SSE-NEXT: movq %rcx, %xmm0
1442 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1443 ; SSE-NEXT: movdqa %xmm1, %xmm0
1446 ; VEX-LABEL: fptoui_4f32_to_2i64:
1448 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1449 ; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1450 ; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3
1451 ; VEX-NEXT: vcvttss2si %xmm3, %rax
1452 ; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1453 ; VEX-NEXT: xorq %rcx, %rax
1454 ; VEX-NEXT: vcvttss2si %xmm1, %rdx
1455 ; VEX-NEXT: vucomiss %xmm2, %xmm1
1456 ; VEX-NEXT: cmovaeq %rax, %rdx
1457 ; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1
1458 ; VEX-NEXT: vcvttss2si %xmm1, %rax
1459 ; VEX-NEXT: xorq %rcx, %rax
1460 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1461 ; VEX-NEXT: vucomiss %xmm2, %xmm0
1462 ; VEX-NEXT: cmovaeq %rax, %rcx
1463 ; VEX-NEXT: vmovq %rcx, %xmm0
1464 ; VEX-NEXT: vmovq %rdx, %xmm1
1465 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1468 ; AVX512F-LABEL: fptoui_4f32_to_2i64:
1470 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1471 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1472 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1473 ; AVX512F-NEXT: vmovq %rcx, %xmm0
1474 ; AVX512F-NEXT: vmovq %rax, %xmm1
1475 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1476 ; AVX512F-NEXT: retq
1478 ; AVX512VL-LABEL: fptoui_4f32_to_2i64:
1479 ; AVX512VL: # %bb.0:
1480 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1481 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1482 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1483 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
1484 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1485 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1486 ; AVX512VL-NEXT: retq
1488 ; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
1489 ; AVX512DQ: # %bb.0:
1490 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1491 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1492 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1493 ; AVX512DQ-NEXT: vzeroupper
1494 ; AVX512DQ-NEXT: retq
1496 ; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
1497 ; AVX512VLDQ: # %bb.0:
1498 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1499 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1500 ; AVX512VLDQ-NEXT: vzeroupper
1501 ; AVX512VLDQ-NEXT: retq
1502 %cvt = fptoui <4 x float> %a to <4 x i64>
1503 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
1507 define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
1508 ; SSE-LABEL: fptoui_8f32_to_8i32:
1510 ; SSE-NEXT: movaps {{.*#+}} xmm4 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1511 ; SSE-NEXT: movaps %xmm0, %xmm2
1512 ; SSE-NEXT: cmpltps %xmm4, %xmm2
1513 ; SSE-NEXT: cvttps2dq %xmm0, %xmm3
1514 ; SSE-NEXT: subps %xmm4, %xmm0
1515 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1516 ; SSE-NEXT: movaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
1517 ; SSE-NEXT: xorps %xmm5, %xmm0
1518 ; SSE-NEXT: andps %xmm2, %xmm3
1519 ; SSE-NEXT: andnps %xmm0, %xmm2
1520 ; SSE-NEXT: orps %xmm3, %xmm2
1521 ; SSE-NEXT: movaps %xmm1, %xmm3
1522 ; SSE-NEXT: cmpltps %xmm4, %xmm3
1523 ; SSE-NEXT: cvttps2dq %xmm1, %xmm0
1524 ; SSE-NEXT: subps %xmm4, %xmm1
1525 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
1526 ; SSE-NEXT: xorps %xmm5, %xmm1
1527 ; SSE-NEXT: andps %xmm3, %xmm0
1528 ; SSE-NEXT: andnps %xmm1, %xmm3
1529 ; SSE-NEXT: orps %xmm0, %xmm3
1530 ; SSE-NEXT: movaps %xmm2, %xmm0
1531 ; SSE-NEXT: movaps %xmm3, %xmm1
1534 ; AVX1-LABEL: fptoui_8f32_to_8i32:
1536 ; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1537 ; AVX1-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
1538 ; AVX1-NEXT: vsubps %ymm1, %ymm0, %ymm1
1539 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
1540 ; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm1, %ymm1
1541 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
1542 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
1545 ; AVX2-LABEL: fptoui_8f32_to_8i32:
1547 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1548 ; AVX2-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
1549 ; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm1
1550 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
1551 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
1552 ; AVX2-NEXT: vxorps %ymm3, %ymm1, %ymm1
1553 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
1554 ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
1557 ; AVX512F-LABEL: fptoui_8f32_to_8i32:
1559 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1560 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1561 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1562 ; AVX512F-NEXT: retq
1564 ; AVX512VL-LABEL: fptoui_8f32_to_8i32:
1565 ; AVX512VL: # %bb.0:
1566 ; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
1567 ; AVX512VL-NEXT: retq
1569 ; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
1570 ; AVX512DQ: # %bb.0:
1571 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1572 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1573 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1574 ; AVX512DQ-NEXT: retq
1576 ; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
1577 ; AVX512VLDQ: # %bb.0:
1578 ; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0
1579 ; AVX512VLDQ-NEXT: retq
1580 %cvt = fptoui <8 x float> %a to <8 x i32>
1581 ret <8 x i32> %cvt
1582 }
1584 define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
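; Unsigned f32->i64 has no packed instruction before AVX512DQ: each element is
; converted with cvttss2si on both x and on x minus the 2^63 threshold, the
; large-value result is corrected by xoring in the sign bit, and
; ucomiss/cmovae selects the right one. AVX512F uses scalar vcvttss2usi per
; element; the DQ configurations use packed vcvttps2uqq.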
1585 ; SSE-LABEL: fptoui_4f32_to_4i64:
1587 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1588 ; SSE-NEXT: movaps %xmm0, %xmm2
1589 ; SSE-NEXT: subss %xmm1, %xmm2
1590 ; SSE-NEXT: cvttss2si %xmm2, %rcx
1591 ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1592 ; SSE-NEXT: xorq %rax, %rcx
1593 ; SSE-NEXT: cvttss2si %xmm0, %rdx
1594 ; SSE-NEXT: ucomiss %xmm1, %xmm0
1595 ; SSE-NEXT: cmovaeq %rcx, %rdx
1596 ; SSE-NEXT: movq %rdx, %xmm2
1597 ; SSE-NEXT: movaps %xmm0, %xmm3
1598 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
1599 ; SSE-NEXT: movaps %xmm3, %xmm4
1600 ; SSE-NEXT: subss %xmm1, %xmm4
1601 ; SSE-NEXT: cvttss2si %xmm4, %rcx
1602 ; SSE-NEXT: xorq %rax, %rcx
1603 ; SSE-NEXT: cvttss2si %xmm3, %rdx
1604 ; SSE-NEXT: ucomiss %xmm1, %xmm3
1605 ; SSE-NEXT: cmovaeq %rcx, %rdx
1606 ; SSE-NEXT: movq %rdx, %xmm3
1607 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1608 ; SSE-NEXT: movaps %xmm0, %xmm3
1609 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
1610 ; SSE-NEXT: movaps %xmm3, %xmm4
1611 ; SSE-NEXT: subss %xmm1, %xmm4
1612 ; SSE-NEXT: cvttss2si %xmm4, %rcx
1613 ; SSE-NEXT: xorq %rax, %rcx
1614 ; SSE-NEXT: cvttss2si %xmm3, %rdx
1615 ; SSE-NEXT: ucomiss %xmm1, %xmm3
1616 ; SSE-NEXT: cmovaeq %rcx, %rdx
1617 ; SSE-NEXT: movq %rdx, %xmm3
1618 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1619 ; SSE-NEXT: movaps %xmm0, %xmm4
1620 ; SSE-NEXT: subss %xmm1, %xmm4
1621 ; SSE-NEXT: cvttss2si %xmm4, %rcx
1622 ; SSE-NEXT: xorq %rax, %rcx
1623 ; SSE-NEXT: cvttss2si %xmm0, %rax
1624 ; SSE-NEXT: ucomiss %xmm1, %xmm0
1625 ; SSE-NEXT: cmovaeq %rcx, %rax
1626 ; SSE-NEXT: movq %rax, %xmm1
1627 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1628 ; SSE-NEXT: movdqa %xmm2, %xmm0
1631 ; AVX1-LABEL: fptoui_4f32_to_4i64:
1633 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
1634 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1635 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
1636 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1637 ; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1638 ; AVX1-NEXT: xorq %rcx, %rax
1639 ; AVX1-NEXT: vcvttss2si %xmm2, %rdx
1640 ; AVX1-NEXT: vucomiss %xmm1, %xmm2
1641 ; AVX1-NEXT: cmovaeq %rax, %rdx
1642 ; AVX1-NEXT: vmovq %rdx, %xmm2
1643 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1644 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
1645 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1646 ; AVX1-NEXT: xorq %rcx, %rax
1647 ; AVX1-NEXT: vcvttss2si %xmm3, %rdx
1648 ; AVX1-NEXT: vucomiss %xmm1, %xmm3
1649 ; AVX1-NEXT: cmovaeq %rax, %rdx
1650 ; AVX1-NEXT: vmovq %rdx, %xmm3
1651 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1652 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
1653 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1654 ; AVX1-NEXT: xorq %rcx, %rax
1655 ; AVX1-NEXT: vcvttss2si %xmm0, %rdx
1656 ; AVX1-NEXT: vucomiss %xmm1, %xmm0
1657 ; AVX1-NEXT: cmovaeq %rax, %rdx
1658 ; AVX1-NEXT: vmovq %rdx, %xmm3
1659 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1660 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4
1661 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1662 ; AVX1-NEXT: xorq %rcx, %rax
1663 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1664 ; AVX1-NEXT: vucomiss %xmm1, %xmm0
1665 ; AVX1-NEXT: cmovaeq %rax, %rcx
1666 ; AVX1-NEXT: vmovq %rcx, %xmm0
1667 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1668 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1671 ; AVX2-LABEL: fptoui_4f32_to_4i64:
1673 ; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
1674 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1675 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
1676 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1677 ; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1678 ; AVX2-NEXT: xorq %rcx, %rax
1679 ; AVX2-NEXT: vcvttss2si %xmm2, %rdx
1680 ; AVX2-NEXT: vucomiss %xmm1, %xmm2
1681 ; AVX2-NEXT: cmovaeq %rax, %rdx
1682 ; AVX2-NEXT: vmovq %rdx, %xmm2
1683 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1684 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
1685 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1686 ; AVX2-NEXT: xorq %rcx, %rax
1687 ; AVX2-NEXT: vcvttss2si %xmm3, %rdx
1688 ; AVX2-NEXT: vucomiss %xmm1, %xmm3
1689 ; AVX2-NEXT: cmovaeq %rax, %rdx
1690 ; AVX2-NEXT: vmovq %rdx, %xmm3
1691 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1692 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
1693 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1694 ; AVX2-NEXT: xorq %rcx, %rax
1695 ; AVX2-NEXT: vcvttss2si %xmm0, %rdx
1696 ; AVX2-NEXT: vucomiss %xmm1, %xmm0
1697 ; AVX2-NEXT: cmovaeq %rax, %rdx
1698 ; AVX2-NEXT: vmovq %rdx, %xmm3
1699 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1700 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4
1701 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1702 ; AVX2-NEXT: xorq %rcx, %rax
1703 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1704 ; AVX2-NEXT: vucomiss %xmm1, %xmm0
1705 ; AVX2-NEXT: cmovaeq %rax, %rcx
1706 ; AVX2-NEXT: vmovq %rcx, %xmm0
1707 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1708 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1711 ; AVX512F-LABEL: fptoui_4f32_to_4i64:
1713 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1714 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1715 ; AVX512F-NEXT: vmovq %rax, %xmm1
1716 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1717 ; AVX512F-NEXT: vcvttss2usi %xmm2, %rax
1718 ; AVX512F-NEXT: vmovq %rax, %xmm2
1719 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1720 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1721 ; AVX512F-NEXT: vmovq %rax, %xmm2
1722 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1723 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1724 ; AVX512F-NEXT: vmovq %rax, %xmm0
1725 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1726 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1727 ; AVX512F-NEXT: retq
1729 ; AVX512VL-LABEL: fptoui_4f32_to_4i64:
1730 ; AVX512VL: # %bb.0:
1731 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1732 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1733 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1734 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1735 ; AVX512VL-NEXT: vcvttss2usi %xmm2, %rax
1736 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1737 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1738 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1739 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1740 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1741 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1742 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1743 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1744 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1745 ; AVX512VL-NEXT: retq
1747 ; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
1748 ; AVX512DQ: # %bb.0:
1749 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1750 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1751 ; AVX512DQ-NEXT: retq
1753 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
1754 ; AVX512VLDQ: # %bb.0:
1755 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1756 ; AVX512VLDQ-NEXT: retq
1757 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1758 %cvt = fptoui <4 x float> %shuf to <4 x i64>
1759 ret <4 x i64> %cvt
1760 }
1762 define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
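; Same per-element expansion as fptoui_4f32_to_4i64 above; only the low four
; floats feed the result, so the upper half of the source is never converted.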
1763 ; SSE-LABEL: fptoui_8f32_to_4i64:
1765 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1766 ; SSE-NEXT: movaps %xmm0, %xmm2
1767 ; SSE-NEXT: subss %xmm1, %xmm2
1768 ; SSE-NEXT: cvttss2si %xmm2, %rcx
1769 ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1770 ; SSE-NEXT: xorq %rax, %rcx
1771 ; SSE-NEXT: cvttss2si %xmm0, %rdx
1772 ; SSE-NEXT: ucomiss %xmm1, %xmm0
1773 ; SSE-NEXT: cmovaeq %rcx, %rdx
1774 ; SSE-NEXT: movq %rdx, %xmm2
1775 ; SSE-NEXT: movaps %xmm0, %xmm3
1776 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
1777 ; SSE-NEXT: movaps %xmm3, %xmm4
1778 ; SSE-NEXT: subss %xmm1, %xmm4
1779 ; SSE-NEXT: cvttss2si %xmm4, %rcx
1780 ; SSE-NEXT: xorq %rax, %rcx
1781 ; SSE-NEXT: cvttss2si %xmm3, %rdx
1782 ; SSE-NEXT: ucomiss %xmm1, %xmm3
1783 ; SSE-NEXT: cmovaeq %rcx, %rdx
1784 ; SSE-NEXT: movq %rdx, %xmm3
1785 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1786 ; SSE-NEXT: movaps %xmm0, %xmm3
1787 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
1788 ; SSE-NEXT: movaps %xmm3, %xmm4
1789 ; SSE-NEXT: subss %xmm1, %xmm4
1790 ; SSE-NEXT: cvttss2si %xmm4, %rcx
1791 ; SSE-NEXT: xorq %rax, %rcx
1792 ; SSE-NEXT: cvttss2si %xmm3, %rdx
1793 ; SSE-NEXT: ucomiss %xmm1, %xmm3
1794 ; SSE-NEXT: cmovaeq %rcx, %rdx
1795 ; SSE-NEXT: movq %rdx, %xmm3
1796 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1797 ; SSE-NEXT: movaps %xmm0, %xmm4
1798 ; SSE-NEXT: subss %xmm1, %xmm4
1799 ; SSE-NEXT: cvttss2si %xmm4, %rcx
1800 ; SSE-NEXT: xorq %rax, %rcx
1801 ; SSE-NEXT: cvttss2si %xmm0, %rax
1802 ; SSE-NEXT: ucomiss %xmm1, %xmm0
1803 ; SSE-NEXT: cmovaeq %rcx, %rax
1804 ; SSE-NEXT: movq %rax, %xmm1
1805 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1806 ; SSE-NEXT: movdqa %xmm2, %xmm0
1809 ; AVX1-LABEL: fptoui_8f32_to_4i64:
1811 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
1812 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1813 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
1814 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1815 ; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1816 ; AVX1-NEXT: xorq %rcx, %rax
1817 ; AVX1-NEXT: vcvttss2si %xmm2, %rdx
1818 ; AVX1-NEXT: vucomiss %xmm1, %xmm2
1819 ; AVX1-NEXT: cmovaeq %rax, %rdx
1820 ; AVX1-NEXT: vmovq %rdx, %xmm2
1821 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1822 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
1823 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1824 ; AVX1-NEXT: xorq %rcx, %rax
1825 ; AVX1-NEXT: vcvttss2si %xmm3, %rdx
1826 ; AVX1-NEXT: vucomiss %xmm1, %xmm3
1827 ; AVX1-NEXT: cmovaeq %rax, %rdx
1828 ; AVX1-NEXT: vmovq %rdx, %xmm3
1829 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1830 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
1831 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
1832 ; AVX1-NEXT: xorq %rcx, %rax
1833 ; AVX1-NEXT: vcvttss2si %xmm0, %rdx
1834 ; AVX1-NEXT: vucomiss %xmm1, %xmm0
1835 ; AVX1-NEXT: cmovaeq %rax, %rdx
1836 ; AVX1-NEXT: vmovq %rdx, %xmm3
1837 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1838 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4
1839 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
1840 ; AVX1-NEXT: xorq %rcx, %rax
1841 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
1842 ; AVX1-NEXT: vucomiss %xmm1, %xmm0
1843 ; AVX1-NEXT: cmovaeq %rax, %rcx
1844 ; AVX1-NEXT: vmovq %rcx, %xmm0
1845 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1846 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1849 ; AVX2-LABEL: fptoui_8f32_to_4i64:
1851 ; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
1852 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1853 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
1854 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1855 ; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1856 ; AVX2-NEXT: xorq %rcx, %rax
1857 ; AVX2-NEXT: vcvttss2si %xmm2, %rdx
1858 ; AVX2-NEXT: vucomiss %xmm1, %xmm2
1859 ; AVX2-NEXT: cmovaeq %rax, %rdx
1860 ; AVX2-NEXT: vmovq %rdx, %xmm2
1861 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1862 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
1863 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1864 ; AVX2-NEXT: xorq %rcx, %rax
1865 ; AVX2-NEXT: vcvttss2si %xmm3, %rdx
1866 ; AVX2-NEXT: vucomiss %xmm1, %xmm3
1867 ; AVX2-NEXT: cmovaeq %rax, %rdx
1868 ; AVX2-NEXT: vmovq %rdx, %xmm3
1869 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1870 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
1871 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
1872 ; AVX2-NEXT: xorq %rcx, %rax
1873 ; AVX2-NEXT: vcvttss2si %xmm0, %rdx
1874 ; AVX2-NEXT: vucomiss %xmm1, %xmm0
1875 ; AVX2-NEXT: cmovaeq %rax, %rdx
1876 ; AVX2-NEXT: vmovq %rdx, %xmm3
1877 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1878 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4
1879 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
1880 ; AVX2-NEXT: xorq %rcx, %rax
1881 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
1882 ; AVX2-NEXT: vucomiss %xmm1, %xmm0
1883 ; AVX2-NEXT: cmovaeq %rax, %rcx
1884 ; AVX2-NEXT: vmovq %rcx, %xmm0
1885 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1886 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1889 ; AVX512F-LABEL: fptoui_8f32_to_4i64:
1891 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1892 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1893 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1894 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1895 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx
1896 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
1897 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi
1898 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1899 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1900 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1901 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1902 ; AVX512F-NEXT: vmovq %rax, %xmm2
1903 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1904 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1905 ; AVX512F-NEXT: retq
1907 ; AVX512VL-LABEL: fptoui_8f32_to_4i64:
1908 ; AVX512VL: # %bb.0:
1909 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1910 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1911 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1912 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1913 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx
1914 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
1915 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi
1916 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1917 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1918 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1919 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1920 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1921 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1922 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1923 ; AVX512VL-NEXT: retq
1925 ; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
1926 ; AVX512DQ: # %bb.0:
1927 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1928 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1929 ; AVX512DQ-NEXT: retq
1931 ; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
1932 ; AVX512VLDQ: # %bb.0:
1933 ; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1934 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1935 ; AVX512VLDQ-NEXT: retq
1936 %cvt = fptoui <8 x float> %a to <8 x i64>
1937 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1938 ret <4 x i64> %shuf
1939 }
1945 define <2 x i64> @fptosi_2f64_to_2i64_const() {
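; The *_const tests below check that conversions of constant vectors are folded
; at compile time into plain vector-constant loads (movaps/vmovaps).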
1946 ; SSE-LABEL: fptosi_2f64_to_2i64_const:
1948 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1951 ; AVX-LABEL: fptosi_2f64_to_2i64_const:
1953 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
1955 %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
1956 ret <2 x i64> %cvt
1957 }
1959 define <4 x i32> @fptosi_2f64_to_2i32_const() {
1960 ; SSE-LABEL: fptosi_2f64_to_2i32_const:
1962 ; SSE-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
1965 ; AVX-LABEL: fptosi_2f64_to_2i32_const:
1967 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
1969 %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
1970 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1971 ret <4 x i32> %ext
1972 }
1974 define <4 x i64> @fptosi_4f64_to_4i64_const() {
1975 ; SSE-LABEL: fptosi_4f64_to_4i64_const:
1977 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1978 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
1981 ; AVX-LABEL: fptosi_4f64_to_4i64_const:
1983 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
1985 %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
1986 ret <4 x i64> %cvt
1987 }
1989 define <4 x i32> @fptosi_4f64_to_4i32_const() {
1990 ; SSE-LABEL: fptosi_4f64_to_4i32_const:
1992 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1995 ; AVX-LABEL: fptosi_4f64_to_4i32_const:
1997 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1999 %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
2000 ret <4 x i32> %cvt
2001 }
2003 define <2 x i64> @fptoui_2f64_to_2i64_const() {
2004 ; SSE-LABEL: fptoui_2f64_to_2i64_const:
2006 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
2009 ; AVX-LABEL: fptoui_2f64_to_2i64_const:
2011 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
2013 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
2014 ret <2 x i64> %cvt
2015 }
2017 define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
2018 ; SSE-LABEL: fptoui_2f64_to_2i32_const:
2020 ; SSE-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
2023 ; AVX-LABEL: fptoui_2f64_to_2i32_const:
2025 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
2027 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
2028 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2029 ret <4 x i32> %ext
2030 }
2032 define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
2033 ; SSE-LABEL: fptoui_4f64_to_4i64_const:
2035 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
2036 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8]
2039 ; AVX-LABEL: fptoui_4f64_to_4i64_const:
2041 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
2043 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
2044 ret <4 x i64> %cvt
2045 }
2047 define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
2048 ; SSE-LABEL: fptoui_4f64_to_4i32_const:
2050 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
2053 ; AVX-LABEL: fptoui_4f64_to_4i32_const:
2055 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
2057 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
2058 ret <4 x i32> %cvt
2059 }
2061 define <4 x i32> @fptosi_4f32_to_4i32_const() {
2062 ; SSE-LABEL: fptosi_4f32_to_4i32_const:
2064 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2067 ; AVX-LABEL: fptosi_4f32_to_4i32_const:
2069 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2071 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
2072 ret <4 x i32> %cvt
2073 }
2075 define <4 x i64> @fptosi_4f32_to_4i64_const() {
2076 ; SSE-LABEL: fptosi_4f32_to_4i64_const:
2078 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
2079 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
2082 ; AVX-LABEL: fptosi_4f32_to_4i64_const:
2084 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
2086 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
2087 ret <4 x i64> %cvt
2088 }
2090 define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
2091 ; SSE-LABEL: fptosi_8f32_to_8i32_const:
2093 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2094 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
2097 ; AVX-LABEL: fptosi_8f32_to_8i32_const:
2099 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
2101 %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
2102 ret <8 x i32> %cvt
2103 }
2105 define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
2106 ; SSE-LABEL: fptoui_4f32_to_4i32_const:
2108 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
2111 ; AVX-LABEL: fptoui_4f32_to_4i32_const:
2113 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
2115 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
2116 ret <4 x i32> %cvt
2117 }
2119 define <4 x i64> @fptoui_4f32_to_4i64_const() {
2120 ; SSE-LABEL: fptoui_4f32_to_4i64_const:
2122 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2]
2123 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8]
2126 ; AVX-LABEL: fptoui_4f32_to_4i64_const:
2128 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
2130 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
2131 ret <4 x i64> %cvt
2132 }
2134 define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
2135 ; SSE-LABEL: fptoui_8f32_to_8i32_const:
2137 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
2138 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
2141 ; AVX-LABEL: fptoui_8f32_to_8i32_const:
2143 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
2145 %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
2146 ret <8 x i32> %cvt
2147 }
2153 define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
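; The SSE and VEX configurations round-trip each half element through the
; __gnu_f2h_ieee / __gnu_h2f_ieee libcalls before cvttss2si; the AVX512
; configurations expand the conversion inline with vcvtps2ph/vcvtph2ps.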
2154 ; SSE-LABEL: fptosi_2f16_to_4i32:
2156 ; SSE-NEXT: pushq %rax
2157 ; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2158 ; SSE-NEXT: movaps %xmm1, %xmm0
2159 ; SSE-NEXT: callq __gnu_f2h_ieee
2160 ; SSE-NEXT: movzwl %ax, %edi
2161 ; SSE-NEXT: callq __gnu_h2f_ieee
2162 ; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
2163 ; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
2164 ; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
2165 ; SSE-NEXT: callq __gnu_f2h_ieee
2166 ; SSE-NEXT: movzwl %ax, %edi
2167 ; SSE-NEXT: callq __gnu_h2f_ieee
2168 ; SSE-NEXT: cvttss2si %xmm0, %eax
2169 ; SSE-NEXT: cvttss2si (%rsp), %ecx # 4-byte Folded Reload
2170 ; SSE-NEXT: movd %ecx, %xmm0
2171 ; SSE-NEXT: movd %eax, %xmm1
2172 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2173 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2174 ; SSE-NEXT: popq %rax
2177 ; VEX-LABEL: fptosi_2f16_to_4i32:
2179 ; VEX-NEXT: pushq %rax
2180 ; VEX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2181 ; VEX-NEXT: vmovaps %xmm1, %xmm0
2182 ; VEX-NEXT: callq __gnu_f2h_ieee
2183 ; VEX-NEXT: movzwl %ax, %edi
2184 ; VEX-NEXT: callq __gnu_h2f_ieee
2185 ; VEX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
2186 ; VEX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
2187 ; VEX-NEXT: # xmm0 = mem[0],zero,zero,zero
2188 ; VEX-NEXT: callq __gnu_f2h_ieee
2189 ; VEX-NEXT: movzwl %ax, %edi
2190 ; VEX-NEXT: callq __gnu_h2f_ieee
2191 ; VEX-NEXT: vcvttss2si %xmm0, %eax
2192 ; VEX-NEXT: vcvttss2si (%rsp), %ecx # 4-byte Folded Reload
2193 ; VEX-NEXT: vmovd %ecx, %xmm0
2194 ; VEX-NEXT: vmovd %eax, %xmm1
2195 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2196 ; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2197 ; VEX-NEXT: popq %rax
2200 ; AVX512-LABEL: fptosi_2f16_to_4i32:
2202 ; AVX512-NEXT: vcvtps2ph $4, %xmm1, %xmm1
2203 ; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
2204 ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
2205 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
2206 ; AVX512-NEXT: vcvttss2si %xmm0, %eax
2207 ; AVX512-NEXT: vcvttss2si %xmm1, %ecx
2208 ; AVX512-NEXT: vmovd %ecx, %xmm0
2209 ; AVX512-NEXT: vmovd %eax, %xmm1
2210 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2211 ; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2213 %cvt = fptosi <2 x half> %a to <2 x i32>
2214 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2215 ret <4 x i32> %ext
2216 }
2218 define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
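; x87 path: the SSE configuration forces the rounding mode to truncation with
; fnstcw/fldcw around fistpl; the AVX configurations (SSE3 available) use
; fisttpl, which truncates without touching the control word.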
2219 ; SSE-LABEL: fptosi_2f80_to_4i32:
2221 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
2222 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
2223 ; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
2224 ; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2225 ; SSE-NEXT: orl $3072, %eax # imm = 0xC00
2226 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2227 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2228 ; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
2229 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2230 ; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
2231 ; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2232 ; SSE-NEXT: orl $3072, %eax # imm = 0xC00
2233 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2234 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2235 ; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
2236 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2237 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2238 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2239 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2240 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2243 ; AVX-LABEL: fptosi_2f80_to_4i32:
2245 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
2246 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
2247 ; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
2248 ; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
2249 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2250 ; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2251 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2252 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2254 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
2255 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2256 ret <4 x i32> %ext
2257 }
2259 define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
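; fp128 conversions are libcalls: each element goes through __fixtfsi and the
; two i32 results are reassembled with movd/punpckldq.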
2260 ; SSE-LABEL: fptosi_2f128_to_4i32:
2262 ; SSE-NEXT: pushq %rbp
2263 ; SSE-NEXT: pushq %r14
2264 ; SSE-NEXT: pushq %rbx
2265 ; SSE-NEXT: movq %rcx, %r14
2266 ; SSE-NEXT: movq %rdx, %rbx
2267 ; SSE-NEXT: callq __fixtfsi
2268 ; SSE-NEXT: movl %eax, %ebp
2269 ; SSE-NEXT: movq %rbx, %rdi
2270 ; SSE-NEXT: movq %r14, %rsi
2271 ; SSE-NEXT: callq __fixtfsi
2272 ; SSE-NEXT: movd %eax, %xmm0
2273 ; SSE-NEXT: movd %ebp, %xmm1
2274 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2275 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2276 ; SSE-NEXT: popq %rbx
2277 ; SSE-NEXT: popq %r14
2278 ; SSE-NEXT: popq %rbp
2281 ; AVX-LABEL: fptosi_2f128_to_4i32:
2283 ; AVX-NEXT: pushq %rbp
2284 ; AVX-NEXT: pushq %r14
2285 ; AVX-NEXT: pushq %rbx
2286 ; AVX-NEXT: movq %rcx, %r14
2287 ; AVX-NEXT: movq %rdx, %rbx
2288 ; AVX-NEXT: callq __fixtfsi
2289 ; AVX-NEXT: movl %eax, %ebp
2290 ; AVX-NEXT: movq %rbx, %rdi
2291 ; AVX-NEXT: movq %r14, %rsi
2292 ; AVX-NEXT: callq __fixtfsi
2293 ; AVX-NEXT: vmovd %eax, %xmm0
2294 ; AVX-NEXT: vmovd %ebp, %xmm1
2295 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2296 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2297 ; AVX-NEXT: popq %rbx
2298 ; AVX-NEXT: popq %r14
2299 ; AVX-NEXT: popq %rbp
2301 %cvt = fptosi <2 x fp128> %a to <2 x i32>
2302 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2303 ret <4 x i32> %ext
2304 }
2306 define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
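; Sub-i32 results are produced by converting to i32 lanes with cvttps2dq (or
; cvttpd2dq) and then narrowing the low lanes: pand/packuswb or pshufb for i8,
; pshuflw for i16. The same pattern repeats for the f64 and unsigned variants
; below.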
2307 ; SSE-LABEL: fptosi_2f32_to_2i8:
2309 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2310 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0
2311 ; SSE-NEXT: packuswb %xmm0, %xmm0
2312 ; SSE-NEXT: packuswb %xmm0, %xmm0
2315 ; AVX-LABEL: fptosi_2f32_to_2i8:
2317 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2318 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2320 %cvt = fptosi <2 x float> %a to <2 x i8>
2321 ret <2 x i8> %cvt
2322 }
2324 define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
2325 ; SSE-LABEL: fptosi_2f32_to_2i16:
2327 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2328 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2331 ; AVX-LABEL: fptosi_2f32_to_2i16:
2333 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2334 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2336 %cvt = fptosi <2 x float> %a to <2 x i16>
2337 ret <2 x i16> %cvt
2338 }
2340 define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
2341 ; SSE-LABEL: fptoui_2f32_to_2i8:
2343 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2344 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0
2345 ; SSE-NEXT: packuswb %xmm0, %xmm0
2346 ; SSE-NEXT: packuswb %xmm0, %xmm0
2349 ; AVX-LABEL: fptoui_2f32_to_2i8:
2351 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2352 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2354 %cvt = fptoui <2 x float> %a to <2 x i8>
2355 ret <2 x i8> %cvt
2356 }
2358 define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
2359 ; SSE-LABEL: fptoui_2f32_to_2i16:
2361 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2362 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2365 ; AVX-LABEL: fptoui_2f32_to_2i16:
2367 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2368 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2370 %cvt = fptoui <2 x float> %a to <2 x i16>
2371 ret <2 x i16> %cvt
2372 }
2374 define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
2375 ; SSE-LABEL: fptosi_2f64_to_2i8:
2377 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2378 ; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
2379 ; SSE-NEXT: packuswb %xmm0, %xmm0
2380 ; SSE-NEXT: packuswb %xmm0, %xmm0
2383 ; AVX-LABEL: fptosi_2f64_to_2i8:
2385 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2386 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2388 %cvt = fptosi <2 x double> %a to <2 x i8>
2389 ret <2 x i8> %cvt
2390 }
2392 define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
2393 ; SSE-LABEL: fptosi_2f64_to_2i16:
2395 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2396 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2399 ; AVX-LABEL: fptosi_2f64_to_2i16:
2401 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2402 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2404 %cvt = fptosi <2 x double> %a to <2 x i16>
2405 ret <2 x i16> %cvt
2406 }
2408 define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
2409 ; SSE-LABEL: fptoui_2f64_to_2i8:
2411 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2412 ; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
2413 ; SSE-NEXT: packuswb %xmm0, %xmm0
2414 ; SSE-NEXT: packuswb %xmm0, %xmm0
2417 ; AVX-LABEL: fptoui_2f64_to_2i8:
2419 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2420 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2422 %cvt = fptoui <2 x double> %a to <2 x i8>
2423 ret <2 x i8> %cvt
2424 }
2426 define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
2427 ; SSE-LABEL: fptoui_2f64_to_2i16:
2429 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2430 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2433 ; AVX-LABEL: fptoui_2f64_to_2i16:
2435 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2436 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2438 %cvt = fptoui <2 x double> %a to <2 x i16>
2439 ret <2 x i16> %cvt
2440 }
2442 define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
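; Each 128/256-bit chunk is converted with cvttpd2dq and the i32 results are
; packed down to i16 (packssdw for the signed case, pshuf*/punpck or vpackusdw
; for the unsigned case); AVX512 converts the full zmm and truncates with
; vpmovdw.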
2443 ; SSE-LABEL: fptosi_8f64_to_8i16:
2445 ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2446 ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2447 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2448 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2449 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2450 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2451 ; SSE-NEXT: packssdw %xmm2, %xmm0
2454 ; VEX-LABEL: fptosi_8f64_to_8i16:
2456 ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2457 ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2458 ; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
2459 ; VEX-NEXT: vzeroupper
2462 ; AVX512F-LABEL: fptosi_8f64_to_8i16:
2464 ; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
2465 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2466 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2467 ; AVX512F-NEXT: vzeroupper
2468 ; AVX512F-NEXT: retq
2470 ; AVX512VL-LABEL: fptosi_8f64_to_8i16:
2471 ; AVX512VL: # %bb.0:
2472 ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
2473 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2474 ; AVX512VL-NEXT: vzeroupper
2475 ; AVX512VL-NEXT: retq
2477 ; AVX512DQ-LABEL: fptosi_8f64_to_8i16:
2478 ; AVX512DQ: # %bb.0:
2479 ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2480 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2481 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2482 ; AVX512DQ-NEXT: vzeroupper
2483 ; AVX512DQ-NEXT: retq
2485 ; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16:
2486 ; AVX512VLDQ: # %bb.0:
2487 ; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2488 ; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
2489 ; AVX512VLDQ-NEXT: vzeroupper
2490 ; AVX512VLDQ-NEXT: retq
2491 %cvt = fptosi <8 x double> %a to <8 x i16>
2492 ret <8 x i16> %cvt
2493 }
2495 define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
2496 ; SSE-LABEL: fptoui_8f64_to_8i16:
2498 ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2499 ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2500 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2501 ; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
2502 ; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2503 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2504 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2505 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2506 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2507 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2508 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
2509 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2510 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2513 ; VEX-LABEL: fptoui_8f64_to_8i16:
2515 ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2516 ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2517 ; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2518 ; VEX-NEXT: vzeroupper
2521 ; AVX512F-LABEL: fptoui_8f64_to_8i16:
2523 ; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
2524 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
2525 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2526 ; AVX512F-NEXT: vzeroupper
2527 ; AVX512F-NEXT: retq
2529 ; AVX512VL-LABEL: fptoui_8f64_to_8i16:
2530 ; AVX512VL: # %bb.0:
2531 ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
2532 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2533 ; AVX512VL-NEXT: vzeroupper
2534 ; AVX512VL-NEXT: retq
2536 ; AVX512DQ-LABEL: fptoui_8f64_to_8i16:
2537 ; AVX512DQ: # %bb.0:
2538 ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2539 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2540 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2541 ; AVX512DQ-NEXT: vzeroupper
2542 ; AVX512DQ-NEXT: retq
2544 ; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16:
2545 ; AVX512VLDQ: # %bb.0:
2546 ; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
2547 ; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
2548 ; AVX512VLDQ-NEXT: vzeroupper
2549 ; AVX512VLDQ-NEXT: retq
2550 %cvt = fptoui <8 x double> %a to <8 x i16>
2551 ret <8 x i16> %cvt
2552 }
2554 define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
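; <16 x float> is converted in 128/256-bit pieces with cvttps2dq, then narrowed
; with packssdw followed by packsswb (packuswb for the unsigned variant);
; AVX512 converts the whole zmm and truncates with vpmovdb.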
2555 ; SSE-LABEL: fptosi_16f32_to_16i8:
2557 ; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2558 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2559 ; SSE-NEXT: packssdw %xmm3, %xmm2
2560 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2561 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2562 ; SSE-NEXT: packssdw %xmm1, %xmm0
2563 ; SSE-NEXT: packsswb %xmm2, %xmm0
2566 ; AVX1-LABEL: fptosi_16f32_to_16i8:
2568 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2569 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2570 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2571 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2572 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2573 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2574 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2575 ; AVX1-NEXT: vzeroupper
2578 ; AVX2-LABEL: fptosi_16f32_to_16i8:
2580 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
2581 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2582 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2583 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2584 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2585 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2586 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2587 ; AVX2-NEXT: vzeroupper
2590 ; AVX512-LABEL: fptosi_16f32_to_16i8:
2592 ; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
2593 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2594 ; AVX512-NEXT: vzeroupper
2596 %cvt = fptosi <16 x float> %a to <16 x i8>
2597 ret <16 x i8> %cvt
2598 }
2600 define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
2601 ; SSE-LABEL: fptoui_16f32_to_16i8:
2603 ; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2604 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2605 ; SSE-NEXT: packssdw %xmm3, %xmm2
2606 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2607 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2608 ; SSE-NEXT: packssdw %xmm1, %xmm0
2609 ; SSE-NEXT: packuswb %xmm2, %xmm0
2612 ; AVX1-LABEL: fptoui_16f32_to_16i8:
2614 ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2615 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2616 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2617 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2618 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2619 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2620 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2621 ; AVX1-NEXT: vzeroupper
2624 ; AVX2-LABEL: fptoui_16f32_to_16i8:
2626 ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
2627 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2628 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2629 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2630 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2631 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2632 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2633 ; AVX2-NEXT: vzeroupper
2636 ; AVX512-LABEL: fptoui_16f32_to_16i8:
2638 ; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
2639 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2640 ; AVX512-NEXT: vzeroupper
2642 %cvt = fptoui <16 x float> %a to <16 x i8>
2643 ret <16 x i8> %cvt
2644 }
2646 define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) {
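; The non-DQ configurations load the pair with a single movsd and convert each
; lane with scalar cvttss2si; the DQ configurations use packed vcvttps2qq, with
; AVX512VLDQ folding the load into the instruction (vcvttps2qq (%rdi)).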
2647 ; SSE-LABEL: fptosi_2f32_to_2i64_load:
2649 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2650 ; SSE-NEXT: cvttss2si %xmm1, %rax
2651 ; SSE-NEXT: movq %rax, %xmm0
2652 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
2653 ; SSE-NEXT: cvttss2si %xmm1, %rax
2654 ; SSE-NEXT: movq %rax, %xmm1
2655 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2658 ; VEX-LABEL: fptosi_2f32_to_2i64_load:
2660 ; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2661 ; VEX-NEXT: vcvttss2si %xmm0, %rax
2662 ; VEX-NEXT: vmovq %rax, %xmm1
2663 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2664 ; VEX-NEXT: vcvttss2si %xmm0, %rax
2665 ; VEX-NEXT: vmovq %rax, %xmm0
2666 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2669 ; AVX512F-LABEL: fptosi_2f32_to_2i64_load:
2671 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2672 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
2673 ; AVX512F-NEXT: vmovq %rax, %xmm1
2674 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2675 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
2676 ; AVX512F-NEXT: vmovq %rax, %xmm0
2677 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2678 ; AVX512F-NEXT: retq
2680 ; AVX512VL-LABEL: fptosi_2f32_to_2i64_load:
2681 ; AVX512VL: # %bb.0:
2682 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2683 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
2684 ; AVX512VL-NEXT: vmovq %rax, %xmm1
2685 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2686 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
2687 ; AVX512VL-NEXT: vmovq %rax, %xmm0
2688 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2689 ; AVX512VL-NEXT: retq
2691 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load:
2692 ; AVX512DQ: # %bb.0:
2693 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2694 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
2695 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2696 ; AVX512DQ-NEXT: vzeroupper
2697 ; AVX512DQ-NEXT: retq
2699 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
2700 ; AVX512VLDQ: # %bb.0:
2701 ; AVX512VLDQ-NEXT: vcvttps2qq (%rdi), %xmm0
2702 ; AVX512VLDQ-NEXT: retq
2703 %a = load <2 x float>, <2 x float>* %x
2704 %b = fptosi <2 x float> %a to <2 x i64>
2705 ret <2 x i64> %b
2706 }
2708 define <2 x i64> @fptoui_2f32_to_2i64_load(<2 x float>* %x) {
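; Unsigned variant of the test above: without AVX512 each lane needs the
; subtract/xor/ucomiss+cmovae expansion; AVX512F uses scalar vcvttss2usi per
; lane and AVX512VLDQ folds the load into vcvttps2uqq (%rdi).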
2709 ; SSE-LABEL: fptoui_2f32_to_2i64_load:
2711 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2712 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2713 ; SSE-NEXT: movaps %xmm1, %xmm0
2714 ; SSE-NEXT: subss %xmm2, %xmm0
2715 ; SSE-NEXT: cvttss2si %xmm0, %rax
2716 ; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
2717 ; SSE-NEXT: xorq %rcx, %rax
2718 ; SSE-NEXT: cvttss2si %xmm1, %rdx
2719 ; SSE-NEXT: ucomiss %xmm2, %xmm1
2720 ; SSE-NEXT: cmovaeq %rax, %rdx
2721 ; SSE-NEXT: movq %rdx, %xmm0
2722 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
2723 ; SSE-NEXT: movaps %xmm1, %xmm3
2724 ; SSE-NEXT: subss %xmm2, %xmm3
2725 ; SSE-NEXT: cvttss2si %xmm3, %rax
2726 ; SSE-NEXT: xorq %rcx, %rax
2727 ; SSE-NEXT: cvttss2si %xmm1, %rcx
2728 ; SSE-NEXT: ucomiss %xmm2, %xmm1
2729 ; SSE-NEXT: cmovaeq %rax, %rcx
2730 ; SSE-NEXT: movq %rcx, %xmm1
2731 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2734 ; VEX-LABEL: fptoui_2f32_to_2i64_load:
2736 ; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2737 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2738 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
2739 ; VEX-NEXT: vcvttss2si %xmm2, %rax
2740 ; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
2741 ; VEX-NEXT: xorq %rcx, %rax
2742 ; VEX-NEXT: vcvttss2si %xmm0, %rdx
2743 ; VEX-NEXT: vucomiss %xmm1, %xmm0
2744 ; VEX-NEXT: cmovaeq %rax, %rdx
2745 ; VEX-NEXT: vmovq %rdx, %xmm2
2746 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2747 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3
2748 ; VEX-NEXT: vcvttss2si %xmm3, %rax
2749 ; VEX-NEXT: xorq %rcx, %rax
2750 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
2751 ; VEX-NEXT: vucomiss %xmm1, %xmm0
2752 ; VEX-NEXT: cmovaeq %rax, %rcx
2753 ; VEX-NEXT: vmovq %rcx, %xmm0
2754 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2757 ; AVX512F-LABEL: fptoui_2f32_to_2i64_load:
2759 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2760 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
2761 ; AVX512F-NEXT: vmovq %rax, %xmm1
2762 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2763 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
2764 ; AVX512F-NEXT: vmovq %rax, %xmm0
2765 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2766 ; AVX512F-NEXT: retq
2768 ; AVX512VL-LABEL: fptoui_2f32_to_2i64_load:
2769 ; AVX512VL: # %bb.0:
2770 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2771 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
2772 ; AVX512VL-NEXT: vmovq %rax, %xmm1
2773 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2774 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
2775 ; AVX512VL-NEXT: vmovq %rax, %xmm0
2776 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2777 ; AVX512VL-NEXT: retq
2779 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load:
2780 ; AVX512DQ: # %bb.0:
2781 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2782 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
2783 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2784 ; AVX512DQ-NEXT: vzeroupper
2785 ; AVX512DQ-NEXT: retq
2787 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
2788 ; AVX512VLDQ: # %bb.0:
2789 ; AVX512VLDQ-NEXT: vcvttps2uqq (%rdi), %xmm0
2790 ; AVX512VLDQ-NEXT: retq
2791 %a = load <2 x float>, <2 x float>* %x
2792 %b = fptoui <2 x float> %a to <2 x i64>
2793 ret <2 x i64> %b
2794 }