1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1
4 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2
5 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
6 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
7 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
8 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VLDQ
10 ; 32-bit tests to make sure we're not doing anything stupid.
11 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown
12 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse
13 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse2
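; The three i686 RUN lines above are not piped into FileCheck; they only
; verify that llc can legalize and select these conversions for 32-bit
; targets without erroring out.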
16 ; Double to Signed Integer
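; fptosi from <2 x double>/<4 x double>. Targets without AVX512DQ have no
; packed double-to-i64 conversion, so the i64 cases below are lowered one
; element at a time with cvttsd2si and rebuilt with movq + punpcklqdq, while
; AVX512DQ targets use vcvttpd2qq; the i32 cases use (v)cvttpd2dq throughout.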
19 define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
20 ; SSE-LABEL: fptosi_2f64_to_2i64:
22 ; SSE-NEXT: cvttsd2si %xmm0, %rax
23 ; SSE-NEXT: movq %rax, %xmm1
24 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
25 ; SSE-NEXT: cvttsd2si %xmm0, %rax
26 ; SSE-NEXT: movq %rax, %xmm0
27 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28 ; SSE-NEXT: movdqa %xmm1, %xmm0
31 ; VEX-LABEL: fptosi_2f64_to_2i64:
33 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
34 ; VEX-NEXT: vmovq %rax, %xmm1
35 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
36 ; VEX-NEXT: vcvttsd2si %xmm0, %rax
37 ; VEX-NEXT: vmovq %rax, %xmm0
38 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
41 ; AVX512F-LABEL: fptosi_2f64_to_2i64:
43 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
44 ; AVX512F-NEXT: vmovq %rax, %xmm1
45 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
46 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
47 ; AVX512F-NEXT: vmovq %rax, %xmm0
48 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
51 ; AVX512VL-LABEL: fptosi_2f64_to_2i64:
53 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
54 ; AVX512VL-NEXT: vmovq %rax, %xmm1
55 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
56 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
57 ; AVX512VL-NEXT: vmovq %rax, %xmm0
58 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
61 ; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
63 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
64 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
65 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
66 ; AVX512DQ-NEXT: vzeroupper
69 ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
70 ; AVX512VLDQ: # %bb.0:
71 ; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
72 ; AVX512VLDQ-NEXT: retq
73 %cvt = fptosi <2 x double> %a to <2 x i64>
74 ret <2 x i64> %cvt
75 }
77 define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
78 ; SSE-LABEL: fptosi_2f64_to_4i32:
80 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
83 ; AVX-LABEL: fptosi_2f64_to_4i32:
85 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
87 %cvt = fptosi <2 x double> %a to <2 x i32>
88 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
89 ret <4 x i32> %ext
90 }
92 define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
93 ; SSE-LABEL: fptosi_2f64_to_2i32:
95 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
98 ; AVX-LABEL: fptosi_2f64_to_2i32:
100 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
102 %cvt = fptosi <2 x double> %a to <2 x i32>
103 ret <2 x i32> %cvt
104 }
106 define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
107 ; SSE-LABEL: fptosi_4f64_to_2i32:
109 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
110 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
111 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
114 ; AVX-LABEL: fptosi_4f64_to_2i32:
116 ; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
117 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
118 ; AVX-NEXT: vzeroupper
120 %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
121 %cvt = fptosi <4 x double> %ext to <4 x i32>
122 ret <4 x i32> %cvt
123 }
125 define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
126 ; SSE-LABEL: fptosi_4f64_to_4i64:
128 ; SSE-NEXT: cvttsd2si %xmm0, %rax
129 ; SSE-NEXT: movq %rax, %xmm2
130 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
131 ; SSE-NEXT: cvttsd2si %xmm0, %rax
132 ; SSE-NEXT: movq %rax, %xmm0
133 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
134 ; SSE-NEXT: cvttsd2si %xmm1, %rax
135 ; SSE-NEXT: movq %rax, %xmm3
136 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
137 ; SSE-NEXT: cvttsd2si %xmm1, %rax
138 ; SSE-NEXT: movq %rax, %xmm0
139 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
140 ; SSE-NEXT: movdqa %xmm2, %xmm0
141 ; SSE-NEXT: movdqa %xmm3, %xmm1
144 ; AVX1-LABEL: fptosi_4f64_to_4i64:
146 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
147 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
148 ; AVX1-NEXT: vmovq %rax, %xmm2
149 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
150 ; AVX1-NEXT: vcvttsd2si %xmm1, %rax
151 ; AVX1-NEXT: vmovq %rax, %xmm1
152 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
153 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
154 ; AVX1-NEXT: vmovq %rax, %xmm2
155 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
156 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax
157 ; AVX1-NEXT: vmovq %rax, %xmm0
158 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
159 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
162 ; AVX2-LABEL: fptosi_4f64_to_4i64:
164 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
165 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
166 ; AVX2-NEXT: vmovq %rax, %xmm2
167 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
168 ; AVX2-NEXT: vcvttsd2si %xmm1, %rax
169 ; AVX2-NEXT: vmovq %rax, %xmm1
170 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
171 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
172 ; AVX2-NEXT: vmovq %rax, %xmm2
173 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
174 ; AVX2-NEXT: vcvttsd2si %xmm0, %rax
175 ; AVX2-NEXT: vmovq %rax, %xmm0
176 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
177 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
180 ; AVX512F-LABEL: fptosi_4f64_to_4i64:
182 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
183 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
184 ; AVX512F-NEXT: vmovq %rax, %xmm2
185 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
186 ; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
187 ; AVX512F-NEXT: vmovq %rax, %xmm1
188 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
189 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
190 ; AVX512F-NEXT: vmovq %rax, %xmm2
191 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
192 ; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
193 ; AVX512F-NEXT: vmovq %rax, %xmm0
194 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
195 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
198 ; AVX512VL-LABEL: fptosi_4f64_to_4i64:
200 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
201 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
202 ; AVX512VL-NEXT: vmovq %rax, %xmm2
203 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
204 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
205 ; AVX512VL-NEXT: vmovq %rax, %xmm1
206 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
207 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
208 ; AVX512VL-NEXT: vmovq %rax, %xmm2
209 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
210 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
211 ; AVX512VL-NEXT: vmovq %rax, %xmm0
212 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
213 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
214 ; AVX512VL-NEXT: retq
216 ; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
218 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
219 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
220 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
221 ; AVX512DQ-NEXT: retq
223 ; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64:
224 ; AVX512VLDQ: # %bb.0:
225 ; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
226 ; AVX512VLDQ-NEXT: retq
227 %cvt = fptosi <4 x double> %a to <4 x i64>
228 ret <4 x i64> %cvt
229 }
231 define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
232 ; SSE-LABEL: fptosi_4f64_to_4i32:
234 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
235 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
236 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
239 ; AVX-LABEL: fptosi_4f64_to_4i32:
241 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
242 ; AVX-NEXT: vzeroupper
244 %cvt = fptosi <4 x double> %a to <4 x i32>
245 ret <4 x i32> %cvt
246 }
249 ; Double to Unsigned Integer
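; fptoui from double vectors. Pre-AVX512 targets have no unsigned convert
; instructions: the i64 results subtract 2^63, convert signed and flip the
; sign bit back in (the subsd/xorq/cmovaeq sequences below), while the packed
; i32 results either go through a 64-bit signed cvttsd2si or, on AVX, compare
; against 2^31 and blend two cvttpd2dq results. AVX512 targets use
; vcvttsd2usi, vcvttpd2udq and (with DQ) vcvttpd2uqq instead.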
252 define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
253 ; SSE-LABEL: fptoui_2f64_to_2i64:
255 ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
256 ; SSE-NEXT: movapd %xmm0, %xmm1
257 ; SSE-NEXT: subsd %xmm2, %xmm1
258 ; SSE-NEXT: cvttsd2si %xmm1, %rax
259 ; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
260 ; SSE-NEXT: xorq %rcx, %rax
261 ; SSE-NEXT: cvttsd2si %xmm0, %rdx
262 ; SSE-NEXT: ucomisd %xmm2, %xmm0
263 ; SSE-NEXT: cmovaeq %rax, %rdx
264 ; SSE-NEXT: movq %rdx, %xmm1
265 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
266 ; SSE-NEXT: movapd %xmm0, %xmm3
267 ; SSE-NEXT: subsd %xmm2, %xmm3
268 ; SSE-NEXT: cvttsd2si %xmm3, %rax
269 ; SSE-NEXT: xorq %rcx, %rax
270 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
271 ; SSE-NEXT: ucomisd %xmm2, %xmm0
272 ; SSE-NEXT: cmovaeq %rax, %rcx
273 ; SSE-NEXT: movq %rcx, %xmm0
274 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
275 ; SSE-NEXT: movdqa %xmm1, %xmm0
278 ; VEX-LABEL: fptoui_2f64_to_2i64:
280 ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
281 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
282 ; VEX-NEXT: vcvttsd2si %xmm2, %rax
283 ; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
284 ; VEX-NEXT: xorq %rcx, %rax
285 ; VEX-NEXT: vcvttsd2si %xmm0, %rdx
286 ; VEX-NEXT: vucomisd %xmm1, %xmm0
287 ; VEX-NEXT: cmovaeq %rax, %rdx
288 ; VEX-NEXT: vmovq %rdx, %xmm2
289 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
290 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
291 ; VEX-NEXT: vcvttsd2si %xmm3, %rax
292 ; VEX-NEXT: xorq %rcx, %rax
293 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
294 ; VEX-NEXT: vucomisd %xmm1, %xmm0
295 ; VEX-NEXT: cmovaeq %rax, %rcx
296 ; VEX-NEXT: vmovq %rcx, %xmm0
297 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
300 ; AVX512F-LABEL: fptoui_2f64_to_2i64:
302 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
303 ; AVX512F-NEXT: vmovq %rax, %xmm1
304 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
305 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
306 ; AVX512F-NEXT: vmovq %rax, %xmm0
307 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
310 ; AVX512VL-LABEL: fptoui_2f64_to_2i64:
312 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
313 ; AVX512VL-NEXT: vmovq %rax, %xmm1
314 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
315 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
316 ; AVX512VL-NEXT: vmovq %rax, %xmm0
317 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
318 ; AVX512VL-NEXT: retq
320 ; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
322 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
323 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
324 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
325 ; AVX512DQ-NEXT: vzeroupper
326 ; AVX512DQ-NEXT: retq
328 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
329 ; AVX512VLDQ: # %bb.0:
330 ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
331 ; AVX512VLDQ-NEXT: retq
332 %cvt = fptoui <2 x double> %a to <2 x i64>
333 ret <2 x i64> %cvt
334 }
336 define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
337 ; SSE-LABEL: fptoui_2f64_to_4i32:
339 ; SSE-NEXT: cvttsd2si %xmm0, %rax
340 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
341 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
342 ; SSE-NEXT: movd %eax, %xmm0
343 ; SSE-NEXT: movd %ecx, %xmm1
344 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
345 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
348 ; AVX1-LABEL: fptoui_2f64_to_4i32:
350 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
351 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
352 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
353 ; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm2
354 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3
355 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
356 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
357 ; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
358 ; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
359 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
360 ; AVX1-NEXT: vzeroupper
363 ; AVX2-LABEL: fptoui_2f64_to_4i32:
365 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
366 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
367 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
368 ; AVX2-NEXT: vpackssdw %xmm0, %xmm2, %xmm2
369 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
370 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
371 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
372 ; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1
373 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
374 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
375 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
376 ; AVX2-NEXT: vzeroupper
379 ; AVX512F-LABEL: fptoui_2f64_to_4i32:
381 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
382 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
383 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
384 ; AVX512F-NEXT: vzeroupper
387 ; AVX512VL-LABEL: fptoui_2f64_to_4i32:
389 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
390 ; AVX512VL-NEXT: retq
392 ; AVX512DQ-LABEL: fptoui_2f64_to_4i32:
394 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
395 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
396 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
397 ; AVX512DQ-NEXT: vzeroupper
398 ; AVX512DQ-NEXT: retq
400 ; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
401 ; AVX512VLDQ: # %bb.0:
402 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
403 ; AVX512VLDQ-NEXT: retq
404 %cvt = fptoui <2 x double> %a to <2 x i32>
405 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
406 ret <4 x i32> %ext
407 }
409 define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
410 ; SSE-LABEL: fptoui_2f64_to_2i32:
412 ; SSE-NEXT: cvttsd2si %xmm0, %rax
413 ; SSE-NEXT: movd %eax, %xmm1
414 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
415 ; SSE-NEXT: cvttsd2si %xmm0, %rax
416 ; SSE-NEXT: movd %eax, %xmm0
417 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
418 ; SSE-NEXT: movdqa %xmm1, %xmm0
421 ; AVX1-LABEL: fptoui_2f64_to_2i32:
423 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
424 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
425 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
426 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
427 ; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
428 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3
429 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
430 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
431 ; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
432 ; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
433 ; AVX1-NEXT: vzeroupper
436 ; AVX2-LABEL: fptoui_2f64_to_2i32:
438 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
439 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
440 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
441 ; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
442 ; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
443 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
444 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
445 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
446 ; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1
447 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
448 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
449 ; AVX2-NEXT: vzeroupper
452 ; AVX512F-LABEL: fptoui_2f64_to_2i32:
454 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
455 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
456 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
457 ; AVX512F-NEXT: vzeroupper
460 ; AVX512VL-LABEL: fptoui_2f64_to_2i32:
462 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
463 ; AVX512VL-NEXT: retq
465 ; AVX512DQ-LABEL: fptoui_2f64_to_2i32:
467 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
468 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
469 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
470 ; AVX512DQ-NEXT: vzeroupper
471 ; AVX512DQ-NEXT: retq
473 ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
474 ; AVX512VLDQ: # %bb.0:
475 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
476 ; AVX512VLDQ-NEXT: retq
477 %cvt = fptoui <2 x double> %a to <2 x i32>
478 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
479 ret <4 x i32> %ext
480 }
482 define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
483 ; SSE-LABEL: fptoui_4f64_to_2i32:
485 ; SSE-NEXT: cvttsd2si %xmm0, %rax
486 ; SSE-NEXT: movd %eax, %xmm1
487 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
488 ; SSE-NEXT: cvttsd2si %xmm0, %rax
489 ; SSE-NEXT: movd %eax, %xmm0
490 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
491 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
494 ; AVX1-LABEL: fptoui_4f64_to_2i32:
496 ; AVX1-NEXT: vmovapd %xmm0, %xmm0
497 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
498 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
499 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
500 ; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
501 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm1
502 ; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
503 ; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
504 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
505 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
506 ; AVX1-NEXT: vzeroupper
509 ; AVX2-LABEL: fptoui_4f64_to_2i32:
511 ; AVX2-NEXT: vmovapd %xmm0, %xmm0
512 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
513 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
514 ; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
515 ; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
516 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
517 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
518 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
519 ; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1
520 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
521 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
522 ; AVX2-NEXT: vzeroupper
525 ; AVX512F-LABEL: fptoui_4f64_to_2i32:
527 ; AVX512F-NEXT: vmovaps %xmm0, %xmm0
528 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
529 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
530 ; AVX512F-NEXT: vzeroupper
533 ; AVX512VL-LABEL: fptoui_4f64_to_2i32:
535 ; AVX512VL-NEXT: vmovaps %xmm0, %xmm0
536 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
537 ; AVX512VL-NEXT: vzeroupper
538 ; AVX512VL-NEXT: retq
540 ; AVX512DQ-LABEL: fptoui_4f64_to_2i32:
542 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
543 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
544 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
545 ; AVX512DQ-NEXT: vzeroupper
546 ; AVX512DQ-NEXT: retq
548 ; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32:
549 ; AVX512VLDQ: # %bb.0:
550 ; AVX512VLDQ-NEXT: vmovaps %xmm0, %xmm0
551 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
552 ; AVX512VLDQ-NEXT: vzeroupper
553 ; AVX512VLDQ-NEXT: retq
554 %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
555 %cvt = fptoui <4 x double> %ext to <4 x i32>
556 ret <4 x i32> %cvt
557 }
559 define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
560 ; SSE-LABEL: fptoui_4f64_to_4i64:
562 ; SSE-NEXT: movapd %xmm0, %xmm2
563 ; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
564 ; SSE-NEXT: subsd %xmm3, %xmm0
565 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
566 ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
567 ; SSE-NEXT: xorq %rax, %rcx
568 ; SSE-NEXT: cvttsd2si %xmm2, %rdx
569 ; SSE-NEXT: ucomisd %xmm3, %xmm2
570 ; SSE-NEXT: cmovaeq %rcx, %rdx
571 ; SSE-NEXT: movq %rdx, %xmm0
572 ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
573 ; SSE-NEXT: movapd %xmm2, %xmm4
574 ; SSE-NEXT: subsd %xmm3, %xmm4
575 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
576 ; SSE-NEXT: xorq %rax, %rcx
577 ; SSE-NEXT: cvttsd2si %xmm2, %rdx
578 ; SSE-NEXT: ucomisd %xmm3, %xmm2
579 ; SSE-NEXT: cmovaeq %rcx, %rdx
580 ; SSE-NEXT: movq %rdx, %xmm2
581 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
582 ; SSE-NEXT: movapd %xmm1, %xmm2
583 ; SSE-NEXT: subsd %xmm3, %xmm2
584 ; SSE-NEXT: cvttsd2si %xmm2, %rcx
585 ; SSE-NEXT: xorq %rax, %rcx
586 ; SSE-NEXT: cvttsd2si %xmm1, %rdx
587 ; SSE-NEXT: ucomisd %xmm3, %xmm1
588 ; SSE-NEXT: cmovaeq %rcx, %rdx
589 ; SSE-NEXT: movq %rdx, %xmm2
590 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
591 ; SSE-NEXT: movapd %xmm1, %xmm4
592 ; SSE-NEXT: subsd %xmm3, %xmm4
593 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
594 ; SSE-NEXT: xorq %rax, %rcx
595 ; SSE-NEXT: cvttsd2si %xmm1, %rax
596 ; SSE-NEXT: ucomisd %xmm3, %xmm1
597 ; SSE-NEXT: cmovaeq %rcx, %rax
598 ; SSE-NEXT: movq %rax, %xmm1
599 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
600 ; SSE-NEXT: movdqa %xmm2, %xmm1
603 ; AVX1-LABEL: fptoui_4f64_to_4i64:
605 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
606 ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
607 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
608 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
609 ; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
610 ; AVX1-NEXT: xorq %rcx, %rax
611 ; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
612 ; AVX1-NEXT: vucomisd %xmm1, %xmm2
613 ; AVX1-NEXT: cmovaeq %rax, %rdx
614 ; AVX1-NEXT: vmovq %rdx, %xmm3
615 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
616 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4
617 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
618 ; AVX1-NEXT: xorq %rcx, %rax
619 ; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
620 ; AVX1-NEXT: vucomisd %xmm1, %xmm2
621 ; AVX1-NEXT: cmovaeq %rax, %rdx
622 ; AVX1-NEXT: vmovq %rdx, %xmm2
623 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
624 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3
625 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
626 ; AVX1-NEXT: xorq %rcx, %rax
627 ; AVX1-NEXT: vcvttsd2si %xmm0, %rdx
628 ; AVX1-NEXT: vucomisd %xmm1, %xmm0
629 ; AVX1-NEXT: cmovaeq %rax, %rdx
630 ; AVX1-NEXT: vmovq %rdx, %xmm3
631 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
632 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm4
633 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
634 ; AVX1-NEXT: xorq %rcx, %rax
635 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
636 ; AVX1-NEXT: vucomisd %xmm1, %xmm0
637 ; AVX1-NEXT: cmovaeq %rax, %rcx
638 ; AVX1-NEXT: vmovq %rcx, %xmm0
639 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
640 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
643 ; AVX2-LABEL: fptoui_4f64_to_4i64:
645 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
646 ; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
647 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3
648 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
649 ; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
650 ; AVX2-NEXT: xorq %rcx, %rax
651 ; AVX2-NEXT: vcvttsd2si %xmm2, %rdx
652 ; AVX2-NEXT: vucomisd %xmm1, %xmm2
653 ; AVX2-NEXT: cmovaeq %rax, %rdx
654 ; AVX2-NEXT: vmovq %rdx, %xmm3
655 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
656 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4
657 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
658 ; AVX2-NEXT: xorq %rcx, %rax
659 ; AVX2-NEXT: vcvttsd2si %xmm2, %rdx
660 ; AVX2-NEXT: vucomisd %xmm1, %xmm2
661 ; AVX2-NEXT: cmovaeq %rax, %rdx
662 ; AVX2-NEXT: vmovq %rdx, %xmm2
663 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
664 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3
665 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
666 ; AVX2-NEXT: xorq %rcx, %rax
667 ; AVX2-NEXT: vcvttsd2si %xmm0, %rdx
668 ; AVX2-NEXT: vucomisd %xmm1, %xmm0
669 ; AVX2-NEXT: cmovaeq %rax, %rdx
670 ; AVX2-NEXT: vmovq %rdx, %xmm3
671 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
672 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm4
673 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
674 ; AVX2-NEXT: xorq %rcx, %rax
675 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
676 ; AVX2-NEXT: vucomisd %xmm1, %xmm0
677 ; AVX2-NEXT: cmovaeq %rax, %rcx
678 ; AVX2-NEXT: vmovq %rcx, %xmm0
679 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
680 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
683 ; AVX512F-LABEL: fptoui_4f64_to_4i64:
685 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
686 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
687 ; AVX512F-NEXT: vmovq %rax, %xmm2
688 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
689 ; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
690 ; AVX512F-NEXT: vmovq %rax, %xmm1
691 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
692 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
693 ; AVX512F-NEXT: vmovq %rax, %xmm2
694 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
695 ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
696 ; AVX512F-NEXT: vmovq %rax, %xmm0
697 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
698 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
701 ; AVX512VL-LABEL: fptoui_4f64_to_4i64:
703 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
704 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
705 ; AVX512VL-NEXT: vmovq %rax, %xmm2
706 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
707 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
708 ; AVX512VL-NEXT: vmovq %rax, %xmm1
709 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
710 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
711 ; AVX512VL-NEXT: vmovq %rax, %xmm2
712 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
713 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
714 ; AVX512VL-NEXT: vmovq %rax, %xmm0
715 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
716 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
717 ; AVX512VL-NEXT: retq
719 ; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
721 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
722 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
723 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
724 ; AVX512DQ-NEXT: retq
726 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64:
727 ; AVX512VLDQ: # %bb.0:
728 ; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0
729 ; AVX512VLDQ-NEXT: retq
730 %cvt = fptoui <4 x double> %a to <4 x i64>
731 ret <4 x i64> %cvt
732 }
734 define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
735 ; SSE-LABEL: fptoui_4f64_to_4i32:
737 ; SSE-NEXT: cvttsd2si %xmm1, %rax
738 ; SSE-NEXT: movd %eax, %xmm2
739 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
740 ; SSE-NEXT: cvttsd2si %xmm1, %rax
741 ; SSE-NEXT: movd %eax, %xmm1
742 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
743 ; SSE-NEXT: cvttsd2si %xmm0, %rax
744 ; SSE-NEXT: movd %eax, %xmm1
745 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
746 ; SSE-NEXT: cvttsd2si %xmm0, %rax
747 ; SSE-NEXT: movd %eax, %xmm0
748 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
749 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
750 ; SSE-NEXT: movdqa %xmm1, %xmm0
753 ; AVX1-LABEL: fptoui_4f64_to_4i32:
755 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
756 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
757 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
758 ; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
759 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm1
760 ; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
761 ; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
762 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
763 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
764 ; AVX1-NEXT: vzeroupper
767 ; AVX2-LABEL: fptoui_4f64_to_4i32:
769 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
770 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
771 ; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
772 ; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
773 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
774 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
775 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
776 ; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1
777 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
778 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
779 ; AVX2-NEXT: vzeroupper
782 ; AVX512F-LABEL: fptoui_4f64_to_4i32:
784 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
785 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
786 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
787 ; AVX512F-NEXT: vzeroupper
790 ; AVX512VL-LABEL: fptoui_4f64_to_4i32:
792 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
793 ; AVX512VL-NEXT: vzeroupper
794 ; AVX512VL-NEXT: retq
796 ; AVX512DQ-LABEL: fptoui_4f64_to_4i32:
798 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
799 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
800 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
801 ; AVX512DQ-NEXT: vzeroupper
802 ; AVX512DQ-NEXT: retq
804 ; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32:
805 ; AVX512VLDQ: # %bb.0:
806 ; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
807 ; AVX512VLDQ-NEXT: vzeroupper
808 ; AVX512VLDQ-NEXT: retq
809 %cvt = fptoui <4 x double> %a to <4 x i32>
810 ret <4 x i32> %cvt
811 }
814 ; Float to Signed Integer
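; fptosi from float vectors. The i32 results map directly onto (v)cvttps2dq;
; the i64 results are again built element by element with cvttss2si unless
; AVX512DQ's packed vcvttps2qq is available.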
817 define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
818 ; SSE-LABEL: fptosi_2f32_to_2i32:
820 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
823 ; AVX-LABEL: fptosi_2f32_to_2i32:
825 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
827 %cvt = fptosi <2 x float> %a to <2 x i32>
828 ret <2 x i32> %cvt
829 }
831 define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
832 ; SSE-LABEL: fptosi_4f32_to_4i32:
834 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
837 ; AVX-LABEL: fptosi_4f32_to_4i32:
839 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
841 %cvt = fptosi <4 x float> %a to <4 x i32>
842 ret <4 x i32> %cvt
843 }
845 define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
846 ; SSE-LABEL: fptosi_2f32_to_2i64:
848 ; SSE-NEXT: cvttss2si %xmm0, %rax
849 ; SSE-NEXT: movq %rax, %xmm1
850 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
851 ; SSE-NEXT: cvttss2si %xmm0, %rax
852 ; SSE-NEXT: movq %rax, %xmm0
853 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
854 ; SSE-NEXT: movdqa %xmm1, %xmm0
857 ; VEX-LABEL: fptosi_2f32_to_2i64:
859 ; VEX-NEXT: vcvttss2si %xmm0, %rax
860 ; VEX-NEXT: vmovq %rax, %xmm1
861 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
862 ; VEX-NEXT: vcvttss2si %xmm0, %rax
863 ; VEX-NEXT: vmovq %rax, %xmm0
864 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
867 ; AVX512F-LABEL: fptosi_2f32_to_2i64:
869 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
870 ; AVX512F-NEXT: vmovq %rax, %xmm1
871 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
872 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
873 ; AVX512F-NEXT: vmovq %rax, %xmm0
874 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
877 ; AVX512VL-LABEL: fptosi_2f32_to_2i64:
879 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
880 ; AVX512VL-NEXT: vmovq %rax, %xmm1
881 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
882 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
883 ; AVX512VL-NEXT: vmovq %rax, %xmm0
884 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
885 ; AVX512VL-NEXT: retq
887 ; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
889 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
890 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
891 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
892 ; AVX512DQ-NEXT: vzeroupper
893 ; AVX512DQ-NEXT: retq
895 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
896 ; AVX512VLDQ: # %bb.0:
897 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
898 ; AVX512VLDQ-NEXT: retq
899 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
900 %cvt = fptosi <2 x float> %shuf to <2 x i64>
901 ret <2 x i64> %cvt
902 }
904 define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
905 ; SSE-LABEL: fptosi_4f32_to_2i64:
907 ; SSE-NEXT: cvttss2si %xmm0, %rax
908 ; SSE-NEXT: movq %rax, %xmm1
909 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
910 ; SSE-NEXT: cvttss2si %xmm0, %rax
911 ; SSE-NEXT: movq %rax, %xmm0
912 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
913 ; SSE-NEXT: movdqa %xmm1, %xmm0
916 ; VEX-LABEL: fptosi_4f32_to_2i64:
918 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
919 ; VEX-NEXT: vcvttss2si %xmm1, %rax
920 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
921 ; VEX-NEXT: vmovq %rcx, %xmm0
922 ; VEX-NEXT: vmovq %rax, %xmm1
923 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
926 ; AVX512F-LABEL: fptosi_4f32_to_2i64:
928 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
929 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
930 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
931 ; AVX512F-NEXT: vmovq %rcx, %xmm0
932 ; AVX512F-NEXT: vmovq %rax, %xmm1
933 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
936 ; AVX512VL-LABEL: fptosi_4f32_to_2i64:
938 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
939 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
940 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
941 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
942 ; AVX512VL-NEXT: vmovq %rax, %xmm1
943 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
944 ; AVX512VL-NEXT: retq
946 ; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
948 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
949 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
950 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
951 ; AVX512DQ-NEXT: vzeroupper
952 ; AVX512DQ-NEXT: retq
954 ; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
955 ; AVX512VLDQ: # %bb.0:
956 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
957 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
958 ; AVX512VLDQ-NEXT: vzeroupper
959 ; AVX512VLDQ-NEXT: retq
960 %cvt = fptosi <4 x float> %a to <4 x i64>
961 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
962 ret <2 x i64> %shuf
963 }
965 define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
966 ; SSE-LABEL: fptosi_8f32_to_8i32:
968 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
969 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1
972 ; AVX-LABEL: fptosi_8f32_to_8i32:
974 ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
976 %cvt = fptosi <8 x float> %a to <8 x i32>
977 ret <8 x i32> %cvt
978 }
980 define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
981 ; SSE-LABEL: fptosi_4f32_to_4i64:
983 ; SSE-NEXT: cvttss2si %xmm0, %rax
984 ; SSE-NEXT: movq %rax, %xmm2
985 ; SSE-NEXT: movaps %xmm0, %xmm1
986 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
987 ; SSE-NEXT: cvttss2si %xmm1, %rax
988 ; SSE-NEXT: movq %rax, %xmm1
989 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
990 ; SSE-NEXT: movaps %xmm0, %xmm1
991 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
992 ; SSE-NEXT: cvttss2si %xmm1, %rax
993 ; SSE-NEXT: movq %rax, %xmm3
994 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
995 ; SSE-NEXT: cvttss2si %xmm0, %rax
996 ; SSE-NEXT: movq %rax, %xmm1
997 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
998 ; SSE-NEXT: movdqa %xmm2, %xmm0
1001 ; AVX1-LABEL: fptosi_4f32_to_4i64:
1003 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1004 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1005 ; AVX1-NEXT: vmovq %rax, %xmm1
1006 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1007 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
1008 ; AVX1-NEXT: vmovq %rax, %xmm2
1009 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1010 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1011 ; AVX1-NEXT: vmovq %rax, %xmm2
1012 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1013 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1014 ; AVX1-NEXT: vmovq %rax, %xmm0
1015 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1016 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1019 ; AVX2-LABEL: fptosi_4f32_to_4i64:
1021 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1022 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1023 ; AVX2-NEXT: vmovq %rax, %xmm1
1024 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1025 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
1026 ; AVX2-NEXT: vmovq %rax, %xmm2
1027 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1028 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1029 ; AVX2-NEXT: vmovq %rax, %xmm2
1030 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1031 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1032 ; AVX2-NEXT: vmovq %rax, %xmm0
1033 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1034 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1037 ; AVX512F-LABEL: fptosi_4f32_to_4i64:
1039 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1040 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
1041 ; AVX512F-NEXT: vmovq %rax, %xmm1
1042 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1043 ; AVX512F-NEXT: vcvttss2si %xmm2, %rax
1044 ; AVX512F-NEXT: vmovq %rax, %xmm2
1045 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1046 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1047 ; AVX512F-NEXT: vmovq %rax, %xmm2
1048 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1049 ; AVX512F-NEXT: vcvttss2si %xmm0, %rax
1050 ; AVX512F-NEXT: vmovq %rax, %xmm0
1051 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1052 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1053 ; AVX512F-NEXT: retq
1055 ; AVX512VL-LABEL: fptosi_4f32_to_4i64:
1056 ; AVX512VL: # %bb.0:
1057 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1058 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1059 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1060 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1061 ; AVX512VL-NEXT: vcvttss2si %xmm2, %rax
1062 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1063 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1064 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1065 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1066 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1067 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
1068 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1069 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1070 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1071 ; AVX512VL-NEXT: retq
1073 ; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
1074 ; AVX512DQ: # %bb.0:
1075 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1076 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1077 ; AVX512DQ-NEXT: retq
1079 ; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
1080 ; AVX512VLDQ: # %bb.0:
1081 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
1082 ; AVX512VLDQ-NEXT: retq
1083 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1084 %cvt = fptosi <4 x float> %shuf to <4 x i64>
1085 ret <4 x i64> %cvt
1086 }
1088 define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
1089 ; SSE-LABEL: fptosi_8f32_to_4i64:
1091 ; SSE-NEXT: cvttss2si %xmm0, %rax
1092 ; SSE-NEXT: movq %rax, %xmm2
1093 ; SSE-NEXT: movaps %xmm0, %xmm1
1094 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
1095 ; SSE-NEXT: cvttss2si %xmm1, %rax
1096 ; SSE-NEXT: movq %rax, %xmm1
1097 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1098 ; SSE-NEXT: movaps %xmm0, %xmm1
1099 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
1100 ; SSE-NEXT: cvttss2si %xmm1, %rax
1101 ; SSE-NEXT: movq %rax, %xmm3
1102 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1103 ; SSE-NEXT: cvttss2si %xmm0, %rax
1104 ; SSE-NEXT: movq %rax, %xmm1
1105 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1106 ; SSE-NEXT: movdqa %xmm2, %xmm0
1109 ; AVX1-LABEL: fptosi_8f32_to_4i64:
1111 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1112 ; AVX1-NEXT: vcvttss2si %xmm1, %rax
1113 ; AVX1-NEXT: vmovq %rax, %xmm1
1114 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1115 ; AVX1-NEXT: vcvttss2si %xmm2, %rax
1116 ; AVX1-NEXT: vmovq %rax, %xmm2
1117 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1118 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1119 ; AVX1-NEXT: vmovq %rax, %xmm2
1120 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1121 ; AVX1-NEXT: vcvttss2si %xmm0, %rax
1122 ; AVX1-NEXT: vmovq %rax, %xmm0
1123 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1124 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1127 ; AVX2-LABEL: fptosi_8f32_to_4i64:
1129 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
1130 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
1131 ; AVX2-NEXT: vmovq %rax, %xmm1
1132 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1133 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
1134 ; AVX2-NEXT: vmovq %rax, %xmm2
1135 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1136 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1137 ; AVX2-NEXT: vmovq %rax, %xmm2
1138 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1139 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
1140 ; AVX2-NEXT: vmovq %rax, %xmm0
1141 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1142 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1145 ; AVX512F-LABEL: fptosi_8f32_to_4i64:
1147 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1148 ; AVX512F-NEXT: vcvttss2si %xmm1, %rax
1149 ; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
1150 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1151 ; AVX512F-NEXT: vcvttss2si %xmm1, %rdx
1152 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
1153 ; AVX512F-NEXT: vcvttss2si %xmm0, %rsi
1154 ; AVX512F-NEXT: vmovq %rsi, %xmm0
1155 ; AVX512F-NEXT: vmovq %rdx, %xmm1
1156 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1157 ; AVX512F-NEXT: vmovq %rcx, %xmm1
1158 ; AVX512F-NEXT: vmovq %rax, %xmm2
1159 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1160 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1161 ; AVX512F-NEXT: retq
1163 ; AVX512VL-LABEL: fptosi_8f32_to_4i64:
1164 ; AVX512VL: # %bb.0:
1165 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1166 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
1167 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
1168 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1169 ; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx
1170 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
1171 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi
1172 ; AVX512VL-NEXT: vmovq %rsi, %xmm0
1173 ; AVX512VL-NEXT: vmovq %rdx, %xmm1
1174 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1175 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
1176 ; AVX512VL-NEXT: vmovq %rax, %xmm2
1177 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1178 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1179 ; AVX512VL-NEXT: retq
1181 ; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
1182 ; AVX512DQ: # %bb.0:
1183 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
1184 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1185 ; AVX512DQ-NEXT: retq
1187 ; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
1188 ; AVX512VLDQ: # %bb.0:
1189 ; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0
1190 ; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1191 ; AVX512VLDQ-NEXT: retq
1192 %cvt = fptosi <8 x float> %a to <8 x i64>
1193 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1194 ret <4 x i64> %shuf
1195 }
1198 ; Float to Unsigned Integer
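; fptoui from float vectors. The same bound-and-fixup lowering as the double
; cases is used here, with 2^31 (cmpltps/subps plus a select) for i32 results
; and 2^63 (subss/xorq/cmovaeq) for i64 results, until AVX512 provides
; vcvttps2udq, vcvttss2usi and (with DQ) vcvttps2uqq.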
1201 define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
1202 ; SSE-LABEL: fptoui_2f32_to_2i32:
1204 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1205 ; SSE-NEXT: movaps %xmm0, %xmm1
1206 ; SSE-NEXT: cmpltps %xmm2, %xmm1
1207 ; SSE-NEXT: cvttps2dq %xmm0, %xmm3
1208 ; SSE-NEXT: subps %xmm2, %xmm0
1209 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1210 ; SSE-NEXT: xorps {{.*}}(%rip), %xmm0
1211 ; SSE-NEXT: andps %xmm1, %xmm3
1212 ; SSE-NEXT: andnps %xmm0, %xmm1
1213 ; SSE-NEXT: orps %xmm3, %xmm1
1214 ; SSE-NEXT: movaps %xmm1, %xmm0
1217 ; AVX1-LABEL: fptoui_2f32_to_2i32:
1219 ; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1220 ; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
1221 ; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1
1222 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
1223 ; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
1224 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1225 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1228 ; AVX2-LABEL: fptoui_2f32_to_2i32:
1230 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1231 ; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
1232 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1233 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1234 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
1235 ; AVX2-NEXT: vxorps %xmm3, %xmm1, %xmm1
1236 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1237 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1240 ; AVX512F-LABEL: fptoui_2f32_to_2i32:
1242 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1243 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1244 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1245 ; AVX512F-NEXT: vzeroupper
1246 ; AVX512F-NEXT: retq
1248 ; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1249 ; AVX512VL: # %bb.0:
1250 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1251 ; AVX512VL-NEXT: retq
1253 ; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
1254 ; AVX512DQ: # %bb.0:
1255 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1256 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1257 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1258 ; AVX512DQ-NEXT: vzeroupper
1259 ; AVX512DQ-NEXT: retq
1261 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1262 ; AVX512VLDQ: # %bb.0:
1263 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1264 ; AVX512VLDQ-NEXT: retq
1265 %cvt = fptoui <2 x float> %a to <2 x i32>
1266 ret <2 x i32> %cvt
1267 }
1269 define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
1270 ; SSE-LABEL: fptoui_4f32_to_4i32:
1272 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1273 ; SSE-NEXT: movaps %xmm0, %xmm1
1274 ; SSE-NEXT: cmpltps %xmm2, %xmm1
1275 ; SSE-NEXT: cvttps2dq %xmm0, %xmm3
1276 ; SSE-NEXT: subps %xmm2, %xmm0
1277 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
1278 ; SSE-NEXT: xorps {{.*}}(%rip), %xmm0
1279 ; SSE-NEXT: andps %xmm1, %xmm3
1280 ; SSE-NEXT: andnps %xmm0, %xmm1
1281 ; SSE-NEXT: orps %xmm3, %xmm1
1282 ; SSE-NEXT: movaps %xmm1, %xmm0
1285 ; AVX1-LABEL: fptoui_4f32_to_4i32:
1287 ; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1288 ; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
1289 ; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1
1290 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
1291 ; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
1292 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
1293 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1296 ; AVX2-LABEL: fptoui_4f32_to_4i32:
1298 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1299 ; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
1300 ; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
1301 ; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
1302 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
1303 ; AVX2-NEXT: vxorps %xmm3, %xmm1, %xmm1
1304 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
1305 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1308 ; AVX512F-LABEL: fptoui_4f32_to_4i32:
1310 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1311 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1312 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1313 ; AVX512F-NEXT: vzeroupper
1314 ; AVX512F-NEXT: retq
1316 ; AVX512VL-LABEL: fptoui_4f32_to_4i32:
1317 ; AVX512VL: # %bb.0:
1318 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1319 ; AVX512VL-NEXT: retq
1321 ; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
1322 ; AVX512DQ: # %bb.0:
1323 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1324 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1325 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1326 ; AVX512DQ-NEXT: vzeroupper
1327 ; AVX512DQ-NEXT: retq
1329 ; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
1330 ; AVX512VLDQ: # %bb.0:
1331 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1332 ; AVX512VLDQ-NEXT: retq
1333 %cvt = fptoui <4 x float> %a to <4 x i32>
1334 ret <4 x i32> %cvt
1335 }
1337 define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
1338 ; SSE-LABEL: fptoui_2f32_to_2i64:
1340 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1341 ; SSE-NEXT: movaps %xmm0, %xmm1
1342 ; SSE-NEXT: subss %xmm2, %xmm1
1343 ; SSE-NEXT: cvttss2si %xmm1, %rax
1344 ; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1345 ; SSE-NEXT: xorq %rcx, %rax
1346 ; SSE-NEXT: cvttss2si %xmm0, %rdx
1347 ; SSE-NEXT: ucomiss %xmm2, %xmm0
1348 ; SSE-NEXT: cmovaeq %rax, %rdx
1349 ; SSE-NEXT: movq %rdx, %xmm1
1350 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
1351 ; SSE-NEXT: movaps %xmm0, %xmm3
1352 ; SSE-NEXT: subss %xmm2, %xmm3
1353 ; SSE-NEXT: cvttss2si %xmm3, %rax
1354 ; SSE-NEXT: xorq %rcx, %rax
1355 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1356 ; SSE-NEXT: ucomiss %xmm2, %xmm0
1357 ; SSE-NEXT: cmovaeq %rax, %rcx
1358 ; SSE-NEXT: movq %rcx, %xmm0
1359 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1360 ; SSE-NEXT: movdqa %xmm1, %xmm0
1363 ; VEX-LABEL: fptoui_2f32_to_2i64:
1365 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1366 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
1367 ; VEX-NEXT: vcvttss2si %xmm2, %rax
1368 ; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1369 ; VEX-NEXT: xorq %rcx, %rax
1370 ; VEX-NEXT: vcvttss2si %xmm0, %rdx
1371 ; VEX-NEXT: vucomiss %xmm1, %xmm0
1372 ; VEX-NEXT: cmovaeq %rax, %rdx
1373 ; VEX-NEXT: vmovq %rdx, %xmm2
1374 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1375 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3
1376 ; VEX-NEXT: vcvttss2si %xmm3, %rax
1377 ; VEX-NEXT: xorq %rcx, %rax
1378 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1379 ; VEX-NEXT: vucomiss %xmm1, %xmm0
1380 ; VEX-NEXT: cmovaeq %rax, %rcx
1381 ; VEX-NEXT: vmovq %rcx, %xmm0
1382 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1385 ; AVX512F-LABEL: fptoui_2f32_to_2i64:
1387 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1388 ; AVX512F-NEXT: vmovq %rax, %xmm1
1389 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1390 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
1391 ; AVX512F-NEXT: vmovq %rax, %xmm0
1392 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1393 ; AVX512F-NEXT: retq
1395 ; AVX512VL-LABEL: fptoui_2f32_to_2i64:
1396 ; AVX512VL: # %bb.0:
1397 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1398 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1399 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1400 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
1401 ; AVX512VL-NEXT: vmovq %rax, %xmm0
1402 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1403 ; AVX512VL-NEXT: retq
1405 ; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
1406 ; AVX512DQ: # %bb.0:
1407 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1408 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1409 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1410 ; AVX512DQ-NEXT: vzeroupper
1411 ; AVX512DQ-NEXT: retq
1413 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
1414 ; AVX512VLDQ: # %bb.0:
1415 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
1416 ; AVX512VLDQ-NEXT: retq
1417 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1418 %cvt = fptoui <2 x float> %shuf to <2 x i64>
1419 ret <2 x i64> %cvt
1420 }
1422 define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
1423 ; SSE-LABEL: fptoui_4f32_to_2i64:
1425 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1426 ; SSE-NEXT: movaps %xmm0, %xmm1
1427 ; SSE-NEXT: subss %xmm2, %xmm1
1428 ; SSE-NEXT: cvttss2si %xmm1, %rax
1429 ; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1430 ; SSE-NEXT: xorq %rcx, %rax
1431 ; SSE-NEXT: cvttss2si %xmm0, %rdx
1432 ; SSE-NEXT: ucomiss %xmm2, %xmm0
1433 ; SSE-NEXT: cmovaeq %rax, %rdx
1434 ; SSE-NEXT: movq %rdx, %xmm1
1435 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
1436 ; SSE-NEXT: movaps %xmm0, %xmm3
1437 ; SSE-NEXT: subss %xmm2, %xmm3
1438 ; SSE-NEXT: cvttss2si %xmm3, %rax
1439 ; SSE-NEXT: xorq %rcx, %rax
1440 ; SSE-NEXT: cvttss2si %xmm0, %rcx
1441 ; SSE-NEXT: ucomiss %xmm2, %xmm0
1442 ; SSE-NEXT: cmovaeq %rax, %rcx
1443 ; SSE-NEXT: movq %rcx, %xmm0
1444 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1445 ; SSE-NEXT: movdqa %xmm1, %xmm0
1448 ; VEX-LABEL: fptoui_4f32_to_2i64:
1450 ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1451 ; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1452 ; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3
1453 ; VEX-NEXT: vcvttss2si %xmm3, %rax
1454 ; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1455 ; VEX-NEXT: xorq %rcx, %rax
1456 ; VEX-NEXT: vcvttss2si %xmm1, %rdx
1457 ; VEX-NEXT: vucomiss %xmm2, %xmm1
1458 ; VEX-NEXT: cmovaeq %rax, %rdx
1459 ; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1
1460 ; VEX-NEXT: vcvttss2si %xmm1, %rax
1461 ; VEX-NEXT: xorq %rcx, %rax
1462 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
1463 ; VEX-NEXT: vucomiss %xmm2, %xmm0
1464 ; VEX-NEXT: cmovaeq %rax, %rcx
1465 ; VEX-NEXT: vmovq %rcx, %xmm0
1466 ; VEX-NEXT: vmovq %rdx, %xmm1
1467 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1470 ; AVX512F-LABEL: fptoui_4f32_to_2i64:
1472 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1473 ; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
1474 ; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
1475 ; AVX512F-NEXT: vmovq %rcx, %xmm0
1476 ; AVX512F-NEXT: vmovq %rax, %xmm1
1477 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1478 ; AVX512F-NEXT: retq
1480 ; AVX512VL-LABEL: fptoui_4f32_to_2i64:
1481 ; AVX512VL: # %bb.0:
1482 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1483 ; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
1484 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
1485 ; AVX512VL-NEXT: vmovq %rcx, %xmm0
1486 ; AVX512VL-NEXT: vmovq %rax, %xmm1
1487 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1488 ; AVX512VL-NEXT: retq
1490 ; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
1491 ; AVX512DQ: # %bb.0:
1492 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1493 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1494 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1495 ; AVX512DQ-NEXT: vzeroupper
1496 ; AVX512DQ-NEXT: retq
1498 ; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
1499 ; AVX512VLDQ: # %bb.0:
1500 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
1501 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1502 ; AVX512VLDQ-NEXT: vzeroupper
1503 ; AVX512VLDQ-NEXT: retq
1504 %cvt = fptoui <4 x float> %a to <4 x i64>
1505 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
1506 ret <2 x i64> %shuf
1507 }
define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm4 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: cmpltps %xmm4, %xmm2
; SSE-NEXT: cvttps2dq %xmm0, %xmm3
; SSE-NEXT: subps %xmm4, %xmm0
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: movaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: xorps %xmm5, %xmm0
; SSE-NEXT: andps %xmm2, %xmm3
; SSE-NEXT: andnps %xmm0, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm4, %xmm3
; SSE-NEXT: cvttps2dq %xmm1, %xmm0
; SSE-NEXT: subps %xmm4, %xmm1
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
; SSE-NEXT: xorps %xmm5, %xmm1
; SSE-NEXT: andps %xmm3, %xmm0
; SSE-NEXT: andnps %xmm1, %xmm3
; SSE-NEXT: orps %xmm0, %xmm3
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_8f32_to_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX1-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vsubps %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_8f32_to_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX2-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_8f32_to_8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_8f32_to_8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: cvttss2si %xmm2, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_4f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm2, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm2
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm2
; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm3, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm3
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm0
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: vucomiss %xmm1, %xmm0
; AVX1-NEXT: cmovaeq %rax, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_4f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm2, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm2
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm3, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm3
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm0
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: vucomiss %xmm1, %xmm0
; AVX2-NEXT: cmovaeq %rax, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2usi %xmm2, %rax
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_4f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
; AVX512VLDQ-NEXT: retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: cvttss2si %xmm2, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_8f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm2, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm2
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm2
; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm3, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm3
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm0
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: vucomiss %xmm1, %xmm0
; AVX1-NEXT: cmovaeq %rax, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_8f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm2, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm2
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm3, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm3
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm0
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: vucomiss %xmm1, %xmm0
; AVX2-NEXT: cmovaeq %rax, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_8f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi
; AVX512F-NEXT: vmovq %rsi, %xmm0
; AVX512F-NEXT: vmovq %rdx, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vmovq %rcx, %xmm1
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_8f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi
; AVX512VL-NEXT: vmovq %rsi, %xmm0
; AVX512VL-NEXT: vmovq %rdx, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vmovq %rcx, %xmm1
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Constant Folding
;

define <2 x i64> @fptosi_2f64_to_2i64_const() {
; SSE-LABEL: fptosi_2f64_to_2i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32_const() {
; SSE-LABEL: fptosi_2f64_to_2i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptosi_4f64_to_4i64_const() {
; SSE-LABEL: fptosi_4f64_to_4i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT: retq
  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32_const() {
; SSE-LABEL: fptosi_4f64_to_4i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT: retq
  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f64_to_2i64_const() {
; SSE-LABEL: fptoui_2f64_to_2i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT: retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT: retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT: retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT: retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i32> @fptosi_4f32_to_4i32_const() {
; SSE-LABEL: fptosi_4f32_to_4i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT: retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64_const() {
; SSE-LABEL: fptosi_4f32_to_4i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT: retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT: retq
  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT: retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64_const() {
; SSE-LABEL: fptoui_4f32_to_4i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT: retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT: retq
  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
  ret <8 x i32> %cvt
}

;
; Special Cases
;

define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
; SSE-LABEL: fptosi_2f16_to_4i32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rax
; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: callq __gnu_f2h_ieee
; SSE-NEXT: movzwl %ax, %edi
; SSE-NEXT: callq __gnu_h2f_ieee
; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: callq __gnu_f2h_ieee
; SSE-NEXT: movzwl %ax, %edi
; SSE-NEXT: callq __gnu_h2f_ieee
; SSE-NEXT: cvttss2si %xmm0, %eax
; SSE-NEXT: cvttss2si (%rsp), %ecx # 4-byte Folded Reload
; SSE-NEXT: movd %ecx, %xmm0
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f16_to_4i32:
; VEX: # %bb.0:
; VEX-NEXT: pushq %rax
; VEX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; VEX-NEXT: vmovaps %xmm1, %xmm0
; VEX-NEXT: callq __gnu_f2h_ieee
; VEX-NEXT: movzwl %ax, %edi
; VEX-NEXT: callq __gnu_h2f_ieee
; VEX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
; VEX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; VEX-NEXT: # xmm0 = mem[0],zero,zero,zero
; VEX-NEXT: callq __gnu_f2h_ieee
; VEX-NEXT: movzwl %ax, %edi
; VEX-NEXT: callq __gnu_h2f_ieee
; VEX-NEXT: vcvttss2si %xmm0, %eax
; VEX-NEXT: vcvttss2si (%rsp), %ecx # 4-byte Folded Reload
; VEX-NEXT: vmovd %ecx, %xmm0
; VEX-NEXT: vmovd %eax, %xmm1
; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; VEX-NEXT: popq %rax
; VEX-NEXT: retq
;
; AVX512-LABEL: fptosi_2f16_to_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttss2si %xmm0, %eax
; AVX512-NEXT: vcvttss2si %xmm1, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512-NEXT: retq
  %cvt = fptosi <2 x half> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %ext
}

define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
; SSE-LABEL: fptosi_2f80_to_4i32:
; SSE: # %bb.0:
; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: orl $3072, %eax # imm = 0xC00
; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: orl $3072, %eax # imm = 0xC00
; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp)
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f80_to_4i32:
; AVX: # %bb.0:
; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %ext
}

define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
; SSE-LABEL: fptosi_2f128_to_4i32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: movq %rcx, %r14
; SSE-NEXT: movq %rdx, %rbx
; SSE-NEXT: callq __fixtfsi
; SSE-NEXT: movl %eax, %ebp
; SSE-NEXT: movq %rbx, %rdi
; SSE-NEXT: movq %r14, %rsi
; SSE-NEXT: callq __fixtfsi
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd %ebp, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f128_to_4i32:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rbp
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
; AVX-NEXT: movq %rcx, %r14
; AVX-NEXT: movq %rdx, %rbx
; AVX-NEXT: callq __fixtfsi
; AVX-NEXT: movl %eax, %ebp
; AVX-NEXT: movq %rbx, %rdi
; AVX-NEXT: movq %r14, %rsi
; AVX-NEXT: callq __fixtfsi
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vmovd %ebp, %xmm1
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %r14
; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
  %cvt = fptosi <2 x fp128> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %ext
}

define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f32_to_2i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
  %cvt = fptosi <2 x float> %a to <2 x i8>
  ret <2 x i8> %cvt
}

define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f32_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: retq
  %cvt = fptosi <2 x float> %a to <2 x i16>
  ret <2 x i16> %cvt
}

define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f32_to_2i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
  %cvt = fptoui <2 x float> %a to <2 x i8>
  ret <2 x i8> %cvt
}

define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f32_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: retq
  %cvt = fptoui <2 x float> %a to <2 x i16>
  ret <2 x i16> %cvt
}

define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> %a to <2 x i8>
  ret <2 x i8> %cvt
}

define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> %a to <2 x i16>
  ret <2 x i16> %cvt
}

define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
  %cvt = fptoui <2 x double> %a to <2 x i8>
  ret <2 x i8> %cvt
}

define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: retq
  %cvt = fptoui <2 x double> %a to <2 x i16>
  ret <2 x i16> %cvt
}

define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
; SSE-LABEL: fptosi_8f64_to_8i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: packssdw %xmm2, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_8f64_to_8i16:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; VEX-NEXT: vzeroupper
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_8f64_to_8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_8f64_to_8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_8f64_to_8i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VLDQ-NEXT: vzeroupper
; AVX512VLDQ-NEXT: retq
  %cvt = fptosi <8 x double> %a to <8 x i16>
  ret <8 x i16> %cvt
}

define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
; SSE-LABEL: fptoui_8f64_to_8i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_8f64_to_8i16:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; VEX-NEXT: vzeroupper
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_8f64_to_8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_8f64_to_8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_8f64_to_8i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VLDQ-NEXT: vzeroupper
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <8 x double> %a to <8 x i16>
  ret <8 x i16> %cvt
}

define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
; SSE-LABEL: fptosi_16f32_to_16i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptosi_16f32_to_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptosi_16f32_to_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: fptosi_16f32_to_16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %cvt = fptosi <16 x float> %a to <16 x i8>
  ret <16 x i8> %cvt
}

define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
; SSE-LABEL: fptoui_16f32_to_16i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_16f32_to_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_16f32_to_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: fptoui_16f32_to_16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %cvt = fptoui <16 x float> %a to <16 x i8>