1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1
5 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2
6 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
7 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
8 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
9 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ
11 ; 32-bit tests to make sure we're not doing anything stupid.
12 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown
13 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse
14 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2
15 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1
18 ; Signed Integer to Double
21 define <2 x float> @sitofp_2i32_to_2f32(<2 x i32> %a) {
22 ; SSE-LABEL: sitofp_2i32_to_2f32:
24 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
27 ; AVX-LABEL: sitofp_2i32_to_2f32:
29 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
31 %cvt = sitofp <2 x i32> %a to <2 x float>
35 define <2 x float> @uitofp_2i32_to_2f32(<2 x i32> %a) {
36 ; SSE2-LABEL: uitofp_2i32_to_2f32:
38 ; SSE2-NEXT: xorpd %xmm1, %xmm1
39 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
40 ; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
41 ; SSE2-NEXT: orpd %xmm1, %xmm0
42 ; SSE2-NEXT: subpd %xmm1, %xmm0
43 ; SSE2-NEXT: cvtpd2ps %xmm0, %xmm0
46 ; SSE41-LABEL: uitofp_2i32_to_2f32:
48 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
49 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
50 ; SSE41-NEXT: por %xmm1, %xmm0
51 ; SSE41-NEXT: subpd %xmm1, %xmm0
52 ; SSE41-NEXT: cvtpd2ps %xmm0, %xmm0
55 ; AVX1-LABEL: uitofp_2i32_to_2f32:
57 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
58 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
59 ; AVX1-NEXT: # xmm1 = mem[0,0]
60 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
61 ; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
62 ; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0
65 ; AVX2-LABEL: uitofp_2i32_to_2f32:
67 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
68 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
69 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
70 ; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
71 ; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0
74 ; AVX512F-LABEL: uitofp_2i32_to_2f32:
76 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
77 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
78 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
79 ; AVX512F-NEXT: vzeroupper
82 ; AVX512VL-LABEL: uitofp_2i32_to_2f32:
84 ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
87 ; AVX512DQ-LABEL: uitofp_2i32_to_2f32:
89 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
90 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
91 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
92 ; AVX512DQ-NEXT: vzeroupper
95 ; AVX512VLDQ-LABEL: uitofp_2i32_to_2f32:
96 ; AVX512VLDQ: # %bb.0:
97 ; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
98 ; AVX512VLDQ-NEXT: retq
99 %cvt = uitofp <2 x i32> %a to <2 x float>
103 define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
104 ; SSE2-LABEL: sitofp_2i64_to_2f64:
106 ; SSE2-NEXT: movq %xmm0, %rax
107 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1
108 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
109 ; SSE2-NEXT: movq %xmm0, %rax
110 ; SSE2-NEXT: xorps %xmm0, %xmm0
111 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0
112 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
113 ; SSE2-NEXT: movapd %xmm1, %xmm0
116 ; SSE41-LABEL: sitofp_2i64_to_2f64:
118 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
119 ; SSE41-NEXT: cvtsi2sd %rax, %xmm1
120 ; SSE41-NEXT: movq %xmm0, %rax
121 ; SSE41-NEXT: xorps %xmm0, %xmm0
122 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0
123 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
126 ; VEX-LABEL: sitofp_2i64_to_2f64:
128 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
129 ; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
130 ; VEX-NEXT: vmovq %xmm0, %rax
131 ; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
132 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
135 ; AVX512F-LABEL: sitofp_2i64_to_2f64:
137 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
138 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
139 ; AVX512F-NEXT: vmovq %xmm0, %rax
140 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
141 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
144 ; AVX512VL-LABEL: sitofp_2i64_to_2f64:
146 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
147 ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
148 ; AVX512VL-NEXT: vmovq %xmm0, %rax
149 ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
150 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
151 ; AVX512VL-NEXT: retq
153 ; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
155 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
156 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
157 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
158 ; AVX512DQ-NEXT: vzeroupper
159 ; AVX512DQ-NEXT: retq
161 ; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64:
162 ; AVX512VLDQ: # %bb.0:
163 ; AVX512VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
164 ; AVX512VLDQ-NEXT: retq
165 %cvt = sitofp <2 x i64> %a to <2 x double>
166 ret <2 x double> %cvt
169 define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) {
170 ; SSE-LABEL: sitofp_2i32_to_2f64:
172 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
175 ; AVX-LABEL: sitofp_2i32_to_2f64:
177 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
179 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
180 %cvt = sitofp <2 x i32> %shuf to <2 x double>
181 ret <2 x double> %cvt
184 define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
185 ; SSE-LABEL: sitofp_4i32_to_2f64:
187 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
190 ; AVX-LABEL: sitofp_4i32_to_2f64:
192 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
194 %cvt = sitofp <4 x i32> %a to <4 x double>
195 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
196 ret <2 x double> %shuf
199 define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) {
200 ; SSE2-LABEL: sitofp_2i16_to_2f64:
202 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
203 ; SSE2-NEXT: psrad $16, %xmm0
204 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
207 ; SSE41-LABEL: sitofp_2i16_to_2f64:
209 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
210 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
213 ; AVX-LABEL: sitofp_2i16_to_2f64:
215 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
216 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
218 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
219 %cvt = sitofp <2 x i16> %shuf to <2 x double>
220 ret <2 x double> %cvt
223 define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
224 ; SSE2-LABEL: sitofp_8i16_to_2f64:
226 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
227 ; SSE2-NEXT: psrad $16, %xmm0
228 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
231 ; SSE41-LABEL: sitofp_8i16_to_2f64:
233 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
234 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
237 ; VEX-LABEL: sitofp_8i16_to_2f64:
239 ; VEX-NEXT: vpmovsxwd %xmm0, %xmm0
240 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
243 ; AVX512-LABEL: sitofp_8i16_to_2f64:
245 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
246 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
247 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
248 ; AVX512-NEXT: vzeroupper
250 %cvt = sitofp <8 x i16> %a to <8 x double>
251 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
252 ret <2 x double> %shuf
255 define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) {
256 ; SSE2-LABEL: sitofp_2i8_to_2f64:
258 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
259 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
260 ; SSE2-NEXT: psrad $24, %xmm0
261 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
264 ; SSE41-LABEL: sitofp_2i8_to_2f64:
266 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
267 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
270 ; AVX-LABEL: sitofp_2i8_to_2f64:
272 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
273 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
275 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
276 %cvt = sitofp <2 x i8> %shuf to <2 x double>
277 ret <2 x double> %cvt
280 define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
281 ; SSE2-LABEL: sitofp_16i8_to_2f64:
283 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
284 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
285 ; SSE2-NEXT: psrad $24, %xmm0
286 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
289 ; SSE41-LABEL: sitofp_16i8_to_2f64:
291 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
292 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
295 ; VEX-LABEL: sitofp_16i8_to_2f64:
297 ; VEX-NEXT: vpmovsxbd %xmm0, %xmm0
298 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
301 ; AVX512-LABEL: sitofp_16i8_to_2f64:
303 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
304 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
305 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
306 ; AVX512-NEXT: vzeroupper
308 %cvt = sitofp <16 x i8> %a to <16 x double>
309 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
310 ret <2 x double> %shuf
313 define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
314 ; SSE2-LABEL: sitofp_4i64_to_4f64:
316 ; SSE2-NEXT: movq %xmm0, %rax
317 ; SSE2-NEXT: cvtsi2sd %rax, %xmm2
318 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
319 ; SSE2-NEXT: movq %xmm0, %rax
320 ; SSE2-NEXT: xorps %xmm0, %xmm0
321 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0
322 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
323 ; SSE2-NEXT: movq %xmm1, %rax
324 ; SSE2-NEXT: cvtsi2sd %rax, %xmm3
325 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
326 ; SSE2-NEXT: movq %xmm0, %rax
327 ; SSE2-NEXT: xorps %xmm0, %xmm0
328 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0
329 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
330 ; SSE2-NEXT: movapd %xmm2, %xmm0
331 ; SSE2-NEXT: movapd %xmm3, %xmm1
334 ; SSE41-LABEL: sitofp_4i64_to_4f64:
336 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
337 ; SSE41-NEXT: cvtsi2sd %rax, %xmm2
338 ; SSE41-NEXT: movq %xmm0, %rax
339 ; SSE41-NEXT: xorps %xmm0, %xmm0
340 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0
341 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
342 ; SSE41-NEXT: pextrq $1, %xmm1, %rax
343 ; SSE41-NEXT: xorps %xmm2, %xmm2
344 ; SSE41-NEXT: cvtsi2sd %rax, %xmm2
345 ; SSE41-NEXT: movq %xmm1, %rax
346 ; SSE41-NEXT: xorps %xmm1, %xmm1
347 ; SSE41-NEXT: cvtsi2sd %rax, %xmm1
348 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
351 ; AVX1-LABEL: sitofp_4i64_to_4f64:
353 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
354 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
355 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
356 ; AVX1-NEXT: vmovq %xmm1, %rax
357 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
358 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
359 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
360 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
361 ; AVX1-NEXT: vmovq %xmm0, %rax
362 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
363 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
364 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
367 ; AVX2-LABEL: sitofp_4i64_to_4f64:
369 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
370 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
371 ; AVX2-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
372 ; AVX2-NEXT: vmovq %xmm1, %rax
373 ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
374 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
375 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
376 ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
377 ; AVX2-NEXT: vmovq %xmm0, %rax
378 ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
379 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
380 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
383 ; AVX512F-LABEL: sitofp_4i64_to_4f64:
385 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
386 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
387 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
388 ; AVX512F-NEXT: vmovq %xmm1, %rax
389 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
390 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
391 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
392 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
393 ; AVX512F-NEXT: vmovq %xmm0, %rax
394 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
395 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
396 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
399 ; AVX512VL-LABEL: sitofp_4i64_to_4f64:
401 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
402 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
403 ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
404 ; AVX512VL-NEXT: vmovq %xmm1, %rax
405 ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
406 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
407 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
408 ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
409 ; AVX512VL-NEXT: vmovq %xmm0, %rax
410 ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
411 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
412 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
413 ; AVX512VL-NEXT: retq
415 ; AVX512DQ-LABEL: sitofp_4i64_to_4f64:
417 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
418 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
419 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
420 ; AVX512DQ-NEXT: retq
422 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f64:
423 ; AVX512VLDQ: # %bb.0:
424 ; AVX512VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0
425 ; AVX512VLDQ-NEXT: retq
426 %cvt = sitofp <4 x i64> %a to <4 x double>
427 ret <4 x double> %cvt
430 define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) {
431 ; SSE-LABEL: sitofp_4i32_to_4f64:
433 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
434 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
435 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
436 ; SSE-NEXT: movaps %xmm2, %xmm0
439 ; AVX-LABEL: sitofp_4i32_to_4f64:
441 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
443 %cvt = sitofp <4 x i32> %a to <4 x double>
444 ret <4 x double> %cvt
447 define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
448 ; SSE2-LABEL: sitofp_4i16_to_4f64:
450 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
451 ; SSE2-NEXT: psrad $16, %xmm1
452 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
453 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
454 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
457 ; SSE41-LABEL: sitofp_4i16_to_4f64:
459 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
460 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
461 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
462 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
465 ; AVX-LABEL: sitofp_4i16_to_4f64:
467 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
468 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
470 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
471 %cvt = sitofp <4 x i16> %shuf to <4 x double>
472 ret <4 x double> %cvt
475 define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
476 ; SSE2-LABEL: sitofp_8i16_to_4f64:
478 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
479 ; SSE2-NEXT: psrad $16, %xmm1
480 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
481 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
482 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
485 ; SSE41-LABEL: sitofp_8i16_to_4f64:
487 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
488 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
489 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
490 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
493 ; VEX-LABEL: sitofp_8i16_to_4f64:
495 ; VEX-NEXT: vpmovsxwd %xmm0, %xmm0
496 ; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
499 ; AVX512-LABEL: sitofp_8i16_to_4f64:
501 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
502 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
503 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
505 %cvt = sitofp <8 x i16> %a to <8 x double>
506 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
507 ret <4 x double> %shuf
510 define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
511 ; SSE2-LABEL: sitofp_4i8_to_4f64:
513 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
514 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
515 ; SSE2-NEXT: psrad $24, %xmm1
516 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
517 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
518 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
521 ; SSE41-LABEL: sitofp_4i8_to_4f64:
523 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
524 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
525 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
526 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
529 ; AVX-LABEL: sitofp_4i8_to_4f64:
531 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
532 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
534 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
535 %cvt = sitofp <4 x i8> %shuf to <4 x double>
536 ret <4 x double> %cvt
539 define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
540 ; SSE2-LABEL: sitofp_16i8_to_4f64:
542 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
543 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
544 ; SSE2-NEXT: psrad $24, %xmm1
545 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
546 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
547 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
550 ; SSE41-LABEL: sitofp_16i8_to_4f64:
552 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
553 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
554 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
555 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
558 ; VEX-LABEL: sitofp_16i8_to_4f64:
560 ; VEX-NEXT: vpmovsxbd %xmm0, %xmm0
561 ; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
564 ; AVX512-LABEL: sitofp_16i8_to_4f64:
566 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
567 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
568 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
570 %cvt = sitofp <16 x i8> %a to <16 x double>
571 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
572 ret <4 x double> %shuf
576 ; Unsigned Integer to Double
579 define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
580 ; SSE2-LABEL: uitofp_2i64_to_2f64:
582 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
583 ; SSE2-NEXT: pand %xmm0, %xmm1
584 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
585 ; SSE2-NEXT: psrlq $32, %xmm0
586 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
587 ; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
588 ; SSE2-NEXT: addpd %xmm1, %xmm0
591 ; SSE41-LABEL: uitofp_2i64_to_2f64:
593 ; SSE41-NEXT: pxor %xmm1, %xmm1
594 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
595 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
596 ; SSE41-NEXT: psrlq $32, %xmm0
597 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
598 ; SSE41-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
599 ; SSE41-NEXT: addpd %xmm1, %xmm0
602 ; AVX1-LABEL: uitofp_2i64_to_2f64:
604 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
605 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
606 ; AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
607 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
608 ; AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
609 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
610 ; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
613 ; AVX2-LABEL: uitofp_2i64_to_2f64:
615 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
616 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
617 ; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
618 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
619 ; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
620 ; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
621 ; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0
624 ; AVX512F-LABEL: uitofp_2i64_to_2f64:
626 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
627 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
628 ; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
629 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
630 ; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
631 ; AVX512F-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
632 ; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0
635 ; AVX512VL-LABEL: uitofp_2i64_to_2f64:
637 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
638 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
639 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
640 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
641 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
642 ; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
643 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0
644 ; AVX512VL-NEXT: retq
646 ; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
648 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
649 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
650 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
651 ; AVX512DQ-NEXT: vzeroupper
652 ; AVX512DQ-NEXT: retq
654 ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64:
655 ; AVX512VLDQ: # %bb.0:
656 ; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
657 ; AVX512VLDQ-NEXT: retq
658 %cvt = uitofp <2 x i64> %a to <2 x double>
659 ret <2 x double> %cvt
662 define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
663 ; SSE2-LABEL: uitofp_2i32_to_2f64:
665 ; SSE2-NEXT: xorpd %xmm1, %xmm1
666 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
667 ; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
668 ; SSE2-NEXT: orpd %xmm1, %xmm0
669 ; SSE2-NEXT: subpd %xmm1, %xmm0
672 ; SSE41-LABEL: uitofp_2i32_to_2f64:
674 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
675 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
676 ; SSE41-NEXT: por %xmm1, %xmm0
677 ; SSE41-NEXT: subpd %xmm1, %xmm0
680 ; AVX1-LABEL: uitofp_2i32_to_2f64:
682 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
683 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
684 ; AVX1-NEXT: # xmm1 = mem[0,0]
685 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
686 ; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
689 ; AVX2-LABEL: uitofp_2i32_to_2f64:
691 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
692 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
693 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
694 ; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
697 ; AVX512F-LABEL: uitofp_2i32_to_2f64:
699 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
700 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
701 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
702 ; AVX512F-NEXT: vzeroupper
705 ; AVX512VL-LABEL: uitofp_2i32_to_2f64:
707 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
708 ; AVX512VL-NEXT: retq
710 ; AVX512DQ-LABEL: uitofp_2i32_to_2f64:
712 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
713 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
714 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
715 ; AVX512DQ-NEXT: vzeroupper
716 ; AVX512DQ-NEXT: retq
718 ; AVX512VLDQ-LABEL: uitofp_2i32_to_2f64:
719 ; AVX512VLDQ: # %bb.0:
720 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
721 ; AVX512VLDQ-NEXT: retq
722 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
723 %cvt = uitofp <2 x i32> %shuf to <2 x double>
724 ret <2 x double> %cvt
727 define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
728 ; SSE2-LABEL: uitofp_4i32_to_2f64:
730 ; SSE2-NEXT: xorpd %xmm1, %xmm1
731 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
732 ; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
733 ; SSE2-NEXT: orpd %xmm1, %xmm0
734 ; SSE2-NEXT: subpd %xmm1, %xmm0
737 ; SSE41-LABEL: uitofp_4i32_to_2f64:
739 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
740 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
741 ; SSE41-NEXT: por %xmm1, %xmm0
742 ; SSE41-NEXT: subpd %xmm1, %xmm0
745 ; AVX1-LABEL: uitofp_4i32_to_2f64:
747 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
748 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
749 ; AVX1-NEXT: # xmm1 = mem[0,0]
750 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
751 ; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
754 ; AVX2-LABEL: uitofp_4i32_to_2f64:
756 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
757 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
758 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
759 ; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
762 ; AVX512F-LABEL: uitofp_4i32_to_2f64:
764 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
765 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
766 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
767 ; AVX512F-NEXT: vzeroupper
770 ; AVX512VL-LABEL: uitofp_4i32_to_2f64:
772 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
773 ; AVX512VL-NEXT: retq
775 ; AVX512DQ-LABEL: uitofp_4i32_to_2f64:
777 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
778 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
779 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
780 ; AVX512DQ-NEXT: vzeroupper
781 ; AVX512DQ-NEXT: retq
783 ; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64:
784 ; AVX512VLDQ: # %bb.0:
785 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
786 ; AVX512VLDQ-NEXT: retq
787 %cvt = uitofp <4 x i32> %a to <4 x double>
788 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
789 ret <2 x double> %shuf
792 define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) {
793 ; SSE2-LABEL: uitofp_2i16_to_2f64:
795 ; SSE2-NEXT: pxor %xmm1, %xmm1
796 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
797 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
800 ; SSE41-LABEL: uitofp_2i16_to_2f64:
802 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
803 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
806 ; AVX-LABEL: uitofp_2i16_to_2f64:
808 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
809 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
811 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
812 %cvt = uitofp <2 x i16> %shuf to <2 x double>
813 ret <2 x double> %cvt
816 define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
817 ; SSE2-LABEL: uitofp_8i16_to_2f64:
819 ; SSE2-NEXT: pxor %xmm1, %xmm1
820 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
821 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
824 ; SSE41-LABEL: uitofp_8i16_to_2f64:
826 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
827 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
830 ; VEX-LABEL: uitofp_8i16_to_2f64:
832 ; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
833 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
836 ; AVX512-LABEL: uitofp_8i16_to_2f64:
838 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
839 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
840 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
841 ; AVX512-NEXT: vzeroupper
843 %cvt = uitofp <8 x i16> %a to <8 x double>
844 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
845 ret <2 x double> %shuf
848 define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) {
849 ; SSE2-LABEL: uitofp_2i8_to_2f64:
851 ; SSE2-NEXT: pxor %xmm1, %xmm1
852 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
853 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
854 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
857 ; SSE41-LABEL: uitofp_2i8_to_2f64:
859 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
860 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
863 ; AVX-LABEL: uitofp_2i8_to_2f64:
865 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
866 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
868 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
869 %cvt = uitofp <2 x i8> %shuf to <2 x double>
870 ret <2 x double> %cvt
873 define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
874 ; SSE2-LABEL: uitofp_16i8_to_2f64:
876 ; SSE2-NEXT: pxor %xmm1, %xmm1
877 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
878 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
879 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
882 ; SSE41-LABEL: uitofp_16i8_to_2f64:
884 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
885 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
888 ; VEX-LABEL: uitofp_16i8_to_2f64:
890 ; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
891 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
894 ; AVX512-LABEL: uitofp_16i8_to_2f64:
896 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
897 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
898 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
899 ; AVX512-NEXT: vzeroupper
901 %cvt = uitofp <16 x i8> %a to <16 x double>
902 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
903 ret <2 x double> %shuf
906 define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
907 ; SSE2-LABEL: uitofp_4i64_to_4f64:
909 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
910 ; SSE2-NEXT: movdqa %xmm0, %xmm3
911 ; SSE2-NEXT: pand %xmm2, %xmm3
912 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
913 ; SSE2-NEXT: por %xmm4, %xmm3
914 ; SSE2-NEXT: psrlq $32, %xmm0
915 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
916 ; SSE2-NEXT: por %xmm5, %xmm0
917 ; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
918 ; SSE2-NEXT: subpd %xmm6, %xmm0
919 ; SSE2-NEXT: addpd %xmm3, %xmm0
920 ; SSE2-NEXT: pand %xmm1, %xmm2
921 ; SSE2-NEXT: por %xmm4, %xmm2
922 ; SSE2-NEXT: psrlq $32, %xmm1
923 ; SSE2-NEXT: por %xmm5, %xmm1
924 ; SSE2-NEXT: subpd %xmm6, %xmm1
925 ; SSE2-NEXT: addpd %xmm2, %xmm1
928 ; SSE41-LABEL: uitofp_4i64_to_4f64:
930 ; SSE41-NEXT: pxor %xmm2, %xmm2
931 ; SSE41-NEXT: movdqa %xmm0, %xmm3
932 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
933 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
934 ; SSE41-NEXT: por %xmm4, %xmm3
935 ; SSE41-NEXT: psrlq $32, %xmm0
936 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
937 ; SSE41-NEXT: por %xmm5, %xmm0
938 ; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
939 ; SSE41-NEXT: subpd %xmm6, %xmm0
940 ; SSE41-NEXT: addpd %xmm3, %xmm0
941 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
942 ; SSE41-NEXT: por %xmm4, %xmm2
943 ; SSE41-NEXT: psrlq $32, %xmm1
944 ; SSE41-NEXT: por %xmm5, %xmm1
945 ; SSE41-NEXT: subpd %xmm6, %xmm1
946 ; SSE41-NEXT: addpd %xmm2, %xmm1
949 ; AVX1-LABEL: uitofp_4i64_to_4f64:
951 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
952 ; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
953 ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
954 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
955 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
956 ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
957 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
958 ; AVX1-NEXT: vaddpd %ymm0, %ymm2, %ymm0
961 ; AVX2-LABEL: uitofp_4i64_to_4f64:
963 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
964 ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
965 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
966 ; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
967 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
968 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
969 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
970 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
971 ; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0
972 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
975 ; AVX512F-LABEL: uitofp_4i64_to_4f64:
977 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
978 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
979 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
980 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
981 ; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0
982 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
983 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
984 ; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
985 ; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0
986 ; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0
989 ; AVX512VL-LABEL: uitofp_4i64_to_4f64:
991 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
992 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
993 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
994 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
995 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
996 ; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
997 ; AVX512VL-NEXT: vaddpd %ymm0, %ymm1, %ymm0
998 ; AVX512VL-NEXT: retq
1000 ; AVX512DQ-LABEL: uitofp_4i64_to_4f64:
1001 ; AVX512DQ: # %bb.0:
1002 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1003 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
1004 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1005 ; AVX512DQ-NEXT: retq
1007 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f64:
1008 ; AVX512VLDQ: # %bb.0:
1009 ; AVX512VLDQ-NEXT: vcvtuqq2pd %ymm0, %ymm0
1010 ; AVX512VLDQ-NEXT: retq
1011 %cvt = uitofp <4 x i64> %a to <4 x double>
1012 ret <4 x double> %cvt
1015 define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
1016 ; SSE2-LABEL: uitofp_4i32_to_4f64:
1018 ; SSE2-NEXT: movapd %xmm0, %xmm1
1019 ; SSE2-NEXT: xorpd %xmm2, %xmm2
1020 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1021 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
1022 ; SSE2-NEXT: orpd %xmm3, %xmm0
1023 ; SSE2-NEXT: subpd %xmm3, %xmm0
1024 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1025 ; SSE2-NEXT: orpd %xmm3, %xmm1
1026 ; SSE2-NEXT: subpd %xmm3, %xmm1
1029 ; SSE41-LABEL: uitofp_4i32_to_4f64:
1031 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1032 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1033 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
1034 ; SSE41-NEXT: por %xmm2, %xmm0
1035 ; SSE41-NEXT: subpd %xmm2, %xmm0
1036 ; SSE41-NEXT: pxor %xmm3, %xmm3
1037 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
1038 ; SSE41-NEXT: por %xmm2, %xmm1
1039 ; SSE41-NEXT: subpd %xmm2, %xmm1
1042 ; AVX1-LABEL: uitofp_4i32_to_4f64:
1044 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1045 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1046 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1047 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1048 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
1049 ; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
1050 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
1053 ; AVX2-LABEL: uitofp_4i32_to_4f64:
1055 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1056 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
1057 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1058 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0
1061 ; AVX512F-LABEL: uitofp_4i32_to_4f64:
1063 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1064 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
1065 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1066 ; AVX512F-NEXT: retq
1068 ; AVX512VL-LABEL: uitofp_4i32_to_4f64:
1069 ; AVX512VL: # %bb.0:
1070 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
1071 ; AVX512VL-NEXT: retq
1073 ; AVX512DQ-LABEL: uitofp_4i32_to_4f64:
1074 ; AVX512DQ: # %bb.0:
1075 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1076 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
1077 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1078 ; AVX512DQ-NEXT: retq
1080 ; AVX512VLDQ-LABEL: uitofp_4i32_to_4f64:
1081 ; AVX512VLDQ: # %bb.0:
1082 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0
1083 ; AVX512VLDQ-NEXT: retq
1084 %cvt = uitofp <4 x i32> %a to <4 x double>
1085 ret <4 x double> %cvt
1088 define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) {
1089 ; SSE2-LABEL: uitofp_4i16_to_4f64:
1091 ; SSE2-NEXT: pxor %xmm1, %xmm1
1092 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1093 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
1094 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1095 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
1096 ; SSE2-NEXT: movaps %xmm2, %xmm0
1099 ; SSE41-LABEL: uitofp_4i16_to_4f64:
1101 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1102 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
1103 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1104 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
1107 ; AVX-LABEL: uitofp_4i16_to_4f64:
1109 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1110 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
1112 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1113 %cvt = uitofp <4 x i16> %shuf to <4 x double>
1114 ret <4 x double> %cvt
1117 define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
1118 ; SSE2-LABEL: uitofp_8i16_to_4f64:
1120 ; SSE2-NEXT: pxor %xmm1, %xmm1
1121 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1122 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
1123 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1124 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
1125 ; SSE2-NEXT: movaps %xmm2, %xmm0
1128 ; SSE41-LABEL: uitofp_8i16_to_4f64:
1130 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1131 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
1132 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1133 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
1136 ; VEX-LABEL: uitofp_8i16_to_4f64:
1138 ; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1139 ; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
1142 ; AVX512-LABEL: uitofp_8i16_to_4f64:
1144 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1145 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
1146 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1148 %cvt = uitofp <8 x i16> %a to <8 x double>
1149 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1150 ret <4 x double> %shuf
1153 define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) {
1154 ; SSE2-LABEL: uitofp_4i8_to_4f64:
1156 ; SSE2-NEXT: pxor %xmm1, %xmm1
1157 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1158 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1159 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
1160 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1161 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
1162 ; SSE2-NEXT: movaps %xmm2, %xmm0
1165 ; SSE41-LABEL: uitofp_4i8_to_4f64:
1167 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1168 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
1169 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1170 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
1173 ; AVX-LABEL: uitofp_4i8_to_4f64:
1175 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1176 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
1178 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1179 %cvt = uitofp <4 x i8> %shuf to <4 x double>
1180 ret <4 x double> %cvt
1183 define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
1184 ; SSE2-LABEL: uitofp_16i8_to_4f64:
1186 ; SSE2-NEXT: pxor %xmm1, %xmm1
1187 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1188 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1189 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
1190 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1191 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
1192 ; SSE2-NEXT: movaps %xmm2, %xmm0
1195 ; SSE41-LABEL: uitofp_16i8_to_4f64:
1197 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1198 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
1199 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1200 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
1203 ; VEX-LABEL: uitofp_16i8_to_4f64:
1205 ; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1206 ; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
1209 ; AVX512-LABEL: uitofp_16i8_to_4f64:
1211 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1212 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
1213 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1215 %cvt = uitofp <16 x i8> %a to <16 x double>
1216 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1217 ret <4 x double> %shuf
1221 ; Signed Integer to Float
1224 define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
1225 ; SSE2-LABEL: sitofp_2i64_to_4f32:
1227 ; SSE2-NEXT: movq %xmm0, %rax
1228 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1229 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1230 ; SSE2-NEXT: movq %xmm0, %rax
1231 ; SSE2-NEXT: xorps %xmm0, %xmm0
1232 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
1233 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1234 ; SSE2-NEXT: movaps %xmm1, %xmm0
1237 ; SSE41-LABEL: sitofp_2i64_to_4f32:
1239 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
1240 ; SSE41-NEXT: cvtsi2ss %rax, %xmm1
1241 ; SSE41-NEXT: movq %xmm0, %rax
1242 ; SSE41-NEXT: xorps %xmm0, %xmm0
1243 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
1244 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1247 ; VEX-LABEL: sitofp_2i64_to_4f32:
1249 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1250 ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1251 ; VEX-NEXT: vmovq %xmm0, %rax
1252 ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1253 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1256 ; AVX512F-LABEL: sitofp_2i64_to_4f32:
1258 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1259 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1260 ; AVX512F-NEXT: vmovq %xmm0, %rax
1261 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1262 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1263 ; AVX512F-NEXT: retq
1265 ; AVX512VL-LABEL: sitofp_2i64_to_4f32:
1266 ; AVX512VL: # %bb.0:
1267 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1268 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1269 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1270 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1271 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1272 ; AVX512VL-NEXT: retq
1274 ; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
1275 ; AVX512DQ: # %bb.0:
1276 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1277 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1278 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1279 ; AVX512DQ-NEXT: vzeroupper
1280 ; AVX512DQ-NEXT: retq
1282 ; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32:
1283 ; AVX512VLDQ: # %bb.0:
1284 ; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
1285 ; AVX512VLDQ-NEXT: retq
1286 %cvt = sitofp <2 x i64> %a to <2 x float>
1287 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1288 ret <4 x float> %ext
1291 define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
1292 ; SSE2-LABEL: sitofp_2i64_to_4f32_zero:
1294 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1295 ; SSE2-NEXT: movq %xmm1, %rax
1296 ; SSE2-NEXT: xorps %xmm1, %xmm1
1297 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1298 ; SSE2-NEXT: movq %xmm0, %rax
1299 ; SSE2-NEXT: xorps %xmm0, %xmm0
1300 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
1301 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1302 ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1305 ; SSE41-LABEL: sitofp_2i64_to_4f32_zero:
1307 ; SSE41-NEXT: movq %xmm0, %rax
1308 ; SSE41-NEXT: cvtsi2ss %rax, %xmm1
1309 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
1310 ; SSE41-NEXT: xorps %xmm0, %xmm0
1311 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
1312 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],zero,zero
1313 ; SSE41-NEXT: movaps %xmm1, %xmm0
1316 ; VEX-LABEL: sitofp_2i64_to_4f32_zero:
1318 ; VEX-NEXT: vmovq %xmm0, %rax
1319 ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1320 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1321 ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1322 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
1325 ; AVX512F-LABEL: sitofp_2i64_to_4f32_zero:
1327 ; AVX512F-NEXT: vmovq %xmm0, %rax
1328 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1329 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1330 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1331 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
1332 ; AVX512F-NEXT: retq
1334 ; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero:
1335 ; AVX512VL: # %bb.0:
1336 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1337 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1338 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1339 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1340 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
1341 ; AVX512VL-NEXT: retq
1343 ; AVX512DQ-LABEL: sitofp_2i64_to_4f32_zero:
1344 ; AVX512DQ: # %bb.0:
1345 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1346 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1347 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1348 ; AVX512DQ-NEXT: vzeroupper
1349 ; AVX512DQ-NEXT: retq
1351 ; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32_zero:
1352 ; AVX512VLDQ: # %bb.0:
1353 ; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
1354 ; AVX512VLDQ-NEXT: retq
1355 %cvt = sitofp <2 x i64> %a to <2 x float>
1356 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1357 ret <4 x float> %ext
1360 define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
1361 ; SSE2-LABEL: sitofp_4i64_to_4f32_undef:
1363 ; SSE2-NEXT: movq %xmm0, %rax
1364 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1365 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1366 ; SSE2-NEXT: movq %xmm0, %rax
1367 ; SSE2-NEXT: xorps %xmm0, %xmm0
1368 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
1369 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1370 ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
1373 ; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
1375 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
1376 ; SSE41-NEXT: cvtsi2ss %rax, %xmm1
1377 ; SSE41-NEXT: movq %xmm0, %rax
1378 ; SSE41-NEXT: xorps %xmm0, %xmm0
1379 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
1380 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1383 ; VEX-LABEL: sitofp_4i64_to_4f32_undef:
1385 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1386 ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1387 ; VEX-NEXT: vmovq %xmm0, %rax
1388 ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1389 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1392 ; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
1394 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1395 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1396 ; AVX512F-NEXT: vmovq %xmm0, %rax
1397 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1398 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1399 ; AVX512F-NEXT: retq
1401 ; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
1402 ; AVX512VL: # %bb.0:
1403 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1404 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1405 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1406 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
1407 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1408 ; AVX512VL-NEXT: retq
1410 ; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
1411 ; AVX512DQ: # %bb.0:
1412 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1413 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1414 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1415 ; AVX512DQ-NEXT: vzeroupper
1416 ; AVX512DQ-NEXT: retq
1418 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32_undef:
1419 ; AVX512VLDQ: # %bb.0:
1420 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1421 ; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
1422 ; AVX512VLDQ-NEXT: vzeroupper
1423 ; AVX512VLDQ-NEXT: retq
1424 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1425 %cvt = sitofp <4 x i64> %ext to <4 x float>
1426 ret <4 x float> %cvt
1429 define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) {
1430 ; SSE-LABEL: sitofp_4i32_to_4f32:
1432 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1435 ; AVX-LABEL: sitofp_4i32_to_4f32:
1437 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1439 %cvt = sitofp <4 x i32> %a to <4 x float>
1440 ret <4 x float> %cvt
1443 define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) {
1444 ; SSE2-LABEL: sitofp_4i16_to_4f32:
1446 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1447 ; SSE2-NEXT: psrad $16, %xmm0
1448 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
1451 ; SSE41-LABEL: sitofp_4i16_to_4f32:
1453 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
1454 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
1457 ; AVX-LABEL: sitofp_4i16_to_4f32:
1459 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
1460 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1462 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1463 %cvt = sitofp <4 x i16> %shuf to <4 x float>
1464 ret <4 x float> %cvt
1467 define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
1468 ; SSE2-LABEL: sitofp_8i16_to_4f32:
1470 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1471 ; SSE2-NEXT: psrad $16, %xmm0
1472 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
1475 ; SSE41-LABEL: sitofp_8i16_to_4f32:
1477 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
1478 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
1481 ; AVX-LABEL: sitofp_8i16_to_4f32:
1483 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
1484 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1486 %cvt = sitofp <8 x i16> %a to <8 x float>
1487 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1488 ret <4 x float> %shuf
1491 define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) {
1492 ; SSE2-LABEL: sitofp_4i8_to_4f32:
1494 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1495 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1496 ; SSE2-NEXT: psrad $24, %xmm0
1497 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
1500 ; SSE41-LABEL: sitofp_4i8_to_4f32:
1502 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
1503 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
1506 ; AVX-LABEL: sitofp_4i8_to_4f32:
1508 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
1509 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1511 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1512 %cvt = sitofp <4 x i8> %shuf to <4 x float>
1513 ret <4 x float> %cvt
1516 define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
1517 ; SSE2-LABEL: sitofp_16i8_to_4f32:
1519 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1520 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1521 ; SSE2-NEXT: psrad $24, %xmm0
1522 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
1525 ; SSE41-LABEL: sitofp_16i8_to_4f32:
1527 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
1528 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
1531 ; AVX-LABEL: sitofp_16i8_to_4f32:
1533 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
1534 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1536 %cvt = sitofp <16 x i8> %a to <16 x float>
1537 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1538 ret <4 x float> %shuf
1541 define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
1542 ; SSE2-LABEL: sitofp_4i64_to_4f32:
1544 ; SSE2-NEXT: movq %xmm1, %rax
1545 ; SSE2-NEXT: cvtsi2ss %rax, %xmm2
1546 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1547 ; SSE2-NEXT: movq %xmm1, %rax
1548 ; SSE2-NEXT: xorps %xmm1, %xmm1
1549 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1550 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1551 ; SSE2-NEXT: movq %xmm0, %rax
1552 ; SSE2-NEXT: xorps %xmm1, %xmm1
1553 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1554 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1555 ; SSE2-NEXT: movq %xmm0, %rax
1556 ; SSE2-NEXT: xorps %xmm0, %xmm0
1557 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
1558 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1559 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1560 ; SSE2-NEXT: movaps %xmm1, %xmm0
1563 ; SSE41-LABEL: sitofp_4i64_to_4f32:
1565 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
1566 ; SSE41-NEXT: cvtsi2ss %rax, %xmm2
1567 ; SSE41-NEXT: movq %xmm0, %rax
1568 ; SSE41-NEXT: xorps %xmm0, %xmm0
1569 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
1570 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
1571 ; SSE41-NEXT: movq %xmm1, %rax
1572 ; SSE41-NEXT: xorps %xmm2, %xmm2
1573 ; SSE41-NEXT: cvtsi2ss %rax, %xmm2
1574 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
1575 ; SSE41-NEXT: pextrq $1, %xmm1, %rax
1576 ; SSE41-NEXT: xorps %xmm1, %xmm1
1577 ; SSE41-NEXT: cvtsi2ss %rax, %xmm1
1578 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
1581 ; AVX1-LABEL: sitofp_4i64_to_4f32:
1583 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
1584 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1585 ; AVX1-NEXT: vmovq %xmm0, %rax
1586 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
1587 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1588 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1589 ; AVX1-NEXT: vmovq %xmm0, %rax
1590 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
1591 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1592 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
1593 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
1594 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1595 ; AVX1-NEXT: vzeroupper
1598 ; AVX2-LABEL: sitofp_4i64_to_4f32:
1600 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
1601 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1602 ; AVX2-NEXT: vmovq %xmm0, %rax
1603 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
1604 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1605 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1606 ; AVX2-NEXT: vmovq %xmm0, %rax
1607 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
1608 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1609 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
1610 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
1611 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1612 ; AVX2-NEXT: vzeroupper
1615 ; AVX512F-LABEL: sitofp_4i64_to_4f32:
1617 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1618 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1619 ; AVX512F-NEXT: vmovq %xmm0, %rax
1620 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
1621 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1622 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
1623 ; AVX512F-NEXT: vmovq %xmm0, %rax
1624 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
1625 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1626 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1627 ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
1628 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1629 ; AVX512F-NEXT: vzeroupper
1630 ; AVX512F-NEXT: retq
1632 ; AVX512VL-LABEL: sitofp_4i64_to_4f32:
1633 ; AVX512VL: # %bb.0:
1634 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1635 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
1636 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1637 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
1638 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1639 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
1640 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1641 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
1642 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1643 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1644 ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
1645 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1646 ; AVX512VL-NEXT: vzeroupper
1647 ; AVX512VL-NEXT: retq
1649 ; AVX512DQ-LABEL: sitofp_4i64_to_4f32:
1650 ; AVX512DQ: # %bb.0:
1651 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1652 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1653 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1654 ; AVX512DQ-NEXT: vzeroupper
1655 ; AVX512DQ-NEXT: retq
1657 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32:
1658 ; AVX512VLDQ: # %bb.0:
1659 ; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
1660 ; AVX512VLDQ-NEXT: vzeroupper
1661 ; AVX512VLDQ-NEXT: retq
1662 %cvt = sitofp <4 x i64> %a to <4 x float>
1663 ret <4 x float> %cvt
1666 define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) {
1667 ; SSE-LABEL: sitofp_8i32_to_8f32:
1669 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1670 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
1673 ; AVX-LABEL: sitofp_8i32_to_8f32:
1675 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
1677 %cvt = sitofp <8 x i32> %a to <8 x float>
1678 ret <8 x float> %cvt
1681 define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
1682 ; SSE2-LABEL: sitofp_8i16_to_8f32:
1684 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1685 ; SSE2-NEXT: psrad $16, %xmm1
1686 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2
1687 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
1688 ; SSE2-NEXT: psrad $16, %xmm0
1689 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
1690 ; SSE2-NEXT: movaps %xmm2, %xmm0
1693 ; SSE41-LABEL: sitofp_8i16_to_8f32:
1695 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
1696 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2
1697 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1698 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
1699 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1
1700 ; SSE41-NEXT: movaps %xmm2, %xmm0
1703 ; AVX1-LABEL: sitofp_8i16_to_8f32:
1705 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
1706 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1707 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
1708 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1709 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1712 ; AVX2-LABEL: sitofp_8i16_to_8f32:
1714 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
1715 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1718 ; AVX512-LABEL: sitofp_8i16_to_8f32:
1720 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
1721 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1723 %cvt = sitofp <8 x i16> %a to <8 x float>
1724 ret <8 x float> %cvt
1727 define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
1728 ; SSE2-LABEL: sitofp_8i8_to_8f32:
1730 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1731 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1732 ; SSE2-NEXT: psrad $24, %xmm0
1733 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
1734 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
1735 ; SSE2-NEXT: psrad $24, %xmm1
1736 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
1739 ; SSE41-LABEL: sitofp_8i8_to_8f32:
1741 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
1742 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2
1743 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1744 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
1745 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1
1746 ; SSE41-NEXT: movaps %xmm2, %xmm0
1749 ; AVX1-LABEL: sitofp_8i8_to_8f32:
1751 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1752 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1753 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1754 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1755 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1758 ; AVX2-LABEL: sitofp_8i8_to_8f32:
1760 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1761 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1764 ; AVX512-LABEL: sitofp_8i8_to_8f32:
1766 ; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
1767 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1769 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1770 %cvt = sitofp <8 x i8> %shuf to <8 x float>
1771 ret <8 x float> %cvt
1774 define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
1775 ; SSE2-LABEL: sitofp_16i8_to_8f32:
1777 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1778 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1779 ; SSE2-NEXT: psrad $24, %xmm0
1780 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
1781 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
1782 ; SSE2-NEXT: psrad $24, %xmm1
1783 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
1786 ; SSE41-LABEL: sitofp_16i8_to_8f32:
1788 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
1789 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2
1790 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1791 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
1792 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1
1793 ; SSE41-NEXT: movaps %xmm2, %xmm0
1796 ; AVX1-LABEL: sitofp_16i8_to_8f32:
1798 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1799 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1800 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1801 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1802 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1805 ; AVX2-LABEL: sitofp_16i8_to_8f32:
1807 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1808 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1811 ; AVX512-LABEL: sitofp_16i8_to_8f32:
1813 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
1814 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
1815 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1817 %cvt = sitofp <16 x i8> %a to <16 x float>
1818 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1819 ret <8 x float> %shuf
1823 ; Unsigned Integer to Float
1826 define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
1827 ; SSE2-LABEL: uitofp_2i64_to_4f32:
1829 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1830 ; SSE2-NEXT: movq %xmm0, %rax
1831 ; SSE2-NEXT: testq %rax, %rax
1832 ; SSE2-NEXT: js .LBB41_1
1833 ; SSE2-NEXT: # %bb.2:
1834 ; SSE2-NEXT: xorps %xmm0, %xmm0
1835 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
1836 ; SSE2-NEXT: jmp .LBB41_3
1837 ; SSE2-NEXT: .LBB41_1:
1838 ; SSE2-NEXT: movq %rax, %rcx
1839 ; SSE2-NEXT: shrq %rcx
1840 ; SSE2-NEXT: andl $1, %eax
1841 ; SSE2-NEXT: orq %rcx, %rax
1842 ; SSE2-NEXT: xorps %xmm0, %xmm0
1843 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
1844 ; SSE2-NEXT: addss %xmm0, %xmm0
1845 ; SSE2-NEXT: .LBB41_3:
1846 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1847 ; SSE2-NEXT: movq %xmm1, %rax
1848 ; SSE2-NEXT: testq %rax, %rax
1849 ; SSE2-NEXT: js .LBB41_4
1850 ; SSE2-NEXT: # %bb.5:
1851 ; SSE2-NEXT: xorps %xmm1, %xmm1
1852 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1853 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1855 ; SSE2-NEXT: .LBB41_4:
1856 ; SSE2-NEXT: movq %rax, %rcx
1857 ; SSE2-NEXT: shrq %rcx
1858 ; SSE2-NEXT: andl $1, %eax
1859 ; SSE2-NEXT: orq %rcx, %rax
1860 ; SSE2-NEXT: xorps %xmm1, %xmm1
1861 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1862 ; SSE2-NEXT: addss %xmm1, %xmm1
1863 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1866 ; SSE41-LABEL: uitofp_2i64_to_4f32:
1868 ; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,1]
1869 ; SSE41-NEXT: pand %xmm0, %xmm1
1870 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1871 ; SSE41-NEXT: psrlq $1, %xmm2
1872 ; SSE41-NEXT: por %xmm1, %xmm2
1873 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
1874 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm0
1875 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
1876 ; SSE41-NEXT: cvtsi2ss %rax, %xmm3
1877 ; SSE41-NEXT: movq %xmm0, %rax
1878 ; SSE41-NEXT: xorps %xmm2, %xmm2
1879 ; SSE41-NEXT: cvtsi2ss %rax, %xmm2
1880 ; SSE41-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],zero,zero
1881 ; SSE41-NEXT: movaps %xmm2, %xmm3
1882 ; SSE41-NEXT: addps %xmm2, %xmm3
1883 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1884 ; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2
1885 ; SSE41-NEXT: movaps %xmm2, %xmm0
1888 ; VEX-LABEL: uitofp_2i64_to_4f32:
1890 ; VEX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1891 ; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2
1892 ; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1
1893 ; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
1894 ; VEX-NEXT: vpextrq $1, %xmm1, %rax
1895 ; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
1896 ; VEX-NEXT: vmovq %xmm1, %rax
1897 ; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
1898 ; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
1899 ; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2
1900 ; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3
1901 ; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
1902 ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
1903 ; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
1906 ; AVX512F-LABEL: uitofp_2i64_to_4f32:
1908 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1909 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
1910 ; AVX512F-NEXT: vmovq %xmm0, %rax
1911 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
1912 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1913 ; AVX512F-NEXT: retq
1915 ; AVX512VL-LABEL: uitofp_2i64_to_4f32:
1916 ; AVX512VL: # %bb.0:
1917 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1918 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
1919 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1920 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
1921 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1922 ; AVX512VL-NEXT: retq
1924 ; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
1925 ; AVX512DQ: # %bb.0:
1926 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1927 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
1928 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1929 ; AVX512DQ-NEXT: vzeroupper
1930 ; AVX512DQ-NEXT: retq
1932 ; AVX512VLDQ-LABEL: uitofp_2i64_to_4f32:
1933 ; AVX512VLDQ: # %bb.0:
1934 ; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0
1935 ; AVX512VLDQ-NEXT: retq
1936 %cvt = uitofp <2 x i64> %a to <2 x float>
1937 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1938 ret <4 x float> %ext
1941 define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
1942 ; SSE2-LABEL: uitofp_2i64_to_2f32:
1944 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1945 ; SSE2-NEXT: movq %xmm1, %rax
1946 ; SSE2-NEXT: testq %rax, %rax
1947 ; SSE2-NEXT: js .LBB42_1
1948 ; SSE2-NEXT: # %bb.2:
1949 ; SSE2-NEXT: xorps %xmm1, %xmm1
1950 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1951 ; SSE2-NEXT: jmp .LBB42_3
1952 ; SSE2-NEXT: .LBB42_1:
1953 ; SSE2-NEXT: movq %rax, %rcx
1954 ; SSE2-NEXT: shrq %rcx
1955 ; SSE2-NEXT: andl $1, %eax
1956 ; SSE2-NEXT: orq %rcx, %rax
1957 ; SSE2-NEXT: xorps %xmm1, %xmm1
1958 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
1959 ; SSE2-NEXT: addss %xmm1, %xmm1
1960 ; SSE2-NEXT: .LBB42_3:
1961 ; SSE2-NEXT: movq %xmm0, %rax
1962 ; SSE2-NEXT: testq %rax, %rax
1963 ; SSE2-NEXT: js .LBB42_4
1964 ; SSE2-NEXT: # %bb.5:
1965 ; SSE2-NEXT: xorps %xmm0, %xmm0
1966 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
1967 ; SSE2-NEXT: jmp .LBB42_6
1968 ; SSE2-NEXT: .LBB42_4:
1969 ; SSE2-NEXT: movq %rax, %rcx
1970 ; SSE2-NEXT: shrq %rcx
1971 ; SSE2-NEXT: andl $1, %eax
1972 ; SSE2-NEXT: orq %rcx, %rax
1973 ; SSE2-NEXT: xorps %xmm0, %xmm0
1974 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
1975 ; SSE2-NEXT: addss %xmm0, %xmm0
1976 ; SSE2-NEXT: .LBB42_6:
1977 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1978 ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1981 ; SSE41-LABEL: uitofp_2i64_to_2f32:
1983 ; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,1]
1984 ; SSE41-NEXT: pand %xmm0, %xmm1
1985 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1986 ; SSE41-NEXT: psrlq $1, %xmm2
1987 ; SSE41-NEXT: por %xmm1, %xmm2
1988 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
1989 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm0
1990 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
1991 ; SSE41-NEXT: xorps %xmm2, %xmm2
1992 ; SSE41-NEXT: cvtsi2ss %rax, %xmm2
1993 ; SSE41-NEXT: movq %xmm0, %rax
1994 ; SSE41-NEXT: cvtsi2ss %rax, %xmm3
1995 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm2[0],zero,zero
1996 ; SSE41-NEXT: movaps %xmm3, %xmm2
1997 ; SSE41-NEXT: addps %xmm3, %xmm2
1998 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1999 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
2000 ; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm3[0],zero
2003 ; VEX-LABEL: uitofp_2i64_to_2f32:
2005 ; VEX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2006 ; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2
2007 ; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1
2008 ; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
2009 ; VEX-NEXT: vpextrq $1, %xmm1, %rax
2010 ; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
2011 ; VEX-NEXT: vmovq %xmm1, %rax
2012 ; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
2013 ; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
2014 ; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2
2015 ; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3
2016 ; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
2017 ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
2018 ; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
2019 ; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2022 ; AVX512F-LABEL: uitofp_2i64_to_2f32:
2024 ; AVX512F-NEXT: vmovq %xmm0, %rax
2025 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
2026 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2027 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
2028 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
2029 ; AVX512F-NEXT: retq
2031 ; AVX512VL-LABEL: uitofp_2i64_to_2f32:
2032 ; AVX512VL: # %bb.0:
2033 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2034 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
2035 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2036 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
2037 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
2038 ; AVX512VL-NEXT: retq
2040 ; AVX512DQ-LABEL: uitofp_2i64_to_2f32:
2041 ; AVX512DQ: # %bb.0:
2042 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2043 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
2044 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2045 ; AVX512DQ-NEXT: vzeroupper
2046 ; AVX512DQ-NEXT: retq
2048 ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f32:
2049 ; AVX512VLDQ: # %bb.0:
2050 ; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0
2051 ; AVX512VLDQ-NEXT: retq
2052 %cvt = uitofp <2 x i64> %a to <2 x float>
2053 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2054 ret <4 x float> %ext
2057 define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
2058 ; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
2060 ; SSE2-NEXT: movq %xmm0, %rax
2061 ; SSE2-NEXT: testq %rax, %rax
2062 ; SSE2-NEXT: js .LBB43_1
2063 ; SSE2-NEXT: # %bb.2:
2064 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
2065 ; SSE2-NEXT: jmp .LBB43_3
2066 ; SSE2-NEXT: .LBB43_1:
2067 ; SSE2-NEXT: movq %rax, %rcx
2068 ; SSE2-NEXT: shrq %rcx
2069 ; SSE2-NEXT: andl $1, %eax
2070 ; SSE2-NEXT: orq %rcx, %rax
2071 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
2072 ; SSE2-NEXT: addss %xmm1, %xmm1
2073 ; SSE2-NEXT: .LBB43_3:
2074 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2075 ; SSE2-NEXT: movq %xmm0, %rax
2076 ; SSE2-NEXT: testq %rax, %rax
2077 ; SSE2-NEXT: js .LBB43_4
2078 ; SSE2-NEXT: # %bb.5:
2079 ; SSE2-NEXT: xorps %xmm0, %xmm0
2080 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
2081 ; SSE2-NEXT: jmp .LBB43_6
2082 ; SSE2-NEXT: .LBB43_4:
2083 ; SSE2-NEXT: movq %rax, %rcx
2084 ; SSE2-NEXT: shrq %rcx
2085 ; SSE2-NEXT: andl $1, %eax
2086 ; SSE2-NEXT: orq %rcx, %rax
2087 ; SSE2-NEXT: xorps %xmm0, %xmm0
2088 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
2089 ; SSE2-NEXT: addss %xmm0, %xmm0
2090 ; SSE2-NEXT: .LBB43_6:
2091 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2092 ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
2095 ; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
2097 ; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,1]
2098 ; SSE41-NEXT: pand %xmm0, %xmm1
2099 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2100 ; SSE41-NEXT: psrlq $1, %xmm2
2101 ; SSE41-NEXT: por %xmm1, %xmm2
2102 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2103 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
2104 ; SSE41-NEXT: pextrq $1, %xmm1, %rax
2105 ; SSE41-NEXT: xorps %xmm2, %xmm2
2106 ; SSE41-NEXT: cvtsi2ss %rax, %xmm2
2107 ; SSE41-NEXT: movq %xmm1, %rax
2108 ; SSE41-NEXT: xorps %xmm1, %xmm1
2109 ; SSE41-NEXT: cvtsi2ss %rax, %xmm1
2110 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
2111 ; SSE41-NEXT: movaps %xmm1, %xmm2
2112 ; SSE41-NEXT: addps %xmm1, %xmm2
2113 ; SSE41-NEXT: xorps %xmm3, %xmm3
2114 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm3[2,3]
2115 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
2116 ; SSE41-NEXT: movaps %xmm1, %xmm0
2119 ; AVX1-LABEL: uitofp_4i64_to_4f32_undef:
2121 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2122 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
2123 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2
2124 ; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
2125 ; AVX1-NEXT: vmovaps %xmm0, %xmm2
2126 ; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm1
2127 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
2128 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
2129 ; AVX1-NEXT: vmovq %xmm1, %rax
2130 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
2131 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
2132 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
2133 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2134 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
2135 ; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2
2136 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2137 ; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2138 ; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
2139 ; AVX1-NEXT: vzeroupper
2142 ; AVX2-LABEL: uitofp_4i64_to_4f32_undef:
2144 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2145 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2146 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1
2147 ; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2
2148 ; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
2149 ; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
2150 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
2151 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
2152 ; AVX2-NEXT: vmovq %xmm1, %rax
2153 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
2154 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
2155 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
2156 ; AVX2-NEXT: vmovq %xmm1, %rax
2157 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
2158 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
2159 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
2160 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
2161 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
2162 ; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
2163 ; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2164 ; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
2165 ; AVX2-NEXT: vzeroupper
2168 ; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
2170 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2171 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
2172 ; AVX512F-NEXT: vmovq %xmm0, %rax
2173 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
2174 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
2175 ; AVX512F-NEXT: retq
2177 ; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
2178 ; AVX512VL: # %bb.0:
2179 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2180 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
2181 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2182 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
2183 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
2184 ; AVX512VL-NEXT: retq
2186 ; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:
2187 ; AVX512DQ: # %bb.0:
2188 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2189 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
2190 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2191 ; AVX512DQ-NEXT: vzeroupper
2192 ; AVX512DQ-NEXT: retq
2194 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32_undef:
2195 ; AVX512VLDQ: # %bb.0:
2196 ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2197 ; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
2198 ; AVX512VLDQ-NEXT: vzeroupper
2199 ; AVX512VLDQ-NEXT: retq
2200 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2201 %cvt = uitofp <4 x i64> %ext to <4 x float>
2202 ret <4 x float> %cvt
2205 define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) {
2206 ; SSE2-LABEL: uitofp_4i32_to_4f32:
2208 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
2209 ; SSE2-NEXT: pand %xmm0, %xmm1
2210 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2211 ; SSE2-NEXT: psrld $16, %xmm0
2212 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2213 ; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2214 ; SSE2-NEXT: addps %xmm1, %xmm0
2217 ; SSE41-LABEL: uitofp_4i32_to_4f32:
2219 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
2220 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
2221 ; SSE41-NEXT: psrld $16, %xmm0
2222 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
2223 ; SSE41-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2224 ; SSE41-NEXT: addps %xmm1, %xmm0
2227 ; AVX1-LABEL: uitofp_4i32_to_4f32:
2229 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
2230 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
2231 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
2232 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2233 ; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
2236 ; AVX2-LABEL: uitofp_4i32_to_4f32:
2238 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
2239 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
2240 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
2241 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928]
2242 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2243 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
2244 ; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm0
2245 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
2248 ; AVX512F-LABEL: uitofp_4i32_to_4f32:
2250 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2251 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
2252 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2253 ; AVX512F-NEXT: vzeroupper
2254 ; AVX512F-NEXT: retq
2256 ; AVX512VL-LABEL: uitofp_4i32_to_4f32:
2257 ; AVX512VL: # %bb.0:
2258 ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
2259 ; AVX512VL-NEXT: retq
2261 ; AVX512DQ-LABEL: uitofp_4i32_to_4f32:
2262 ; AVX512DQ: # %bb.0:
2263 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2264 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
2265 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2266 ; AVX512DQ-NEXT: vzeroupper
2267 ; AVX512DQ-NEXT: retq
2269 ; AVX512VLDQ-LABEL: uitofp_4i32_to_4f32:
2270 ; AVX512VLDQ: # %bb.0:
2271 ; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
2272 ; AVX512VLDQ-NEXT: retq
2273 %cvt = uitofp <4 x i32> %a to <4 x float>
2274 ret <4 x float> %cvt
2277 define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) {
2278 ; SSE2-LABEL: uitofp_4i16_to_4f32:
2280 ; SSE2-NEXT: pxor %xmm1, %xmm1
2281 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2282 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
2285 ; SSE41-LABEL: uitofp_4i16_to_4f32:
2287 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2288 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
2291 ; AVX-LABEL: uitofp_4i16_to_4f32:
2293 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2294 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2296 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2297 %cvt = uitofp <4 x i16> %shuf to <4 x float>
2298 ret <4 x float> %cvt
2301 define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
2302 ; SSE2-LABEL: uitofp_8i16_to_4f32:
2304 ; SSE2-NEXT: pxor %xmm1, %xmm1
2305 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2306 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
2309 ; SSE41-LABEL: uitofp_8i16_to_4f32:
2311 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2312 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
2315 ; AVX-LABEL: uitofp_8i16_to_4f32:
2317 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2318 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2320 %cvt = uitofp <8 x i16> %a to <8 x float>
2321 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2322 ret <4 x float> %shuf
2325 define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) {
2326 ; SSE2-LABEL: uitofp_4i8_to_4f32:
2328 ; SSE2-NEXT: pxor %xmm1, %xmm1
2329 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2330 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2331 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
2334 ; SSE41-LABEL: uitofp_4i8_to_4f32:
2336 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2337 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
2340 ; AVX-LABEL: uitofp_4i8_to_4f32:
2342 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2343 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2345 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2346 %cvt = uitofp <4 x i8> %shuf to <4 x float>
2347 ret <4 x float> %cvt
2350 define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
2351 ; SSE2-LABEL: uitofp_16i8_to_4f32:
2353 ; SSE2-NEXT: pxor %xmm1, %xmm1
2354 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2355 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2356 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
2359 ; SSE41-LABEL: uitofp_16i8_to_4f32:
2361 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2362 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
2365 ; AVX-LABEL: uitofp_16i8_to_4f32:
2367 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2368 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2370 %cvt = uitofp <16 x i8> %a to <16 x float>
2371 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2372 ret <4 x float> %shuf
2375 define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
2376 ; SSE2-LABEL: uitofp_4i64_to_4f32:
2378 ; SSE2-NEXT: movq %xmm1, %rax
2379 ; SSE2-NEXT: testq %rax, %rax
2380 ; SSE2-NEXT: js .LBB49_1
2381 ; SSE2-NEXT: # %bb.2:
2382 ; SSE2-NEXT: cvtsi2ss %rax, %xmm2
2383 ; SSE2-NEXT: jmp .LBB49_3
2384 ; SSE2-NEXT: .LBB49_1:
2385 ; SSE2-NEXT: movq %rax, %rcx
2386 ; SSE2-NEXT: shrq %rcx
2387 ; SSE2-NEXT: andl $1, %eax
2388 ; SSE2-NEXT: orq %rcx, %rax
2389 ; SSE2-NEXT: cvtsi2ss %rax, %xmm2
2390 ; SSE2-NEXT: addss %xmm2, %xmm2
2391 ; SSE2-NEXT: .LBB49_3:
2392 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
2393 ; SSE2-NEXT: movq %xmm1, %rax
2394 ; SSE2-NEXT: testq %rax, %rax
2395 ; SSE2-NEXT: js .LBB49_4
2396 ; SSE2-NEXT: # %bb.5:
2397 ; SSE2-NEXT: cvtsi2ss %rax, %xmm3
2398 ; SSE2-NEXT: jmp .LBB49_6
2399 ; SSE2-NEXT: .LBB49_4:
2400 ; SSE2-NEXT: movq %rax, %rcx
2401 ; SSE2-NEXT: shrq %rcx
2402 ; SSE2-NEXT: andl $1, %eax
2403 ; SSE2-NEXT: orq %rcx, %rax
2404 ; SSE2-NEXT: cvtsi2ss %rax, %xmm3
2405 ; SSE2-NEXT: addss %xmm3, %xmm3
2406 ; SSE2-NEXT: .LBB49_6:
2407 ; SSE2-NEXT: movq %xmm0, %rax
2408 ; SSE2-NEXT: testq %rax, %rax
2409 ; SSE2-NEXT: js .LBB49_7
2410 ; SSE2-NEXT: # %bb.8:
2411 ; SSE2-NEXT: xorps %xmm1, %xmm1
2412 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
2413 ; SSE2-NEXT: jmp .LBB49_9
2414 ; SSE2-NEXT: .LBB49_7:
2415 ; SSE2-NEXT: movq %rax, %rcx
2416 ; SSE2-NEXT: shrq %rcx
2417 ; SSE2-NEXT: andl $1, %eax
2418 ; SSE2-NEXT: orq %rcx, %rax
2419 ; SSE2-NEXT: xorps %xmm1, %xmm1
2420 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
2421 ; SSE2-NEXT: addss %xmm1, %xmm1
2422 ; SSE2-NEXT: .LBB49_9:
2423 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2424 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2425 ; SSE2-NEXT: movq %xmm0, %rax
2426 ; SSE2-NEXT: testq %rax, %rax
2427 ; SSE2-NEXT: js .LBB49_10
2428 ; SSE2-NEXT: # %bb.11:
2429 ; SSE2-NEXT: xorps %xmm0, %xmm0
2430 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
2431 ; SSE2-NEXT: jmp .LBB49_12
2432 ; SSE2-NEXT: .LBB49_10:
2433 ; SSE2-NEXT: movq %rax, %rcx
2434 ; SSE2-NEXT: shrq %rcx
2435 ; SSE2-NEXT: andl $1, %eax
2436 ; SSE2-NEXT: orq %rcx, %rax
2437 ; SSE2-NEXT: xorps %xmm0, %xmm0
2438 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
2439 ; SSE2-NEXT: addss %xmm0, %xmm0
2440 ; SSE2-NEXT: .LBB49_12:
2441 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2442 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2443 ; SSE2-NEXT: movaps %xmm1, %xmm0
2446 ; SSE41-LABEL: uitofp_4i64_to_4f32:
2448 ; SSE41-NEXT: movdqa %xmm1, %xmm2
2449 ; SSE41-NEXT: movdqa %xmm0, %xmm1
2450 ; SSE41-NEXT: pmovsxbq {{.*#+}} xmm4 = [1,1]
2451 ; SSE41-NEXT: pand %xmm4, %xmm0
2452 ; SSE41-NEXT: movdqa %xmm1, %xmm3
2453 ; SSE41-NEXT: psrlq $1, %xmm3
2454 ; SSE41-NEXT: por %xmm0, %xmm3
2455 ; SSE41-NEXT: movdqa %xmm1, %xmm5
2456 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2457 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm5
2458 ; SSE41-NEXT: pextrq $1, %xmm5, %rax
2459 ; SSE41-NEXT: xorps %xmm0, %xmm0
2460 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
2461 ; SSE41-NEXT: movq %xmm5, %rax
2462 ; SSE41-NEXT: xorps %xmm3, %xmm3
2463 ; SSE41-NEXT: cvtsi2ss %rax, %xmm3
2464 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3]
2465 ; SSE41-NEXT: pand %xmm2, %xmm4
2466 ; SSE41-NEXT: movdqa %xmm2, %xmm5
2467 ; SSE41-NEXT: psrlq $1, %xmm5
2468 ; SSE41-NEXT: por %xmm4, %xmm5
2469 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
2470 ; SSE41-NEXT: movaps %xmm2, %xmm0
2471 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2
2472 ; SSE41-NEXT: movq %xmm2, %rax
2473 ; SSE41-NEXT: xorps %xmm0, %xmm0
2474 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
2475 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3]
2476 ; SSE41-NEXT: pextrq $1, %xmm2, %rax
2477 ; SSE41-NEXT: xorps %xmm0, %xmm0
2478 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
2479 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0]
2480 ; SSE41-NEXT: movaps %xmm3, %xmm2
2481 ; SSE41-NEXT: addps %xmm3, %xmm2
2482 ; SSE41-NEXT: movaps %xmm1, %xmm0
2483 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
2484 ; SSE41-NEXT: movaps %xmm3, %xmm0
2487 ; AVX1-LABEL: uitofp_4i64_to_4f32:
2489 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1
2490 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2491 ; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3
2492 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
2493 ; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
2494 ; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
2495 ; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
2496 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
2497 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
2498 ; AVX1-NEXT: vmovq %xmm1, %rax
2499 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
2500 ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
2501 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2502 ; AVX1-NEXT: vmovq %xmm1, %rax
2503 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
2504 ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
2505 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
2506 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
2507 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
2508 ; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
2509 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2510 ; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
2511 ; AVX1-NEXT: vzeroupper
2514 ; AVX2-LABEL: uitofp_4i64_to_4f32:
2516 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2517 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1
2518 ; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2
2519 ; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
2520 ; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
2521 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
2522 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
2523 ; AVX2-NEXT: vmovq %xmm1, %rax
2524 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
2525 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
2526 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
2527 ; AVX2-NEXT: vmovq %xmm1, %rax
2528 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
2529 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
2530 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
2531 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
2532 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
2533 ; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
2534 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2535 ; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2536 ; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
2537 ; AVX2-NEXT: vzeroupper
2540 ; AVX512F-LABEL: uitofp_4i64_to_4f32:
2542 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2543 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
2544 ; AVX512F-NEXT: vmovq %xmm0, %rax
2545 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
2546 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2547 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
2548 ; AVX512F-NEXT: vmovq %xmm0, %rax
2549 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
2550 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2551 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2552 ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
2553 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2554 ; AVX512F-NEXT: vzeroupper
2555 ; AVX512F-NEXT: retq
2557 ; AVX512VL-LABEL: uitofp_4i64_to_4f32:
2558 ; AVX512VL: # %bb.0:
2559 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2560 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
2561 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2562 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
2563 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2564 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
2565 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2566 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
2567 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2568 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2569 ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
2570 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2571 ; AVX512VL-NEXT: vzeroupper
2572 ; AVX512VL-NEXT: retq
2574 ; AVX512DQ-LABEL: uitofp_4i64_to_4f32:
2575 ; AVX512DQ: # %bb.0:
2576 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2577 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
2578 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2579 ; AVX512DQ-NEXT: vzeroupper
2580 ; AVX512DQ-NEXT: retq
2582 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32:
2583 ; AVX512VLDQ: # %bb.0:
2584 ; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
2585 ; AVX512VLDQ-NEXT: vzeroupper
2586 ; AVX512VLDQ-NEXT: retq
2587 %cvt = uitofp <4 x i64> %a to <4 x float>
2588 ret <4 x float> %cvt
2591 define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) {
2592 ; SSE2-LABEL: uitofp_8i32_to_8f32:
2594 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
2595 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2596 ; SSE2-NEXT: pand %xmm2, %xmm3
2597 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
2598 ; SSE2-NEXT: por %xmm4, %xmm3
2599 ; SSE2-NEXT: psrld $16, %xmm0
2600 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
2601 ; SSE2-NEXT: por %xmm5, %xmm0
2602 ; SSE2-NEXT: movaps {{.*#+}} xmm6 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
2603 ; SSE2-NEXT: subps %xmm6, %xmm0
2604 ; SSE2-NEXT: addps %xmm3, %xmm0
2605 ; SSE2-NEXT: pand %xmm1, %xmm2
2606 ; SSE2-NEXT: por %xmm4, %xmm2
2607 ; SSE2-NEXT: psrld $16, %xmm1
2608 ; SSE2-NEXT: por %xmm5, %xmm1
2609 ; SSE2-NEXT: subps %xmm6, %xmm1
2610 ; SSE2-NEXT: addps %xmm2, %xmm1
2613 ; SSE41-LABEL: uitofp_8i32_to_8f32:
2615 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200]
2616 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2617 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
2618 ; SSE41-NEXT: psrld $16, %xmm0
2619 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928]
2620 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
2621 ; SSE41-NEXT: movaps {{.*#+}} xmm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
2622 ; SSE41-NEXT: subps %xmm5, %xmm0
2623 ; SSE41-NEXT: addps %xmm3, %xmm0
2624 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2625 ; SSE41-NEXT: psrld $16, %xmm1
2626 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
2627 ; SSE41-NEXT: subps %xmm5, %xmm1
2628 ; SSE41-NEXT: addps %xmm2, %xmm1
2631 ; AVX1-LABEL: uitofp_8i32_to_8f32:
2633 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
2634 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2635 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
2636 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2637 ; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
2638 ; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
2639 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2640 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2641 ; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
2644 ; AVX2-LABEL: uitofp_8i32_to_8f32:
2646 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200]
2647 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2648 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
2649 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928]
2650 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
2651 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
2652 ; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0
2653 ; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
2656 ; AVX512F-LABEL: uitofp_8i32_to_8f32:
2658 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2659 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
2660 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2661 ; AVX512F-NEXT: retq
2663 ; AVX512VL-LABEL: uitofp_8i32_to_8f32:
2664 ; AVX512VL: # %bb.0:
2665 ; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
2666 ; AVX512VL-NEXT: retq
2668 ; AVX512DQ-LABEL: uitofp_8i32_to_8f32:
2669 ; AVX512DQ: # %bb.0:
2670 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2671 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
2672 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2673 ; AVX512DQ-NEXT: retq
2675 ; AVX512VLDQ-LABEL: uitofp_8i32_to_8f32:
2676 ; AVX512VLDQ: # %bb.0:
2677 ; AVX512VLDQ-NEXT: vcvtudq2ps %ymm0, %ymm0
2678 ; AVX512VLDQ-NEXT: retq
2679 %cvt = uitofp <8 x i32> %a to <8 x float>
2680 ret <8 x float> %cvt
2683 define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) {
2684 ; SSE2-LABEL: uitofp_8i16_to_8f32:
2686 ; SSE2-NEXT: pxor %xmm1, %xmm1
2687 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2688 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2689 ; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
2690 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2691 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
2692 ; SSE2-NEXT: movaps %xmm2, %xmm0
2695 ; SSE41-LABEL: uitofp_8i16_to_8f32:
2697 ; SSE41-NEXT: pxor %xmm1, %xmm1
2698 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2699 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2700 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1
2701 ; SSE41-NEXT: cvtdq2ps %xmm2, %xmm0
2704 ; AVX1-LABEL: uitofp_8i16_to_8f32:
2706 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2707 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2708 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2709 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2710 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2713 ; AVX2-LABEL: uitofp_8i16_to_8f32:
2715 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2716 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2719 ; AVX512-LABEL: uitofp_8i16_to_8f32:
2721 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2722 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2724 %cvt = uitofp <8 x i16> %a to <8 x float>
2725 ret <8 x float> %cvt
2728 define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) {
2729 ; SSE2-LABEL: uitofp_8i8_to_8f32:
2731 ; SSE2-NEXT: pxor %xmm1, %xmm1
2732 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2733 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2734 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2735 ; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
2736 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2737 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
2738 ; SSE2-NEXT: movaps %xmm2, %xmm0
2741 ; SSE41-LABEL: uitofp_8i8_to_8f32:
2743 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2744 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2
2745 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2746 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2747 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1
2748 ; SSE41-NEXT: movaps %xmm2, %xmm0
2751 ; AVX1-LABEL: uitofp_8i8_to_8f32:
2753 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2754 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2755 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2756 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2757 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2760 ; AVX2-LABEL: uitofp_8i8_to_8f32:
2762 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2763 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2766 ; AVX512-LABEL: uitofp_8i8_to_8f32:
2768 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2769 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2771 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2772 %cvt = uitofp <8 x i8> %shuf to <8 x float>
2773 ret <8 x float> %cvt
2776 define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
2777 ; SSE2-LABEL: uitofp_16i8_to_8f32:
2779 ; SSE2-NEXT: pxor %xmm1, %xmm1
2780 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2781 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2782 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2783 ; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
2784 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2785 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
2786 ; SSE2-NEXT: movaps %xmm2, %xmm0
2789 ; SSE41-LABEL: uitofp_16i8_to_8f32:
2791 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2792 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2
2793 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2794 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2795 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1
2796 ; SSE41-NEXT: movaps %xmm2, %xmm0
2799 ; AVX1-LABEL: uitofp_16i8_to_8f32:
2801 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2802 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2803 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2804 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2805 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2808 ; AVX2-LABEL: uitofp_16i8_to_8f32:
2810 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2811 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2814 ; AVX512-LABEL: uitofp_16i8_to_8f32:
2816 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2817 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
2818 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2820 %cvt = uitofp <16 x i8> %a to <16 x float>
2821 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2822 ret <8 x float> %shuf
2826 ; Load Signed Integer to Double
2829 define <2 x double> @sitofp_load_2i64_to_2f64(ptr%a) {
2830 ; SSE-LABEL: sitofp_load_2i64_to_2f64:
2832 ; SSE-NEXT: cvtsi2sdq 8(%rdi), %xmm1
2833 ; SSE-NEXT: cvtsi2sdq (%rdi), %xmm0
2834 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2837 ; VEX-LABEL: sitofp_load_2i64_to_2f64:
2839 ; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
2840 ; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
2841 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2844 ; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
2846 ; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
2847 ; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
2848 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2849 ; AVX512F-NEXT: retq
2851 ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
2852 ; AVX512VL: # %bb.0:
2853 ; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
2854 ; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
2855 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2856 ; AVX512VL-NEXT: retq
2858 ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
2859 ; AVX512DQ: # %bb.0:
2860 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
2861 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
2862 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2863 ; AVX512DQ-NEXT: vzeroupper
2864 ; AVX512DQ-NEXT: retq
2866 ; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64:
2867 ; AVX512VLDQ: # %bb.0:
2868 ; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %xmm0
2869 ; AVX512VLDQ-NEXT: retq
2870 %ld = load <2 x i64>, ptr%a
2871 %cvt = sitofp <2 x i64> %ld to <2 x double>
2872 ret <2 x double> %cvt
2875 define <2 x double> @sitofp_load_2i32_to_2f64(ptr%a) {
2876 ; SSE-LABEL: sitofp_load_2i32_to_2f64:
2878 ; SSE-NEXT: cvtdq2pd (%rdi), %xmm0
2881 ; AVX-LABEL: sitofp_load_2i32_to_2f64:
2883 ; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0
2885 %ld = load <2 x i32>, ptr%a
2886 %cvt = sitofp <2 x i32> %ld to <2 x double>
2887 ret <2 x double> %cvt
2890 define <2 x double> @sitofp_volatile_load_4i32_to_2f64(ptr%a) {
2891 ; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
2893 ; SSE-NEXT: movaps (%rdi), %xmm0
2894 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
2897 ; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
2899 ; AVX-NEXT: vmovaps (%rdi), %xmm0
2900 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
2902 %ld = load volatile <4 x i32>, ptr%a
2903 %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2904 %cvt = sitofp <2 x i32> %b to <2 x double>
2905 ret <2 x double> %cvt
2908 define <2 x double> @sitofp_load_4i32_to_2f64_2(ptr %x) {
2909 ; SSE-LABEL: sitofp_load_4i32_to_2f64_2:
2911 ; SSE-NEXT: cvtdq2pd (%rdi), %xmm0
2914 ; AVX-LABEL: sitofp_load_4i32_to_2f64_2:
2916 ; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0
2918 %a = load <4 x i32>, ptr %x
2919 %b = sitofp <4 x i32> %a to <4 x double>
2920 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1>
2924 define <2 x double> @sitofp_volatile_load_4i32_to_2f64_2(ptr %x) {
2925 ; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64_2:
2927 ; SSE-NEXT: movaps (%rdi), %xmm0
2928 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
2931 ; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64_2:
2933 ; AVX-NEXT: vmovaps (%rdi), %xmm0
2934 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
2936 %a = load volatile <4 x i32>, ptr %x
2937 %b = sitofp <4 x i32> %a to <4 x double>
2938 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1>
2942 define <2 x double> @sitofp_load_2i16_to_2f64(ptr%a) {
2943 ; SSE2-LABEL: sitofp_load_2i16_to_2f64:
2945 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2946 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
2947 ; SSE2-NEXT: psrad $16, %xmm0
2948 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
2951 ; SSE41-LABEL: sitofp_load_2i16_to_2f64:
2953 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2954 ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
2955 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
2958 ; AVX-LABEL: sitofp_load_2i16_to_2f64:
2960 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2961 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
2962 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
2964 %ld = load <2 x i16>, ptr%a
2965 %cvt = sitofp <2 x i16> %ld to <2 x double>
2966 ret <2 x double> %cvt
2969 define <2 x double> @sitofp_load_2i8_to_2f64(ptr%a) {
2970 ; SSE2-LABEL: sitofp_load_2i8_to_2f64:
2972 ; SSE2-NEXT: movzwl (%rdi), %eax
2973 ; SSE2-NEXT: movd %eax, %xmm0
2974 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2975 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
2976 ; SSE2-NEXT: psrad $24, %xmm0
2977 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
2980 ; SSE41-LABEL: sitofp_load_2i8_to_2f64:
2982 ; SSE41-NEXT: movzwl (%rdi), %eax
2983 ; SSE41-NEXT: movd %eax, %xmm0
2984 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
2985 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
2988 ; AVX-LABEL: sitofp_load_2i8_to_2f64:
2990 ; AVX-NEXT: movzwl (%rdi), %eax
2991 ; AVX-NEXT: vmovd %eax, %xmm0
2992 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
2993 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
2995 %ld = load <2 x i8>, ptr%a
2996 %cvt = sitofp <2 x i8> %ld to <2 x double>
2997 ret <2 x double> %cvt
3000 define <4 x double> @sitofp_load_4i64_to_4f64(ptr%a) {
3001 ; SSE-LABEL: sitofp_load_4i64_to_4f64:
3003 ; SSE-NEXT: cvtsi2sdq 8(%rdi), %xmm1
3004 ; SSE-NEXT: cvtsi2sdq (%rdi), %xmm0
3005 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3006 ; SSE-NEXT: cvtsi2sdq 24(%rdi), %xmm2
3007 ; SSE-NEXT: xorps %xmm1, %xmm1
3008 ; SSE-NEXT: cvtsi2sdq 16(%rdi), %xmm1
3009 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3012 ; VEX-LABEL: sitofp_load_4i64_to_4f64:
3014 ; VEX-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
3015 ; VEX-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
3016 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
3017 ; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
3018 ; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
3019 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
3020 ; VEX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3023 ; AVX512F-LABEL: sitofp_load_4i64_to_4f64:
3025 ; AVX512F-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
3026 ; AVX512F-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
3027 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
3028 ; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
3029 ; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
3030 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
3031 ; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3032 ; AVX512F-NEXT: retq
3034 ; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
3035 ; AVX512VL: # %bb.0:
3036 ; AVX512VL-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
3037 ; AVX512VL-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
3038 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
3039 ; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
3040 ; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
3041 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
3042 ; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3043 ; AVX512VL-NEXT: retq
3045 ; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64:
3046 ; AVX512DQ: # %bb.0:
3047 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
3048 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
3049 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
3050 ; AVX512DQ-NEXT: retq
3052 ; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f64:
3053 ; AVX512VLDQ: # %bb.0:
3054 ; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %ymm0
3055 ; AVX512VLDQ-NEXT: retq
3056 %ld = load <4 x i64>, ptr%a
3057 %cvt = sitofp <4 x i64> %ld to <4 x double>
3058 ret <4 x double> %cvt
3061 define <4 x double> @sitofp_load_4i32_to_4f64(ptr%a) {
3062 ; SSE-LABEL: sitofp_load_4i32_to_4f64:
3064 ; SSE-NEXT: movdqa (%rdi), %xmm1
3065 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
3066 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3067 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
3070 ; AVX-LABEL: sitofp_load_4i32_to_4f64:
3072 ; AVX-NEXT: vcvtdq2pd (%rdi), %ymm0
3074 %ld = load <4 x i32>, ptr%a
3075 %cvt = sitofp <4 x i32> %ld to <4 x double>
3076 ret <4 x double> %cvt
3079 define <4 x double> @sitofp_load_4i16_to_4f64(ptr%a) {
3080 ; SSE2-LABEL: sitofp_load_4i16_to_4f64:
3082 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3083 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3084 ; SSE2-NEXT: psrad $16, %xmm1
3085 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
3086 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3087 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
3090 ; SSE41-LABEL: sitofp_load_4i16_to_4f64:
3092 ; SSE41-NEXT: pmovsxwd (%rdi), %xmm1
3093 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
3094 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3095 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
3098 ; AVX-LABEL: sitofp_load_4i16_to_4f64:
3100 ; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
3101 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3103 %ld = load <4 x i16>, ptr%a
3104 %cvt = sitofp <4 x i16> %ld to <4 x double>
3105 ret <4 x double> %cvt
3108 define <4 x double> @sitofp_load_4i8_to_4f64(ptr%a) {
3109 ; SSE2-LABEL: sitofp_load_4i8_to_4f64:
3111 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3112 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3113 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3114 ; SSE2-NEXT: psrad $24, %xmm1
3115 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
3116 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3117 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
3120 ; SSE41-LABEL: sitofp_load_4i8_to_4f64:
3122 ; SSE41-NEXT: pmovsxbd (%rdi), %xmm1
3123 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
3124 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3125 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
3128 ; AVX-LABEL: sitofp_load_4i8_to_4f64:
3130 ; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
3131 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3133 %ld = load <4 x i8>, ptr%a
3134 %cvt = sitofp <4 x i8> %ld to <4 x double>
3135 ret <4 x double> %cvt
3139 ; Load Unsigned Integer to Double
3142 define <2 x double> @uitofp_load_2i64_to_2f64(ptr%a) {
3143 ; SSE2-LABEL: uitofp_load_2i64_to_2f64:
3145 ; SSE2-NEXT: movdqa (%rdi), %xmm0
3146 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
3147 ; SSE2-NEXT: pand %xmm0, %xmm1
3148 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3149 ; SSE2-NEXT: psrlq $32, %xmm0
3150 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3151 ; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3152 ; SSE2-NEXT: addpd %xmm1, %xmm0
3155 ; SSE41-LABEL: uitofp_load_2i64_to_2f64:
3157 ; SSE41-NEXT: movdqa (%rdi), %xmm0
3158 ; SSE41-NEXT: pxor %xmm1, %xmm1
3159 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
3160 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3161 ; SSE41-NEXT: psrlq $32, %xmm0
3162 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3163 ; SSE41-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3164 ; SSE41-NEXT: addpd %xmm1, %xmm0
3167 ; AVX1-LABEL: uitofp_load_2i64_to_2f64:
3169 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
3170 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3171 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
3172 ; AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
3173 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
3174 ; AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3175 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3176 ; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
3179 ; AVX2-LABEL: uitofp_load_2i64_to_2f64:
3181 ; AVX2-NEXT: vmovdqa (%rdi), %xmm0
3182 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
3183 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
3184 ; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
3185 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
3186 ; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3187 ; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3188 ; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0
3191 ; AVX512F-LABEL: uitofp_load_2i64_to_2f64:
3193 ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3194 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
3195 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
3196 ; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
3197 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
3198 ; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3199 ; AVX512F-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3200 ; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0
3201 ; AVX512F-NEXT: retq
3203 ; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
3204 ; AVX512VL: # %bb.0:
3205 ; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
3206 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3207 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
3208 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
3209 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
3210 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
3211 ; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
3212 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0
3213 ; AVX512VL-NEXT: retq
3215 ; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64:
3216 ; AVX512DQ: # %bb.0:
3217 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
3218 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
3219 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3220 ; AVX512DQ-NEXT: vzeroupper
3221 ; AVX512DQ-NEXT: retq
3223 ; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64:
3224 ; AVX512VLDQ: # %bb.0:
3225 ; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %xmm0
3226 ; AVX512VLDQ-NEXT: retq
3227 %ld = load <2 x i64>, ptr%a
3228 %cvt = uitofp <2 x i64> %ld to <2 x double>
3229 ret <2 x double> %cvt
3232 define <2 x double> @uitofp_load_2i32_to_2f64(ptr%a) {
3233 ; SSE2-LABEL: uitofp_load_2i32_to_2f64:
3235 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
3236 ; SSE2-NEXT: xorpd %xmm1, %xmm1
3237 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3238 ; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3239 ; SSE2-NEXT: orpd %xmm1, %xmm0
3240 ; SSE2-NEXT: subpd %xmm1, %xmm0
3243 ; SSE41-LABEL: uitofp_load_2i32_to_2f64:
3245 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3246 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3247 ; SSE41-NEXT: por %xmm1, %xmm0
3248 ; SSE41-NEXT: subpd %xmm1, %xmm0
3251 ; AVX1-LABEL: uitofp_load_2i32_to_2f64:
3253 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3254 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3255 ; AVX1-NEXT: # xmm1 = mem[0,0]
3256 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
3257 ; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
3260 ; AVX2-LABEL: uitofp_load_2i32_to_2f64:
3262 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3263 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3264 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
3265 ; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
3268 ; AVX512F-LABEL: uitofp_load_2i32_to_2f64:
3270 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
3271 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
3272 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3273 ; AVX512F-NEXT: vzeroupper
3274 ; AVX512F-NEXT: retq
3276 ; AVX512VL-LABEL: uitofp_load_2i32_to_2f64:
3277 ; AVX512VL: # %bb.0:
3278 ; AVX512VL-NEXT: vcvtudq2pd (%rdi), %xmm0
3279 ; AVX512VL-NEXT: retq
3281 ; AVX512DQ-LABEL: uitofp_load_2i32_to_2f64:
3282 ; AVX512DQ: # %bb.0:
3283 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
3284 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
3285 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3286 ; AVX512DQ-NEXT: vzeroupper
3287 ; AVX512DQ-NEXT: retq
3289 ; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64:
3290 ; AVX512VLDQ: # %bb.0:
3291 ; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0
3292 ; AVX512VLDQ-NEXT: retq
3293 %ld = load <2 x i32>, ptr%a
3294 %cvt = uitofp <2 x i32> %ld to <2 x double>
3295 ret <2 x double> %cvt
3298 define <2 x double> @uitofp_load_4i32_to_2f64_2(ptr %x) {
3299 ; SSE2-LABEL: uitofp_load_4i32_to_2f64_2:
3301 ; SSE2-NEXT: movapd (%rdi), %xmm0
3302 ; SSE2-NEXT: xorpd %xmm1, %xmm1
3303 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3304 ; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3305 ; SSE2-NEXT: orpd %xmm1, %xmm0
3306 ; SSE2-NEXT: subpd %xmm1, %xmm0
3309 ; SSE41-LABEL: uitofp_load_4i32_to_2f64_2:
3311 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3312 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3313 ; SSE41-NEXT: por %xmm1, %xmm0
3314 ; SSE41-NEXT: subpd %xmm1, %xmm0
3317 ; AVX1-LABEL: uitofp_load_4i32_to_2f64_2:
3319 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3320 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3321 ; AVX1-NEXT: # xmm1 = mem[0,0]
3322 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
3323 ; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
3326 ; AVX2-LABEL: uitofp_load_4i32_to_2f64_2:
3328 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3329 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3330 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
3331 ; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
3332 ; AVX2-NEXT: vzeroupper
3335 ; AVX512F-LABEL: uitofp_load_4i32_to_2f64_2:
3337 ; AVX512F-NEXT: vmovaps (%rdi), %xmm0
3338 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
3339 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3340 ; AVX512F-NEXT: vzeroupper
3341 ; AVX512F-NEXT: retq
3343 ; AVX512VL-LABEL: uitofp_load_4i32_to_2f64_2:
3344 ; AVX512VL: # %bb.0:
3345 ; AVX512VL-NEXT: vcvtudq2pd (%rdi), %xmm0
3346 ; AVX512VL-NEXT: retq
3348 ; AVX512DQ-LABEL: uitofp_load_4i32_to_2f64_2:
3349 ; AVX512DQ: # %bb.0:
3350 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
3351 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
3352 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3353 ; AVX512DQ-NEXT: vzeroupper
3354 ; AVX512DQ-NEXT: retq
3356 ; AVX512VLDQ-LABEL: uitofp_load_4i32_to_2f64_2:
3357 ; AVX512VLDQ: # %bb.0:
3358 ; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0
3359 ; AVX512VLDQ-NEXT: retq
3360 %a = load <4 x i32>, ptr %x
3361 %b = uitofp <4 x i32> %a to <4 x double>
3362 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1>
3366 define <2 x double> @uitofp_volatile_load_4i32_to_2f64_2(ptr %x) {
3367 ; SSE2-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3369 ; SSE2-NEXT: movapd (%rdi), %xmm0
3370 ; SSE2-NEXT: xorpd %xmm1, %xmm1
3371 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3372 ; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3373 ; SSE2-NEXT: orpd %xmm1, %xmm0
3374 ; SSE2-NEXT: subpd %xmm1, %xmm0
3377 ; SSE41-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3379 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3380 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3381 ; SSE41-NEXT: por %xmm1, %xmm0
3382 ; SSE41-NEXT: subpd %xmm1, %xmm0
3385 ; AVX1-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3387 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3388 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3389 ; AVX1-NEXT: # xmm1 = mem[0,0]
3390 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
3391 ; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
3394 ; AVX2-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3396 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3397 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3398 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
3399 ; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
3400 ; AVX2-NEXT: vzeroupper
3403 ; AVX512F-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3405 ; AVX512F-NEXT: vmovaps (%rdi), %xmm0
3406 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
3407 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3408 ; AVX512F-NEXT: vzeroupper
3409 ; AVX512F-NEXT: retq
3411 ; AVX512VL-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3412 ; AVX512VL: # %bb.0:
3413 ; AVX512VL-NEXT: vmovaps (%rdi), %xmm0
3414 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
3415 ; AVX512VL-NEXT: retq
3417 ; AVX512DQ-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3418 ; AVX512DQ: # %bb.0:
3419 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
3420 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
3421 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3422 ; AVX512DQ-NEXT: vzeroupper
3423 ; AVX512DQ-NEXT: retq
3425 ; AVX512VLDQ-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3426 ; AVX512VLDQ: # %bb.0:
3427 ; AVX512VLDQ-NEXT: vmovaps (%rdi), %xmm0
3428 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
3429 ; AVX512VLDQ-NEXT: retq
3430 %a = load volatile <4 x i32>, ptr %x
3431 %b = uitofp <4 x i32> %a to <4 x double>
3432 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1>
3436 define <2 x double> @uitofp_load_2i16_to_2f64(ptr%a) {
3437 ; SSE2-LABEL: uitofp_load_2i16_to_2f64:
3439 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3440 ; SSE2-NEXT: pxor %xmm1, %xmm1
3441 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3442 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
3445 ; SSE41-LABEL: uitofp_load_2i16_to_2f64:
3447 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3448 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3449 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
3452 ; AVX-LABEL: uitofp_load_2i16_to_2f64:
3454 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3455 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3456 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
3458 %ld = load <2 x i16>, ptr%a
3459 %cvt = uitofp <2 x i16> %ld to <2 x double>
3460 ret <2 x double> %cvt
3463 define <2 x double> @uitofp_load_2i8_to_2f64(ptr%a) {
3464 ; SSE2-LABEL: uitofp_load_2i8_to_2f64:
3466 ; SSE2-NEXT: movzwl (%rdi), %eax
3467 ; SSE2-NEXT: movd %eax, %xmm0
3468 ; SSE2-NEXT: pxor %xmm1, %xmm1
3469 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3470 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3471 ; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
3474 ; SSE41-LABEL: uitofp_load_2i8_to_2f64:
3476 ; SSE41-NEXT: movzwl (%rdi), %eax
3477 ; SSE41-NEXT: movd %eax, %xmm0
3478 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3479 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
3482 ; AVX-LABEL: uitofp_load_2i8_to_2f64:
3484 ; AVX-NEXT: movzwl (%rdi), %eax
3485 ; AVX-NEXT: vmovd %eax, %xmm0
3486 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3487 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
3489 %ld = load <2 x i8>, ptr%a
3490 %cvt = uitofp <2 x i8> %ld to <2 x double>
3491 ret <2 x double> %cvt
3494 define <4 x double> @uitofp_load_4i64_to_4f64(ptr%a) {
3495 ; SSE2-LABEL: uitofp_load_4i64_to_4f64:
3497 ; SSE2-NEXT: movdqa (%rdi), %xmm0
3498 ; SSE2-NEXT: movdqa 16(%rdi), %xmm1
3499 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
3500 ; SSE2-NEXT: movdqa %xmm0, %xmm3
3501 ; SSE2-NEXT: pand %xmm2, %xmm3
3502 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
3503 ; SSE2-NEXT: por %xmm4, %xmm3
3504 ; SSE2-NEXT: psrlq $32, %xmm0
3505 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
3506 ; SSE2-NEXT: por %xmm5, %xmm0
3507 ; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
3508 ; SSE2-NEXT: subpd %xmm6, %xmm0
3509 ; SSE2-NEXT: addpd %xmm3, %xmm0
3510 ; SSE2-NEXT: pand %xmm1, %xmm2
3511 ; SSE2-NEXT: por %xmm4, %xmm2
3512 ; SSE2-NEXT: psrlq $32, %xmm1
3513 ; SSE2-NEXT: por %xmm5, %xmm1
3514 ; SSE2-NEXT: subpd %xmm6, %xmm1
3515 ; SSE2-NEXT: addpd %xmm2, %xmm1
3518 ; SSE41-LABEL: uitofp_load_4i64_to_4f64:
3520 ; SSE41-NEXT: movdqa (%rdi), %xmm0
3521 ; SSE41-NEXT: movdqa 16(%rdi), %xmm1
3522 ; SSE41-NEXT: pxor %xmm2, %xmm2
3523 ; SSE41-NEXT: movdqa %xmm0, %xmm3
3524 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
3525 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
3526 ; SSE41-NEXT: por %xmm4, %xmm3
3527 ; SSE41-NEXT: psrlq $32, %xmm0
3528 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
3529 ; SSE41-NEXT: por %xmm5, %xmm0
3530 ; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
3531 ; SSE41-NEXT: subpd %xmm6, %xmm0
3532 ; SSE41-NEXT: addpd %xmm3, %xmm0
3533 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
3534 ; SSE41-NEXT: por %xmm4, %xmm2
3535 ; SSE41-NEXT: psrlq $32, %xmm1
3536 ; SSE41-NEXT: por %xmm5, %xmm1
3537 ; SSE41-NEXT: subpd %xmm6, %xmm1
3538 ; SSE41-NEXT: addpd %xmm2, %xmm1
3541 ; AVX1-LABEL: uitofp_load_4i64_to_4f64:
3543 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
3544 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
3545 ; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
3546 ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
3547 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
3548 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
3549 ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
3550 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
3551 ; AVX1-NEXT: vaddpd %ymm0, %ymm2, %ymm0
3554 ; AVX2-LABEL: uitofp_load_4i64_to_4f64:
3556 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3557 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
3558 ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
3559 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
3560 ; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
3561 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
3562 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
3563 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
3564 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
3565 ; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0
3566 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
3569 ; AVX512F-LABEL: uitofp_load_4i64_to_4f64:
3571 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3572 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
3573 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
3574 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
3575 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
3576 ; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0
3577 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
3578 ; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
3579 ; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
3580 ; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0
3581 ; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0
3582 ; AVX512F-NEXT: retq
3584 ; AVX512VL-LABEL: uitofp_load_4i64_to_4f64:
3585 ; AVX512VL: # %bb.0:
3586 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
3587 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3588 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
3589 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
3590 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
3591 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
3592 ; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
3593 ; AVX512VL-NEXT: vaddpd %ymm0, %ymm1, %ymm0
3594 ; AVX512VL-NEXT: retq
3596 ; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64:
3597 ; AVX512DQ: # %bb.0:
3598 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
3599 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
3600 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
3601 ; AVX512DQ-NEXT: retq
3603 ; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f64:
3604 ; AVX512VLDQ: # %bb.0:
3605 ; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %ymm0
3606 ; AVX512VLDQ-NEXT: retq
3607 %ld = load <4 x i64>, ptr%a
3608 %cvt = uitofp <4 x i64> %ld to <4 x double>
3609 ret <4 x double> %cvt
3612 define <4 x double> @uitofp_load_4i32_to_4f64(ptr%a) {
3613 ; SSE2-LABEL: uitofp_load_4i32_to_4f64:
3615 ; SSE2-NEXT: movapd (%rdi), %xmm1
3616 ; SSE2-NEXT: xorpd %xmm2, %xmm2
3617 ; SSE2-NEXT: movapd %xmm1, %xmm0
3618 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3619 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
3620 ; SSE2-NEXT: orpd %xmm3, %xmm0
3621 ; SSE2-NEXT: subpd %xmm3, %xmm0
3622 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
3623 ; SSE2-NEXT: orpd %xmm3, %xmm1
3624 ; SSE2-NEXT: subpd %xmm3, %xmm1
3627 ; SSE41-LABEL: uitofp_load_4i32_to_4f64:
3629 ; SSE41-NEXT: movdqa (%rdi), %xmm1
3630 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
3631 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
3632 ; SSE41-NEXT: por %xmm2, %xmm0
3633 ; SSE41-NEXT: subpd %xmm2, %xmm0
3634 ; SSE41-NEXT: pxor %xmm3, %xmm3
3635 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3636 ; SSE41-NEXT: por %xmm2, %xmm1
3637 ; SSE41-NEXT: subpd %xmm2, %xmm1
3640 ; AVX1-LABEL: uitofp_load_4i32_to_4f64:
3642 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
3643 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3644 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3645 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
3646 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3647 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
3648 ; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
3649 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
3652 ; AVX2-LABEL: uitofp_load_4i32_to_4f64:
3654 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3655 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
3656 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
3657 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0
3660 ; AVX512F-LABEL: uitofp_load_4i32_to_4f64:
3662 ; AVX512F-NEXT: vmovaps (%rdi), %xmm0
3663 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
3664 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
3665 ; AVX512F-NEXT: retq
3667 ; AVX512VL-LABEL: uitofp_load_4i32_to_4f64:
3668 ; AVX512VL: # %bb.0:
3669 ; AVX512VL-NEXT: vcvtudq2pd (%rdi), %ymm0
3670 ; AVX512VL-NEXT: retq
3672 ; AVX512DQ-LABEL: uitofp_load_4i32_to_4f64:
3673 ; AVX512DQ: # %bb.0:
3674 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
3675 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
3676 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
3677 ; AVX512DQ-NEXT: retq
3679 ; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f64:
3680 ; AVX512VLDQ: # %bb.0:
3681 ; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %ymm0
3682 ; AVX512VLDQ-NEXT: retq
3683 %ld = load <4 x i32>, ptr%a
3684 %cvt = uitofp <4 x i32> %ld to <4 x double>
3685 ret <4 x double> %cvt
3688 define <4 x double> @uitofp_load_4i16_to_4f64(ptr%a) {
3689 ; SSE2-LABEL: uitofp_load_4i16_to_4f64:
3691 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
3692 ; SSE2-NEXT: pxor %xmm0, %xmm0
3693 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3694 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
3695 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3696 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
3699 ; SSE41-LABEL: uitofp_load_4i16_to_4f64:
3701 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3702 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
3703 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3704 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
3707 ; AVX-LABEL: uitofp_load_4i16_to_4f64:
3709 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3710 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3712 %ld = load <4 x i16>, ptr%a
3713 %cvt = uitofp <4 x i16> %ld to <4 x double>
3714 ret <4 x double> %cvt
3717 define <4 x double> @uitofp_load_4i8_to_4f64(ptr%a) {
3718 ; SSE2-LABEL: uitofp_load_4i8_to_4f64:
3720 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
3721 ; SSE2-NEXT: pxor %xmm0, %xmm0
3722 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3723 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3724 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
3725 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3726 ; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
3729 ; SSE41-LABEL: uitofp_load_4i8_to_4f64:
3731 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
3732 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
3733 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3734 ; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
3737 ; AVX-LABEL: uitofp_load_4i8_to_4f64:
3739 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
3740 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3742 %ld = load <4 x i8>, ptr%a
3743 %cvt = uitofp <4 x i8> %ld to <4 x double>
3744 ret <4 x double> %cvt
3748 ; Load Signed Integer to Float
3751 define <4 x float> @sitofp_load_4i64_to_4f32(ptr%a) {
3752 ; SSE2-LABEL: sitofp_load_4i64_to_4f32:
3754 ; SSE2-NEXT: cvtsi2ssq 24(%rdi), %xmm0
3755 ; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm1
3756 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3757 ; SSE2-NEXT: cvtsi2ssq 8(%rdi), %xmm2
3758 ; SSE2-NEXT: xorps %xmm0, %xmm0
3759 ; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
3760 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3761 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3764 ; SSE41-LABEL: sitofp_load_4i64_to_4f32:
3766 ; SSE41-NEXT: cvtsi2ssq 8(%rdi), %xmm1
3767 ; SSE41-NEXT: cvtsi2ssq (%rdi), %xmm0
3768 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
3769 ; SSE41-NEXT: xorps %xmm1, %xmm1
3770 ; SSE41-NEXT: cvtsi2ssq 16(%rdi), %xmm1
3771 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
3772 ; SSE41-NEXT: xorps %xmm1, %xmm1
3773 ; SSE41-NEXT: cvtsi2ssq 24(%rdi), %xmm1
3774 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
3777 ; VEX-LABEL: sitofp_load_4i64_to_4f32:
3779 ; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
3780 ; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
3781 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
3782 ; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
3783 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
3784 ; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
3785 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
3788 ; AVX512F-LABEL: sitofp_load_4i64_to_4f32:
3790 ; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
3791 ; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
3792 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
3793 ; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
3794 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
3795 ; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
3796 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
3797 ; AVX512F-NEXT: retq
3799 ; AVX512VL-LABEL: sitofp_load_4i64_to_4f32:
3800 ; AVX512VL: # %bb.0:
3801 ; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
3802 ; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
3803 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
3804 ; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
3805 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
3806 ; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
3807 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
3808 ; AVX512VL-NEXT: retq
3810 ; AVX512DQ-LABEL: sitofp_load_4i64_to_4f32:
3811 ; AVX512DQ: # %bb.0:
3812 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
3813 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
3814 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3815 ; AVX512DQ-NEXT: vzeroupper
3816 ; AVX512DQ-NEXT: retq
3818 ; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f32:
3819 ; AVX512VLDQ: # %bb.0:
3820 ; AVX512VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0
3821 ; AVX512VLDQ-NEXT: retq
3822 %ld = load <4 x i64>, ptr%a
3823 %cvt = sitofp <4 x i64> %ld to <4 x float>
3824 ret <4 x float> %cvt
3827 define <4 x float> @sitofp_load_4i32_to_4f32(ptr%a) {
3828 ; SSE-LABEL: sitofp_load_4i32_to_4f32:
3830 ; SSE-NEXT: cvtdq2ps (%rdi), %xmm0
3833 ; AVX-LABEL: sitofp_load_4i32_to_4f32:
3835 ; AVX-NEXT: vcvtdq2ps (%rdi), %xmm0
3837 %ld = load <4 x i32>, ptr%a
3838 %cvt = sitofp <4 x i32> %ld to <4 x float>
3839 ret <4 x float> %cvt
3842 define <4 x float> @sitofp_load_4i16_to_4f32(ptr%a) {
3843 ; SSE2-LABEL: sitofp_load_4i16_to_4f32:
3845 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3846 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3847 ; SSE2-NEXT: psrad $16, %xmm0
3848 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
3851 ; SSE41-LABEL: sitofp_load_4i16_to_4f32:
3853 ; SSE41-NEXT: pmovsxwd (%rdi), %xmm0
3854 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
3857 ; AVX-LABEL: sitofp_load_4i16_to_4f32:
3859 ; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
3860 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
3862 %ld = load <4 x i16>, ptr%a
3863 %cvt = sitofp <4 x i16> %ld to <4 x float>
3864 ret <4 x float> %cvt
3867 define <4 x float> @sitofp_load_4i8_to_4f32(ptr%a) {
3868 ; SSE2-LABEL: sitofp_load_4i8_to_4f32:
3870 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3871 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3872 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3873 ; SSE2-NEXT: psrad $24, %xmm0
3874 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
3877 ; SSE41-LABEL: sitofp_load_4i8_to_4f32:
3879 ; SSE41-NEXT: pmovsxbd (%rdi), %xmm0
3880 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
3883 ; AVX-LABEL: sitofp_load_4i8_to_4f32:
3885 ; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
3886 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
3888 %ld = load <4 x i8>, ptr%a
3889 %cvt = sitofp <4 x i8> %ld to <4 x float>
3890 ret <4 x float> %cvt
3893 define <8 x float> @sitofp_load_8i64_to_8f32(ptr%a) {
3894 ; SSE2-LABEL: sitofp_load_8i64_to_8f32:
3896 ; SSE2-NEXT: cvtsi2ssq 24(%rdi), %xmm0
3897 ; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm1
3898 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3899 ; SSE2-NEXT: cvtsi2ssq 8(%rdi), %xmm2
3900 ; SSE2-NEXT: xorps %xmm0, %xmm0
3901 ; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
3902 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3903 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3904 ; SSE2-NEXT: xorps %xmm1, %xmm1
3905 ; SSE2-NEXT: cvtsi2ssq 56(%rdi), %xmm1
3906 ; SSE2-NEXT: xorps %xmm2, %xmm2
3907 ; SSE2-NEXT: cvtsi2ssq 48(%rdi), %xmm2
3908 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3909 ; SSE2-NEXT: cvtsi2ssq 40(%rdi), %xmm3
3910 ; SSE2-NEXT: xorps %xmm1, %xmm1
3911 ; SSE2-NEXT: cvtsi2ssq 32(%rdi), %xmm1
3912 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
3913 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3916 ; SSE41-LABEL: sitofp_load_8i64_to_8f32:
3918 ; SSE41-NEXT: cvtsi2ssq 8(%rdi), %xmm1
3919 ; SSE41-NEXT: cvtsi2ssq (%rdi), %xmm0
3920 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
3921 ; SSE41-NEXT: xorps %xmm1, %xmm1
3922 ; SSE41-NEXT: cvtsi2ssq 16(%rdi), %xmm1
3923 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
3924 ; SSE41-NEXT: xorps %xmm1, %xmm1
3925 ; SSE41-NEXT: cvtsi2ssq 24(%rdi), %xmm1
3926 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
3927 ; SSE41-NEXT: cvtsi2ssq 40(%rdi), %xmm2
3928 ; SSE41-NEXT: xorps %xmm1, %xmm1
3929 ; SSE41-NEXT: cvtsi2ssq 32(%rdi), %xmm1
3930 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
3931 ; SSE41-NEXT: xorps %xmm2, %xmm2
3932 ; SSE41-NEXT: cvtsi2ssq 48(%rdi), %xmm2
3933 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3934 ; SSE41-NEXT: xorps %xmm2, %xmm2
3935 ; SSE41-NEXT: cvtsi2ssq 56(%rdi), %xmm2
3936 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
3939 ; VEX-LABEL: sitofp_load_8i64_to_8f32:
3941 ; VEX-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
3942 ; VEX-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
3943 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
3944 ; VEX-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
3945 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
3946 ; VEX-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
3947 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
3948 ; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
3949 ; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
3950 ; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3951 ; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
3952 ; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3953 ; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
3954 ; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
3955 ; VEX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3958 ; AVX512F-LABEL: sitofp_load_8i64_to_8f32:
3960 ; AVX512F-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
3961 ; AVX512F-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
3962 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
3963 ; AVX512F-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
3964 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
3965 ; AVX512F-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
3966 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
3967 ; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
3968 ; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
3969 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3970 ; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
3971 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3972 ; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
3973 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
3974 ; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3975 ; AVX512F-NEXT: retq
3977 ; AVX512VL-LABEL: sitofp_load_8i64_to_8f32:
3978 ; AVX512VL: # %bb.0:
3979 ; AVX512VL-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
3980 ; AVX512VL-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
3981 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
3982 ; AVX512VL-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
3983 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
3984 ; AVX512VL-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
3985 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
3986 ; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
3987 ; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
3988 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3989 ; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
3990 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3991 ; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
3992 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
3993 ; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3994 ; AVX512VL-NEXT: retq
3996 ; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32:
3997 ; AVX512DQ: # %bb.0:
3998 ; AVX512DQ-NEXT: vcvtqq2ps (%rdi), %ymm0
3999 ; AVX512DQ-NEXT: retq
4001 ; AVX512VLDQ-LABEL: sitofp_load_8i64_to_8f32:
4002 ; AVX512VLDQ: # %bb.0:
4003 ; AVX512VLDQ-NEXT: vcvtqq2ps (%rdi), %ymm0
4004 ; AVX512VLDQ-NEXT: retq
4005 %ld = load <8 x i64>, ptr%a
4006 %cvt = sitofp <8 x i64> %ld to <8 x float>
4007 ret <8 x float> %cvt
4010 define <8 x float> @sitofp_load_8i32_to_8f32(ptr%a) {
4011 ; SSE-LABEL: sitofp_load_8i32_to_8f32:
4013 ; SSE-NEXT: cvtdq2ps (%rdi), %xmm0
4014 ; SSE-NEXT: cvtdq2ps 16(%rdi), %xmm1
4017 ; AVX-LABEL: sitofp_load_8i32_to_8f32:
4019 ; AVX-NEXT: vcvtdq2ps (%rdi), %ymm0
4021 %ld = load <8 x i32>, ptr%a
4022 %cvt = sitofp <8 x i32> %ld to <8 x float>
4023 ret <8 x float> %cvt
4026 define <8 x float> @sitofp_load_8i16_to_8f32(ptr%a) {
4027 ; SSE2-LABEL: sitofp_load_8i16_to_8f32:
4029 ; SSE2-NEXT: movdqa (%rdi), %xmm1
4030 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4031 ; SSE2-NEXT: psrad $16, %xmm0
4032 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
4033 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
4034 ; SSE2-NEXT: psrad $16, %xmm1
4035 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
4038 ; SSE41-LABEL: sitofp_load_8i16_to_8f32:
4040 ; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1
4041 ; SSE41-NEXT: pmovsxwd (%rdi), %xmm0
4042 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
4043 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
4046 ; AVX1-LABEL: sitofp_load_8i16_to_8f32:
4048 ; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0
4049 ; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1
4050 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4051 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4054 ; AVX2-LABEL: sitofp_load_8i16_to_8f32:
4056 ; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0
4057 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4060 ; AVX512-LABEL: sitofp_load_8i16_to_8f32:
4062 ; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0
4063 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4065 %ld = load <8 x i16>, ptr%a
4066 %cvt = sitofp <8 x i16> %ld to <8 x float>
4067 ret <8 x float> %cvt
4070 define <8 x float> @sitofp_load_8i8_to_8f32(ptr%a) {
4071 ; SSE2-LABEL: sitofp_load_8i8_to_8f32:
4073 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
4074 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4075 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4076 ; SSE2-NEXT: psrad $24, %xmm0
4077 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
4078 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
4079 ; SSE2-NEXT: psrad $24, %xmm1
4080 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
4083 ; SSE41-LABEL: sitofp_load_8i8_to_8f32:
4085 ; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1
4086 ; SSE41-NEXT: pmovsxbd (%rdi), %xmm0
4087 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
4088 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
4091 ; AVX1-LABEL: sitofp_load_8i8_to_8f32:
4093 ; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm0
4094 ; AVX1-NEXT: vpmovsxbd (%rdi), %xmm1
4095 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4096 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4099 ; AVX2-LABEL: sitofp_load_8i8_to_8f32:
4101 ; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0
4102 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4105 ; AVX512-LABEL: sitofp_load_8i8_to_8f32:
4107 ; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0
4108 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4110 %ld = load <8 x i8>, ptr%a
4111 %cvt = sitofp <8 x i8> %ld to <8 x float>
4112 ret <8 x float> %cvt
4116 ; Load Unsigned Integer to Float
4119 define <4 x float> @uitofp_load_4i64_to_4f32(ptr%a) {
4120 ; SSE2-LABEL: uitofp_load_4i64_to_4f32:
4122 ; SSE2-NEXT: movq 24(%rdi), %rax
4123 ; SSE2-NEXT: testq %rax, %rax
4124 ; SSE2-NEXT: js .LBB83_1
4125 ; SSE2-NEXT: # %bb.2:
4126 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
4127 ; SSE2-NEXT: jmp .LBB83_3
4128 ; SSE2-NEXT: .LBB83_1:
4129 ; SSE2-NEXT: movq %rax, %rcx
4130 ; SSE2-NEXT: shrq %rcx
4131 ; SSE2-NEXT: andl $1, %eax
4132 ; SSE2-NEXT: orq %rcx, %rax
4133 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
4134 ; SSE2-NEXT: addss %xmm0, %xmm0
4135 ; SSE2-NEXT: .LBB83_3:
4136 ; SSE2-NEXT: movq 16(%rdi), %rax
4137 ; SSE2-NEXT: testq %rax, %rax
4138 ; SSE2-NEXT: js .LBB83_4
4139 ; SSE2-NEXT: # %bb.5:
4140 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
4141 ; SSE2-NEXT: jmp .LBB83_6
4142 ; SSE2-NEXT: .LBB83_4:
4143 ; SSE2-NEXT: movq %rax, %rcx
4144 ; SSE2-NEXT: shrq %rcx
4145 ; SSE2-NEXT: andl $1, %eax
4146 ; SSE2-NEXT: orq %rcx, %rax
4147 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
4148 ; SSE2-NEXT: addss %xmm1, %xmm1
4149 ; SSE2-NEXT: .LBB83_6:
4150 ; SSE2-NEXT: movq (%rdi), %rax
4151 ; SSE2-NEXT: movq 8(%rdi), %rcx
4152 ; SSE2-NEXT: testq %rcx, %rcx
4153 ; SSE2-NEXT: js .LBB83_7
4154 ; SSE2-NEXT: # %bb.8:
4155 ; SSE2-NEXT: cvtsi2ss %rcx, %xmm2
4156 ; SSE2-NEXT: jmp .LBB83_9
4157 ; SSE2-NEXT: .LBB83_7:
4158 ; SSE2-NEXT: movq %rcx, %rdx
4159 ; SSE2-NEXT: shrq %rdx
4160 ; SSE2-NEXT: andl $1, %ecx
4161 ; SSE2-NEXT: orq %rdx, %rcx
4162 ; SSE2-NEXT: cvtsi2ss %rcx, %xmm2
4163 ; SSE2-NEXT: addss %xmm2, %xmm2
4164 ; SSE2-NEXT: .LBB83_9:
4165 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4166 ; SSE2-NEXT: testq %rax, %rax
4167 ; SSE2-NEXT: js .LBB83_10
4168 ; SSE2-NEXT: # %bb.11:
4169 ; SSE2-NEXT: xorps %xmm0, %xmm0
4170 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
4171 ; SSE2-NEXT: jmp .LBB83_12
4172 ; SSE2-NEXT: .LBB83_10:
4173 ; SSE2-NEXT: movq %rax, %rcx
4174 ; SSE2-NEXT: shrq %rcx
4175 ; SSE2-NEXT: andl $1, %eax
4176 ; SSE2-NEXT: orq %rcx, %rax
4177 ; SSE2-NEXT: xorps %xmm0, %xmm0
4178 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
4179 ; SSE2-NEXT: addss %xmm0, %xmm0
4180 ; SSE2-NEXT: .LBB83_12:
4181 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4182 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4185 ; SSE41-LABEL: uitofp_load_4i64_to_4f32:
4187 ; SSE41-NEXT: movdqa (%rdi), %xmm1
4188 ; SSE41-NEXT: movdqa 16(%rdi), %xmm2
4189 ; SSE41-NEXT: pmovsxbq {{.*#+}} xmm4 = [1,1]
4190 ; SSE41-NEXT: movdqa %xmm1, %xmm0
4191 ; SSE41-NEXT: pand %xmm4, %xmm0
4192 ; SSE41-NEXT: movdqa %xmm1, %xmm3
4193 ; SSE41-NEXT: psrlq $1, %xmm3
4194 ; SSE41-NEXT: por %xmm0, %xmm3
4195 ; SSE41-NEXT: movdqa %xmm1, %xmm5
4196 ; SSE41-NEXT: movdqa %xmm1, %xmm0
4197 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm5
4198 ; SSE41-NEXT: pextrq $1, %xmm5, %rax
4199 ; SSE41-NEXT: xorps %xmm0, %xmm0
4200 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4201 ; SSE41-NEXT: movq %xmm5, %rax
4202 ; SSE41-NEXT: xorps %xmm3, %xmm3
4203 ; SSE41-NEXT: cvtsi2ss %rax, %xmm3
4204 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3]
4205 ; SSE41-NEXT: pand %xmm2, %xmm4
4206 ; SSE41-NEXT: movdqa %xmm2, %xmm5
4207 ; SSE41-NEXT: psrlq $1, %xmm5
4208 ; SSE41-NEXT: por %xmm4, %xmm5
4209 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
4210 ; SSE41-NEXT: movaps %xmm2, %xmm0
4211 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2
4212 ; SSE41-NEXT: movq %xmm2, %rax
4213 ; SSE41-NEXT: xorps %xmm0, %xmm0
4214 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4215 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3]
4216 ; SSE41-NEXT: pextrq $1, %xmm2, %rax
4217 ; SSE41-NEXT: xorps %xmm0, %xmm0
4218 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4219 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0]
4220 ; SSE41-NEXT: movaps %xmm3, %xmm2
4221 ; SSE41-NEXT: addps %xmm3, %xmm2
4222 ; SSE41-NEXT: movaps %xmm1, %xmm0
4223 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
4224 ; SSE41-NEXT: movaps %xmm3, %xmm0
4227 ; AVX1-LABEL: uitofp_load_4i64_to_4f32:
4229 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
4230 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1
4231 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2
4232 ; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3
4233 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
4234 ; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
4235 ; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
4236 ; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
4237 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
4238 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
4239 ; AVX1-NEXT: vmovq %xmm1, %rax
4240 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
4241 ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
4242 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
4243 ; AVX1-NEXT: vmovq %xmm1, %rax
4244 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
4245 ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
4246 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
4247 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
4248 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
4249 ; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
4250 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
4251 ; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
4252 ; AVX1-NEXT: vzeroupper
4255 ; AVX2-LABEL: uitofp_load_4i64_to_4f32:
4257 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
4258 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
4259 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1
4260 ; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2
4261 ; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
4262 ; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
4263 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
4264 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
4265 ; AVX2-NEXT: vmovq %xmm1, %rax
4266 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
4267 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
4268 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
4269 ; AVX2-NEXT: vmovq %xmm1, %rax
4270 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
4271 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
4272 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
4273 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
4274 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
4275 ; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
4276 ; AVX2-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0
4277 ; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
4278 ; AVX2-NEXT: vzeroupper
4281 ; AVX512F-LABEL: uitofp_load_4i64_to_4f32:
4283 ; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm0, %xmm0
4284 ; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm1, %xmm1
4285 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
4286 ; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm2, %xmm1
4287 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
4288 ; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm2, %xmm1
4289 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
4290 ; AVX512F-NEXT: retq
4292 ; AVX512VL-LABEL: uitofp_load_4i64_to_4f32:
4293 ; AVX512VL: # %bb.0:
4294 ; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm0, %xmm0
4295 ; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm1, %xmm1
4296 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
4297 ; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm2, %xmm1
4298 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
4299 ; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm2, %xmm1
4300 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
4301 ; AVX512VL-NEXT: retq
4303 ; AVX512DQ-LABEL: uitofp_load_4i64_to_4f32:
4304 ; AVX512DQ: # %bb.0:
4305 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
4306 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
4307 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
4308 ; AVX512DQ-NEXT: vzeroupper
4309 ; AVX512DQ-NEXT: retq
4311 ; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f32:
4312 ; AVX512VLDQ: # %bb.0:
4313 ; AVX512VLDQ-NEXT: vcvtuqq2psy (%rdi), %xmm0
4314 ; AVX512VLDQ-NEXT: retq
4315 %ld = load <4 x i64>, ptr%a
4316 %cvt = uitofp <4 x i64> %ld to <4 x float>
4317 ret <4 x float> %cvt
4320 define <4 x float> @uitofp_load_4i32_to_4f32(ptr%a) {
4321 ; SSE2-LABEL: uitofp_load_4i32_to_4f32:
4323 ; SSE2-NEXT: movdqa (%rdi), %xmm0
4324 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
4325 ; SSE2-NEXT: pand %xmm0, %xmm1
4326 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4327 ; SSE2-NEXT: psrld $16, %xmm0
4328 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4329 ; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4330 ; SSE2-NEXT: addps %xmm1, %xmm0
4333 ; SSE41-LABEL: uitofp_load_4i32_to_4f32:
4335 ; SSE41-NEXT: movdqa (%rdi), %xmm0
4336 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
4337 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
4338 ; SSE41-NEXT: psrld $16, %xmm0
4339 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
4340 ; SSE41-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4341 ; SSE41-NEXT: addps %xmm1, %xmm0
4344 ; AVX1-LABEL: uitofp_load_4i32_to_4f32:
4346 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
4347 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
4348 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
4349 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
4350 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4351 ; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
4354 ; AVX2-LABEL: uitofp_load_4i32_to_4f32:
4356 ; AVX2-NEXT: vmovdqa (%rdi), %xmm0
4357 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
4358 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
4359 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
4360 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928]
4361 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
4362 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
4363 ; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm0
4364 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
4367 ; AVX512F-LABEL: uitofp_load_4i32_to_4f32:
4369 ; AVX512F-NEXT: vmovaps (%rdi), %xmm0
4370 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
4371 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
4372 ; AVX512F-NEXT: vzeroupper
4373 ; AVX512F-NEXT: retq
4375 ; AVX512VL-LABEL: uitofp_load_4i32_to_4f32:
4376 ; AVX512VL: # %bb.0:
4377 ; AVX512VL-NEXT: vcvtudq2ps (%rdi), %xmm0
4378 ; AVX512VL-NEXT: retq
4380 ; AVX512DQ-LABEL: uitofp_load_4i32_to_4f32:
4381 ; AVX512DQ: # %bb.0:
4382 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
4383 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
4384 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
4385 ; AVX512DQ-NEXT: vzeroupper
4386 ; AVX512DQ-NEXT: retq
4388 ; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f32:
4389 ; AVX512VLDQ: # %bb.0:
4390 ; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %xmm0
4391 ; AVX512VLDQ-NEXT: retq
4392 %ld = load <4 x i32>, ptr%a
4393 %cvt = uitofp <4 x i32> %ld to <4 x float>
4394 ret <4 x float> %cvt
4397 define <4 x float> @uitofp_load_4i16_to_4f32(ptr%a) {
4398 ; SSE2-LABEL: uitofp_load_4i16_to_4f32:
4400 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
4401 ; SSE2-NEXT: pxor %xmm1, %xmm1
4402 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4403 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
4406 ; SSE41-LABEL: uitofp_load_4i16_to_4f32:
4408 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4409 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
4412 ; AVX-LABEL: uitofp_load_4i16_to_4f32:
4414 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4415 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
4417 %ld = load <4 x i16>, ptr%a
4418 %cvt = uitofp <4 x i16> %ld to <4 x float>
4419 ret <4 x float> %cvt
4422 define <4 x float> @uitofp_load_4i8_to_4f32(ptr%a) {
4423 ; SSE2-LABEL: uitofp_load_4i8_to_4f32:
4425 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4426 ; SSE2-NEXT: pxor %xmm1, %xmm1
4427 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4428 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4429 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
4432 ; SSE41-LABEL: uitofp_load_4i8_to_4f32:
4434 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4435 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
4438 ; AVX-LABEL: uitofp_load_4i8_to_4f32:
4440 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4441 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
4443 %ld = load <4 x i8>, ptr%a
4444 %cvt = uitofp <4 x i8> %ld to <4 x float>
4445 ret <4 x float> %cvt
4448 define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
4449 ; SSE2-LABEL: uitofp_load_8i64_to_8f32:
4451 ; SSE2-NEXT: movq 24(%rdi), %rax
4452 ; SSE2-NEXT: testq %rax, %rax
4453 ; SSE2-NEXT: js .LBB87_1
4454 ; SSE2-NEXT: # %bb.2:
4455 ; SSE2-NEXT: cvtsi2ss %rax, %xmm2
4456 ; SSE2-NEXT: jmp .LBB87_3
4457 ; SSE2-NEXT: .LBB87_1:
4458 ; SSE2-NEXT: movq %rax, %rcx
4459 ; SSE2-NEXT: shrq %rcx
4460 ; SSE2-NEXT: andl $1, %eax
4461 ; SSE2-NEXT: orq %rcx, %rax
4462 ; SSE2-NEXT: cvtsi2ss %rax, %xmm2
4463 ; SSE2-NEXT: addss %xmm2, %xmm2
4464 ; SSE2-NEXT: .LBB87_3:
4465 ; SSE2-NEXT: movq 16(%rdi), %rax
4466 ; SSE2-NEXT: testq %rax, %rax
4467 ; SSE2-NEXT: js .LBB87_4
4468 ; SSE2-NEXT: # %bb.5:
4469 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
4470 ; SSE2-NEXT: jmp .LBB87_6
4471 ; SSE2-NEXT: .LBB87_4:
4472 ; SSE2-NEXT: movq %rax, %rcx
4473 ; SSE2-NEXT: shrq %rcx
4474 ; SSE2-NEXT: andl $1, %eax
4475 ; SSE2-NEXT: orq %rcx, %rax
4476 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
4477 ; SSE2-NEXT: addss %xmm1, %xmm1
4478 ; SSE2-NEXT: .LBB87_6:
4479 ; SSE2-NEXT: movq (%rdi), %rax
4480 ; SSE2-NEXT: movq 8(%rdi), %rcx
4481 ; SSE2-NEXT: testq %rcx, %rcx
4482 ; SSE2-NEXT: js .LBB87_7
4483 ; SSE2-NEXT: # %bb.8:
4484 ; SSE2-NEXT: cvtsi2ss %rcx, %xmm3
4485 ; SSE2-NEXT: testq %rax, %rax
4486 ; SSE2-NEXT: jns .LBB87_11
4487 ; SSE2-NEXT: .LBB87_10:
4488 ; SSE2-NEXT: movq %rax, %rcx
4489 ; SSE2-NEXT: shrq %rcx
4490 ; SSE2-NEXT: andl $1, %eax
4491 ; SSE2-NEXT: orq %rcx, %rax
4492 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
4493 ; SSE2-NEXT: addss %xmm0, %xmm0
4494 ; SSE2-NEXT: jmp .LBB87_12
4495 ; SSE2-NEXT: .LBB87_7:
4496 ; SSE2-NEXT: movq %rcx, %rdx
4497 ; SSE2-NEXT: shrq %rdx
4498 ; SSE2-NEXT: andl $1, %ecx
4499 ; SSE2-NEXT: orq %rdx, %rcx
4500 ; SSE2-NEXT: cvtsi2ss %rcx, %xmm3
4501 ; SSE2-NEXT: addss %xmm3, %xmm3
4502 ; SSE2-NEXT: testq %rax, %rax
4503 ; SSE2-NEXT: js .LBB87_10
4504 ; SSE2-NEXT: .LBB87_11:
4505 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
4506 ; SSE2-NEXT: .LBB87_12:
4507 ; SSE2-NEXT: movq 56(%rdi), %rax
4508 ; SSE2-NEXT: testq %rax, %rax
4509 ; SSE2-NEXT: js .LBB87_13
4510 ; SSE2-NEXT: # %bb.14:
4511 ; SSE2-NEXT: cvtsi2ss %rax, %xmm5
4512 ; SSE2-NEXT: jmp .LBB87_15
4513 ; SSE2-NEXT: .LBB87_13:
4514 ; SSE2-NEXT: movq %rax, %rcx
4515 ; SSE2-NEXT: shrq %rcx
4516 ; SSE2-NEXT: andl $1, %eax
4517 ; SSE2-NEXT: orq %rcx, %rax
4518 ; SSE2-NEXT: cvtsi2ss %rax, %xmm5
4519 ; SSE2-NEXT: addss %xmm5, %xmm5
4520 ; SSE2-NEXT: .LBB87_15:
4521 ; SSE2-NEXT: movq 48(%rdi), %rax
4522 ; SSE2-NEXT: testq %rax, %rax
4523 ; SSE2-NEXT: js .LBB87_16
4524 ; SSE2-NEXT: # %bb.17:
4525 ; SSE2-NEXT: cvtsi2ss %rax, %xmm4
4526 ; SSE2-NEXT: jmp .LBB87_18
4527 ; SSE2-NEXT: .LBB87_16:
4528 ; SSE2-NEXT: movq %rax, %rcx
4529 ; SSE2-NEXT: shrq %rcx
4530 ; SSE2-NEXT: andl $1, %eax
4531 ; SSE2-NEXT: orq %rcx, %rax
4532 ; SSE2-NEXT: cvtsi2ss %rax, %xmm4
4533 ; SSE2-NEXT: addss %xmm4, %xmm4
4534 ; SSE2-NEXT: .LBB87_18:
4535 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4536 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
4537 ; SSE2-NEXT: movq 40(%rdi), %rax
4538 ; SSE2-NEXT: testq %rax, %rax
4539 ; SSE2-NEXT: js .LBB87_19
4540 ; SSE2-NEXT: # %bb.20:
4541 ; SSE2-NEXT: xorps %xmm2, %xmm2
4542 ; SSE2-NEXT: cvtsi2ss %rax, %xmm2
4543 ; SSE2-NEXT: jmp .LBB87_21
4544 ; SSE2-NEXT: .LBB87_19:
4545 ; SSE2-NEXT: movq %rax, %rcx
4546 ; SSE2-NEXT: shrq %rcx
4547 ; SSE2-NEXT: andl $1, %eax
4548 ; SSE2-NEXT: orq %rcx, %rax
4549 ; SSE2-NEXT: xorps %xmm2, %xmm2
4550 ; SSE2-NEXT: cvtsi2ss %rax, %xmm2
4551 ; SSE2-NEXT: addss %xmm2, %xmm2
4552 ; SSE2-NEXT: .LBB87_21:
4553 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4554 ; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
4555 ; SSE2-NEXT: movq 32(%rdi), %rax
4556 ; SSE2-NEXT: testq %rax, %rax
4557 ; SSE2-NEXT: js .LBB87_22
4558 ; SSE2-NEXT: # %bb.23:
4559 ; SSE2-NEXT: xorps %xmm1, %xmm1
4560 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
4561 ; SSE2-NEXT: jmp .LBB87_24
4562 ; SSE2-NEXT: .LBB87_22:
4563 ; SSE2-NEXT: movq %rax, %rcx
4564 ; SSE2-NEXT: shrq %rcx
4565 ; SSE2-NEXT: andl $1, %eax
4566 ; SSE2-NEXT: orq %rcx, %rax
4567 ; SSE2-NEXT: xorps %xmm1, %xmm1
4568 ; SSE2-NEXT: cvtsi2ss %rax, %xmm1
4569 ; SSE2-NEXT: addss %xmm1, %xmm1
4570 ; SSE2-NEXT: .LBB87_24:
4571 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4572 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
4575 ; SSE41-LABEL: uitofp_load_8i64_to_8f32:
4577 ; SSE41-NEXT: movdqa (%rdi), %xmm4
4578 ; SSE41-NEXT: movdqa 16(%rdi), %xmm5
4579 ; SSE41-NEXT: movdqa 32(%rdi), %xmm6
4580 ; SSE41-NEXT: movdqa 48(%rdi), %xmm2
4581 ; SSE41-NEXT: pmovsxbq {{.*#+}} xmm7 = [1,1]
4582 ; SSE41-NEXT: movdqa %xmm4, %xmm0
4583 ; SSE41-NEXT: pand %xmm7, %xmm0
4584 ; SSE41-NEXT: movdqa %xmm4, %xmm1
4585 ; SSE41-NEXT: psrlq $1, %xmm1
4586 ; SSE41-NEXT: por %xmm0, %xmm1
4587 ; SSE41-NEXT: movdqa %xmm4, %xmm3
4588 ; SSE41-NEXT: movdqa %xmm4, %xmm0
4589 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
4590 ; SSE41-NEXT: pextrq $1, %xmm3, %rax
4591 ; SSE41-NEXT: xorps %xmm0, %xmm0
4592 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4593 ; SSE41-NEXT: movq %xmm3, %rax
4594 ; SSE41-NEXT: xorps %xmm3, %xmm3
4595 ; SSE41-NEXT: cvtsi2ss %rax, %xmm3
4596 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3]
4597 ; SSE41-NEXT: movdqa %xmm5, %xmm0
4598 ; SSE41-NEXT: pand %xmm7, %xmm0
4599 ; SSE41-NEXT: movdqa %xmm5, %xmm1
4600 ; SSE41-NEXT: psrlq $1, %xmm1
4601 ; SSE41-NEXT: por %xmm0, %xmm1
4602 ; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
4603 ; SSE41-NEXT: movaps %xmm5, %xmm0
4604 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5
4605 ; SSE41-NEXT: movq %xmm5, %rax
4606 ; SSE41-NEXT: xorps %xmm0, %xmm0
4607 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4608 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3]
4609 ; SSE41-NEXT: pextrq $1, %xmm5, %rax
4610 ; SSE41-NEXT: xorps %xmm0, %xmm0
4611 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4612 ; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0]
4613 ; SSE41-NEXT: movaps %xmm3, %xmm1
4614 ; SSE41-NEXT: addps %xmm3, %xmm1
4615 ; SSE41-NEXT: movaps %xmm4, %xmm0
4616 ; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3
4617 ; SSE41-NEXT: movdqa %xmm6, %xmm0
4618 ; SSE41-NEXT: pand %xmm7, %xmm0
4619 ; SSE41-NEXT: movdqa %xmm6, %xmm1
4620 ; SSE41-NEXT: psrlq $1, %xmm1
4621 ; SSE41-NEXT: por %xmm0, %xmm1
4622 ; SSE41-NEXT: movdqa %xmm6, %xmm4
4623 ; SSE41-NEXT: movdqa %xmm6, %xmm0
4624 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4
4625 ; SSE41-NEXT: pextrq $1, %xmm4, %rax
4626 ; SSE41-NEXT: xorps %xmm0, %xmm0
4627 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4628 ; SSE41-NEXT: movq %xmm4, %rax
4629 ; SSE41-NEXT: xorps %xmm1, %xmm1
4630 ; SSE41-NEXT: cvtsi2ss %rax, %xmm1
4631 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
4632 ; SSE41-NEXT: pand %xmm2, %xmm7
4633 ; SSE41-NEXT: movdqa %xmm2, %xmm4
4634 ; SSE41-NEXT: psrlq $1, %xmm4
4635 ; SSE41-NEXT: por %xmm7, %xmm4
4636 ; SSE41-NEXT: shufps {{.*#+}} xmm6 = xmm6[1,3],xmm2[1,3]
4637 ; SSE41-NEXT: movaps %xmm2, %xmm0
4638 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
4639 ; SSE41-NEXT: movq %xmm2, %rax
4640 ; SSE41-NEXT: xorps %xmm0, %xmm0
4641 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4642 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
4643 ; SSE41-NEXT: pextrq $1, %xmm2, %rax
4644 ; SSE41-NEXT: xorps %xmm0, %xmm0
4645 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
4646 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
4647 ; SSE41-NEXT: movaps %xmm1, %xmm2
4648 ; SSE41-NEXT: addps %xmm1, %xmm2
4649 ; SSE41-NEXT: movaps %xmm6, %xmm0
4650 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
4651 ; SSE41-NEXT: movaps %xmm3, %xmm0
4654 ; AVX1-LABEL: uitofp_load_8i64_to_8f32:
4656 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
4657 ; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
4658 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,1,1,1]
4659 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm3
4660 ; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm4
4661 ; AVX1-NEXT: vmovdqa 48(%rdi), %xmm5
4662 ; AVX1-NEXT: vpsrlq $1, %xmm5, %xmm6
4663 ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
4664 ; AVX1-NEXT: vorps %ymm3, %ymm4, %ymm3
4665 ; AVX1-NEXT: vblendvpd %ymm1, %ymm3, %ymm1, %ymm3
4666 ; AVX1-NEXT: vpextrq $1, %xmm3, %rax
4667 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm4
4668 ; AVX1-NEXT: vmovq %xmm3, %rax
4669 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6
4670 ; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[2,3]
4671 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
4672 ; AVX1-NEXT: vmovq %xmm3, %rax
4673 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6
4674 ; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
4675 ; AVX1-NEXT: vpextrq $1, %xmm3, %rax
4676 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm3
4677 ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[0]
4678 ; AVX1-NEXT: vaddps %xmm3, %xmm3, %xmm4
4679 ; AVX1-NEXT: vpackssdw %xmm5, %xmm1, %xmm1
4680 ; AVX1-NEXT: vblendvps %xmm1, %xmm4, %xmm3, %xmm1
4681 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm2
4682 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm3
4683 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm4
4684 ; AVX1-NEXT: vpsrlq $1, %xmm4, %xmm5
4685 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
4686 ; AVX1-NEXT: vorps %ymm2, %ymm3, %ymm2
4687 ; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm2
4688 ; AVX1-NEXT: vpextrq $1, %xmm2, %rax
4689 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm3
4690 ; AVX1-NEXT: vmovq %xmm2, %rax
4691 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm5
4692 ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3]
4693 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
4694 ; AVX1-NEXT: vmovq %xmm2, %rax
4695 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm5
4696 ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0],xmm3[3]
4697 ; AVX1-NEXT: vpextrq $1, %xmm2, %rax
4698 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm2
4699 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
4700 ; AVX1-NEXT: vaddps %xmm2, %xmm2, %xmm3
4701 ; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
4702 ; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
4703 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4706 ; AVX2-LABEL: uitofp_load_8i64_to_8f32:
4708 ; AVX2-NEXT: vmovaps (%rdi), %ymm0
4709 ; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
4710 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
4711 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
4712 ; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4
4713 ; AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3
4714 ; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm1, %ymm3
4715 ; AVX2-NEXT: vpextrq $1, %xmm3, %rax
4716 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
4717 ; AVX2-NEXT: vmovq %xmm3, %rax
4718 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm5
4719 ; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3]
4720 ; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm3
4721 ; AVX2-NEXT: vmovq %xmm3, %rax
4722 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm5
4723 ; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3]
4724 ; AVX2-NEXT: vpextrq $1, %xmm3, %rax
4725 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3
4726 ; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[0]
4727 ; AVX2-NEXT: vaddps %xmm3, %xmm3, %xmm4
4728 ; AVX2-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1
4729 ; AVX2-NEXT: vblendvps %xmm1, %xmm4, %xmm3, %xmm1
4730 ; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm2
4731 ; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3
4732 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
4733 ; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm2
4734 ; AVX2-NEXT: vpextrq $1, %xmm2, %rax
4735 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3
4736 ; AVX2-NEXT: vmovq %xmm2, %rax
4737 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm4
4738 ; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
4739 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
4740 ; AVX2-NEXT: vmovq %xmm2, %rax
4741 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm4
4742 ; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
4743 ; AVX2-NEXT: vpextrq $1, %xmm2, %rax
4744 ; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm2
4745 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
4746 ; AVX2-NEXT: vaddps %xmm2, %xmm2, %xmm3
4747 ; AVX2-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0
4748 ; AVX2-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
4749 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4752 ; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
4754 ; AVX512F-NEXT: vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
4755 ; AVX512F-NEXT: vcvtusi2ssq 32(%rdi), %xmm1, %xmm1
4756 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
4757 ; AVX512F-NEXT: vcvtusi2ssq 48(%rdi), %xmm2, %xmm1
4758 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
4759 ; AVX512F-NEXT: vcvtusi2ssq 56(%rdi), %xmm2, %xmm1
4760 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
4761 ; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm2, %xmm1
4762 ; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm2, %xmm2
4763 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
4764 ; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm3, %xmm2
4765 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4766 ; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm3, %xmm2
4767 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4768 ; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4769 ; AVX512F-NEXT: retq
4771 ; AVX512VL-LABEL: uitofp_load_8i64_to_8f32:
4772 ; AVX512VL: # %bb.0:
4773 ; AVX512VL-NEXT: vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
4774 ; AVX512VL-NEXT: vcvtusi2ssq 32(%rdi), %xmm1, %xmm1
4775 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
4776 ; AVX512VL-NEXT: vcvtusi2ssq 48(%rdi), %xmm2, %xmm1
4777 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
4778 ; AVX512VL-NEXT: vcvtusi2ssq 56(%rdi), %xmm2, %xmm1
4779 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
4780 ; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm2, %xmm1
4781 ; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm2, %xmm2
4782 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
4783 ; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm3, %xmm2
4784 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4785 ; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm3, %xmm2
4786 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4787 ; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4788 ; AVX512VL-NEXT: retq
4790 ; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32:
4791 ; AVX512DQ: # %bb.0:
4792 ; AVX512DQ-NEXT: vcvtuqq2ps (%rdi), %ymm0
4793 ; AVX512DQ-NEXT: retq
4795 ; AVX512VLDQ-LABEL: uitofp_load_8i64_to_8f32:
4796 ; AVX512VLDQ: # %bb.0:
4797 ; AVX512VLDQ-NEXT: vcvtuqq2ps (%rdi), %ymm0
4798 ; AVX512VLDQ-NEXT: retq
4799 %ld = load <8 x i64>, ptr%a
4800 %cvt = uitofp <8 x i64> %ld to <8 x float>
4801 ret <8 x float> %cvt
4804 define <8 x float> @uitofp_load_8i32_to_8f32(ptr%a) {
4805 ; SSE2-LABEL: uitofp_load_8i32_to_8f32:
4807 ; SSE2-NEXT: movdqa (%rdi), %xmm0
4808 ; SSE2-NEXT: movdqa 16(%rdi), %xmm1
4809 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
4810 ; SSE2-NEXT: movdqa %xmm0, %xmm3
4811 ; SSE2-NEXT: pand %xmm2, %xmm3
4812 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
4813 ; SSE2-NEXT: por %xmm4, %xmm3
4814 ; SSE2-NEXT: psrld $16, %xmm0
4815 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
4816 ; SSE2-NEXT: por %xmm5, %xmm0
4817 ; SSE2-NEXT: movaps {{.*#+}} xmm6 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
4818 ; SSE2-NEXT: subps %xmm6, %xmm0
4819 ; SSE2-NEXT: addps %xmm3, %xmm0
4820 ; SSE2-NEXT: pand %xmm1, %xmm2
4821 ; SSE2-NEXT: por %xmm4, %xmm2
4822 ; SSE2-NEXT: psrld $16, %xmm1
4823 ; SSE2-NEXT: por %xmm5, %xmm1
4824 ; SSE2-NEXT: subps %xmm6, %xmm1
4825 ; SSE2-NEXT: addps %xmm2, %xmm1
4828 ; SSE41-LABEL: uitofp_load_8i32_to_8f32:
4830 ; SSE41-NEXT: movdqa (%rdi), %xmm0
4831 ; SSE41-NEXT: movdqa 16(%rdi), %xmm1
4832 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200]
4833 ; SSE41-NEXT: movdqa %xmm0, %xmm3
4834 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
4835 ; SSE41-NEXT: psrld $16, %xmm0
4836 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928]
4837 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
4838 ; SSE41-NEXT: movaps {{.*#+}} xmm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
4839 ; SSE41-NEXT: subps %xmm5, %xmm0
4840 ; SSE41-NEXT: addps %xmm3, %xmm0
4841 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
4842 ; SSE41-NEXT: psrld $16, %xmm1
4843 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
4844 ; SSE41-NEXT: subps %xmm5, %xmm1
4845 ; SSE41-NEXT: addps %xmm2, %xmm1
4848 ; AVX1-LABEL: uitofp_load_8i32_to_8f32:
4850 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
4851 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
4852 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2
4853 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
4854 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
4855 ; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
4856 ; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
4857 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
4858 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4859 ; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
4862 ; AVX2-LABEL: uitofp_load_8i32_to_8f32:
4864 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
4865 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200]
4866 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
4867 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
4868 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928]
4869 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
4870 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
4871 ; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0
4872 ; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
4875 ; AVX512F-LABEL: uitofp_load_8i32_to_8f32:
4877 ; AVX512F-NEXT: vmovaps (%rdi), %ymm0
4878 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
4879 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
4880 ; AVX512F-NEXT: retq
4882 ; AVX512VL-LABEL: uitofp_load_8i32_to_8f32:
4883 ; AVX512VL: # %bb.0:
4884 ; AVX512VL-NEXT: vcvtudq2ps (%rdi), %ymm0
4885 ; AVX512VL-NEXT: retq
4887 ; AVX512DQ-LABEL: uitofp_load_8i32_to_8f32:
4888 ; AVX512DQ: # %bb.0:
4889 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
4890 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
4891 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
4892 ; AVX512DQ-NEXT: retq
4894 ; AVX512VLDQ-LABEL: uitofp_load_8i32_to_8f32:
4895 ; AVX512VLDQ: # %bb.0:
4896 ; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %ymm0
4897 ; AVX512VLDQ-NEXT: retq
4898 %ld = load <8 x i32>, ptr%a
4899 %cvt = uitofp <8 x i32> %ld to <8 x float>
4900 ret <8 x float> %cvt
4903 define <8 x float> @uitofp_load_8i16_to_8f32(ptr%a) {
4904 ; SSE2-LABEL: uitofp_load_8i16_to_8f32:
4906 ; SSE2-NEXT: movdqa (%rdi), %xmm1
4907 ; SSE2-NEXT: pxor %xmm2, %xmm2
4908 ; SSE2-NEXT: movdqa %xmm1, %xmm0
4909 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4910 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
4911 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4912 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
4915 ; SSE41-LABEL: uitofp_load_8i16_to_8f32:
4917 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4918 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4919 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
4920 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
4923 ; AVX1-LABEL: uitofp_load_8i16_to_8f32:
4925 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4926 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4927 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4928 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4931 ; AVX2-LABEL: uitofp_load_8i16_to_8f32:
4933 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
4934 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4937 ; AVX512-LABEL: uitofp_load_8i16_to_8f32:
4939 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
4940 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4942 %ld = load <8 x i16>, ptr%a
4943 %cvt = uitofp <8 x i16> %ld to <8 x float>
4944 ret <8 x float> %cvt
4947 define <8 x float> @uitofp_load_8i8_to_8f32(ptr%a) {
4948 ; SSE2-LABEL: uitofp_load_8i8_to_8f32:
4950 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
4951 ; SSE2-NEXT: pxor %xmm2, %xmm2
4952 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4953 ; SSE2-NEXT: movdqa %xmm1, %xmm0
4954 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4955 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
4956 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4957 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
4960 ; SSE41-LABEL: uitofp_load_8i8_to_8f32:
4962 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4963 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4964 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
4965 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
4968 ; AVX1-LABEL: uitofp_load_8i8_to_8f32:
4970 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4971 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4972 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4973 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4976 ; AVX2-LABEL: uitofp_load_8i8_to_8f32:
4978 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
4979 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4982 ; AVX512-LABEL: uitofp_load_8i8_to_8f32:
4984 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
4985 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4987 %ld = load <8 x i8>, ptr%a
4988 %cvt = uitofp <8 x i8> %ld to <8 x float>
4989 ret <8 x float> %cvt
4996 %Arguments = type <{ <8 x i8>, <8 x i16>, ptr }>
4997 define void @aggregate_sitofp_8i16_to_8f32(ptr nocapture readonly %a0) {
4998 ; SSE2-LABEL: aggregate_sitofp_8i16_to_8f32:
5000 ; SSE2-NEXT: movq 24(%rdi), %rax
5001 ; SSE2-NEXT: movdqu 8(%rdi), %xmm0
5002 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5003 ; SSE2-NEXT: psrad $16, %xmm1
5004 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
5005 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
5006 ; SSE2-NEXT: psrad $16, %xmm0
5007 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
5008 ; SSE2-NEXT: movaps %xmm0, 16(%rax)
5009 ; SSE2-NEXT: movaps %xmm1, (%rax)
5012 ; SSE41-LABEL: aggregate_sitofp_8i16_to_8f32:
5014 ; SSE41-NEXT: movq 24(%rdi), %rax
5015 ; SSE41-NEXT: pmovsxwd 16(%rdi), %xmm0
5016 ; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1
5017 ; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
5018 ; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
5019 ; SSE41-NEXT: movaps %xmm0, 16(%rax)
5020 ; SSE41-NEXT: movaps %xmm1, (%rax)
5023 ; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32:
5025 ; AVX1-NEXT: movq 24(%rdi), %rax
5026 ; AVX1-NEXT: vpmovsxwd 16(%rdi), %xmm0
5027 ; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1
5028 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
5029 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
5030 ; AVX1-NEXT: vmovaps %ymm0, (%rax)
5031 ; AVX1-NEXT: vzeroupper
5034 ; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32:
5036 ; AVX2-NEXT: movq 24(%rdi), %rax
5037 ; AVX2-NEXT: vpmovsxwd 8(%rdi), %ymm0
5038 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
5039 ; AVX2-NEXT: vmovaps %ymm0, (%rax)
5040 ; AVX2-NEXT: vzeroupper
5043 ; AVX512-LABEL: aggregate_sitofp_8i16_to_8f32:
5045 ; AVX512-NEXT: movq 24(%rdi), %rax
5046 ; AVX512-NEXT: vpmovsxwd 8(%rdi), %ymm0
5047 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
5048 ; AVX512-NEXT: vmovaps %ymm0, (%rax)
5049 ; AVX512-NEXT: vzeroupper
5051 %1 = load %Arguments, ptr %a0, align 1
5052 %2 = extractvalue %Arguments %1, 1
5053 %3 = extractvalue %Arguments %1, 2
5054 %4 = sitofp <8 x i16> %2 to <8 x float>
5055 store <8 x float> %4, ptr %3, align 32
5059 define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
5060 ; SSE-LABEL: sitofp_i32_to_2f64:
5062 ; SSE-NEXT: cvtsi2sd %edi, %xmm0
5065 ; AVX-LABEL: sitofp_i32_to_2f64:
5067 ; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
5069 %cvt = sitofp i32 %a1 to double
5070 %res = insertelement <2 x double> %a0, double %cvt, i32 0
5071 ret <2 x double> %res
5074 define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
5075 ; SSE-LABEL: sitofp_i32_to_4f32:
5077 ; SSE-NEXT: cvtsi2ss %edi, %xmm0
5080 ; AVX-LABEL: sitofp_i32_to_4f32:
5082 ; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
5084 %cvt = sitofp i32 %a1 to float
5085 %res = insertelement <4 x float> %a0, float %cvt, i32 0
5086 ret <4 x float> %res
5089 define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
5090 ; SSE-LABEL: sitofp_i64_to_2f64:
5092 ; SSE-NEXT: cvtsi2sd %rdi, %xmm0
5095 ; AVX-LABEL: sitofp_i64_to_2f64:
5097 ; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
5099 %cvt = sitofp i64 %a1 to double
5100 %res = insertelement <2 x double> %a0, double %cvt, i32 0
5101 ret <2 x double> %res
5104 define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
5105 ; SSE-LABEL: sitofp_i64_to_4f32:
5107 ; SSE-NEXT: cvtsi2ss %rdi, %xmm0
5110 ; AVX-LABEL: sitofp_i64_to_4f32:
5112 ; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
5114 %cvt = sitofp i64 %a1 to float
5115 %res = insertelement <4 x float> %a0, float %cvt, i32 0
5116 ret <4 x float> %res
5119 ; Extract from int vector and convert to FP.
5121 define float @extract0_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
5122 ; SSE-LABEL: extract0_sitofp_v4i32_f32:
5124 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
5127 ; AVX-LABEL: extract0_sitofp_v4i32_f32:
5129 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
5131 %e = extractelement <4 x i32> %x, i32 0
5132 %r = sitofp i32 %e to float
5136 define float @extract0_sitofp_v4i32_f32i_multiuse1(<4 x i32> %x) nounwind {
5137 ; SSE-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
5139 ; SSE-NEXT: movd %xmm0, %eax
5140 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
5141 ; SSE-NEXT: incl %eax
5142 ; SSE-NEXT: cvtsi2ss %eax, %xmm1
5143 ; SSE-NEXT: divss %xmm1, %xmm0
5146 ; AVX-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
5148 ; AVX-NEXT: vmovd %xmm0, %eax
5149 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
5150 ; AVX-NEXT: incl %eax
5151 ; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
5152 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
5154 %e = extractelement <4 x i32> %x, i32 0
5155 %f = sitofp i32 %e to float
5157 %f1 = sitofp i32 %e1 to float
5158 %r = fdiv float %f, %f1
5162 define float @extract0_sitofp_v4i32_f32_multiuse2(<4 x i32> %x, ptr %p) nounwind {
5163 ; SSE-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
5165 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
5166 ; SSE-NEXT: movss %xmm0, (%rdi)
5167 ; SSE-NEXT: movaps %xmm1, %xmm0
5170 ; AVX-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
5172 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm1
5173 ; AVX-NEXT: vmovss %xmm0, (%rdi)
5174 ; AVX-NEXT: vmovaps %xmm1, %xmm0
5176 %e = extractelement <4 x i32> %x, i32 0
5177 %r = sitofp i32 %e to float
5178 store i32 %e, ptr %p
5182 define double @extract0_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
5183 ; SSE-LABEL: extract0_sitofp_v4i32_f64:
5185 ; SSE-NEXT: movd %xmm0, %eax
5186 ; SSE-NEXT: xorps %xmm0, %xmm0
5187 ; SSE-NEXT: cvtsi2sd %eax, %xmm0
5190 ; AVX-LABEL: extract0_sitofp_v4i32_f64:
5192 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
5194 %e = extractelement <4 x i32> %x, i32 0
5195 %r = sitofp i32 %e to double
5199 define float @extract0_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
5200 ; SSE-LABEL: extract0_uitofp_v4i32_f32:
5202 ; SSE-NEXT: movd %xmm0, %eax
5203 ; SSE-NEXT: xorps %xmm0, %xmm0
5204 ; SSE-NEXT: cvtsi2ss %rax, %xmm0
5207 ; VEX-LABEL: extract0_uitofp_v4i32_f32:
5209 ; VEX-NEXT: vmovd %xmm0, %eax
5210 ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
5213 ; AVX512F-LABEL: extract0_uitofp_v4i32_f32:
5215 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5216 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
5217 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5218 ; AVX512F-NEXT: vzeroupper
5219 ; AVX512F-NEXT: retq
5221 ; AVX512VL-LABEL: extract0_uitofp_v4i32_f32:
5222 ; AVX512VL: # %bb.0:
5223 ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
5224 ; AVX512VL-NEXT: retq
5226 ; AVX512DQ-LABEL: extract0_uitofp_v4i32_f32:
5227 ; AVX512DQ: # %bb.0:
5228 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5229 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
5230 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5231 ; AVX512DQ-NEXT: vzeroupper
5232 ; AVX512DQ-NEXT: retq
5234 ; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f32:
5235 ; AVX512VLDQ: # %bb.0:
5236 ; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
5237 ; AVX512VLDQ-NEXT: retq
5238 %e = extractelement <4 x i32> %x, i32 0
5239 %r = uitofp i32 %e to float
5243 define double @extract0_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
5244 ; SSE-LABEL: extract0_uitofp_v4i32_f64:
5246 ; SSE-NEXT: movd %xmm0, %eax
5247 ; SSE-NEXT: xorps %xmm0, %xmm0
5248 ; SSE-NEXT: cvtsi2sd %rax, %xmm0
5251 ; VEX-LABEL: extract0_uitofp_v4i32_f64:
5253 ; VEX-NEXT: vmovd %xmm0, %eax
5254 ; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
5257 ; AVX512F-LABEL: extract0_uitofp_v4i32_f64:
5259 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
5260 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
5261 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5262 ; AVX512F-NEXT: vzeroupper
5263 ; AVX512F-NEXT: retq
5265 ; AVX512VL-LABEL: extract0_uitofp_v4i32_f64:
5266 ; AVX512VL: # %bb.0:
5267 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
5268 ; AVX512VL-NEXT: retq
5270 ; AVX512DQ-LABEL: extract0_uitofp_v4i32_f64:
5271 ; AVX512DQ: # %bb.0:
5272 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
5273 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
5274 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5275 ; AVX512DQ-NEXT: vzeroupper
5276 ; AVX512DQ-NEXT: retq
5278 ; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f64:
5279 ; AVX512VLDQ: # %bb.0:
5280 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
5281 ; AVX512VLDQ-NEXT: retq
5282 %e = extractelement <4 x i32> %x, i32 0
5283 %r = uitofp i32 %e to double
5287 ; Extract non-zero element from int vector and convert to FP.
5289 define float @extract3_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
5290 ; SSE-LABEL: extract3_sitofp_v4i32_f32:
5292 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
5293 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
5296 ; AVX-LABEL: extract3_sitofp_v4i32_f32:
5298 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5299 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
5301 %e = extractelement <4 x i32> %x, i32 3
5302 %r = sitofp i32 %e to float
5306 define double @extract3_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
5307 ; SSE2-LABEL: extract3_sitofp_v4i32_f64:
5309 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
5310 ; SSE2-NEXT: movd %xmm0, %eax
5311 ; SSE2-NEXT: xorps %xmm0, %xmm0
5312 ; SSE2-NEXT: cvtsi2sd %eax, %xmm0
5315 ; SSE41-LABEL: extract3_sitofp_v4i32_f64:
5317 ; SSE41-NEXT: extractps $3, %xmm0, %eax
5318 ; SSE41-NEXT: xorps %xmm0, %xmm0
5319 ; SSE41-NEXT: cvtsi2sd %eax, %xmm0
5322 ; AVX-LABEL: extract3_sitofp_v4i32_f64:
5324 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5325 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
5327 %e = extractelement <4 x i32> %x, i32 3
5328 %r = sitofp i32 %e to double
5332 define float @extract3_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
5333 ; SSE2-LABEL: extract3_uitofp_v4i32_f32:
5335 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
5336 ; SSE2-NEXT: movd %xmm0, %eax
5337 ; SSE2-NEXT: xorps %xmm0, %xmm0
5338 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0
5341 ; SSE41-LABEL: extract3_uitofp_v4i32_f32:
5343 ; SSE41-NEXT: extractps $3, %xmm0, %eax
5344 ; SSE41-NEXT: xorps %xmm0, %xmm0
5345 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0
5348 ; VEX-LABEL: extract3_uitofp_v4i32_f32:
5350 ; VEX-NEXT: vextractps $3, %xmm0, %eax
5351 ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
5354 ; AVX512F-LABEL: extract3_uitofp_v4i32_f32:
5356 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5357 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
5358 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5359 ; AVX512F-NEXT: vzeroupper
5360 ; AVX512F-NEXT: retq
5362 ; AVX512VL-LABEL: extract3_uitofp_v4i32_f32:
5363 ; AVX512VL: # %bb.0:
5364 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5365 ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
5366 ; AVX512VL-NEXT: retq
5368 ; AVX512DQ-LABEL: extract3_uitofp_v4i32_f32:
5369 ; AVX512DQ: # %bb.0:
5370 ; AVX512DQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5371 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
5372 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5373 ; AVX512DQ-NEXT: vzeroupper
5374 ; AVX512DQ-NEXT: retq
5376 ; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f32:
5377 ; AVX512VLDQ: # %bb.0:
5378 ; AVX512VLDQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5379 ; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
5380 ; AVX512VLDQ-NEXT: retq
5381 %e = extractelement <4 x i32> %x, i32 3
5382 %r = uitofp i32 %e to float
5386 define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
5387 ; SSE2-LABEL: extract3_uitofp_v4i32_f64:
5389 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
5390 ; SSE2-NEXT: movd %xmm0, %eax
5391 ; SSE2-NEXT: xorps %xmm0, %xmm0
5392 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0
5395 ; SSE41-LABEL: extract3_uitofp_v4i32_f64:
5397 ; SSE41-NEXT: extractps $3, %xmm0, %eax
5398 ; SSE41-NEXT: xorps %xmm0, %xmm0
5399 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0
5402 ; VEX-LABEL: extract3_uitofp_v4i32_f64:
5404 ; VEX-NEXT: vextractps $3, %xmm0, %eax
5405 ; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
5408 ; AVX512F-LABEL: extract3_uitofp_v4i32_f64:
5410 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5411 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
5412 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5413 ; AVX512F-NEXT: vzeroupper
5414 ; AVX512F-NEXT: retq
5416 ; AVX512VL-LABEL: extract3_uitofp_v4i32_f64:
5417 ; AVX512VL: # %bb.0:
5418 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5419 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
5420 ; AVX512VL-NEXT: retq
5422 ; AVX512DQ-LABEL: extract3_uitofp_v4i32_f64:
5423 ; AVX512DQ: # %bb.0:
5424 ; AVX512DQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5425 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
5426 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5427 ; AVX512DQ-NEXT: vzeroupper
5428 ; AVX512DQ-NEXT: retq
5430 ; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f64:
5431 ; AVX512VLDQ: # %bb.0:
5432 ; AVX512VLDQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
5433 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
5434 ; AVX512VLDQ-NEXT: retq
5435 %e = extractelement <4 x i32> %x, i32 3
5436 %r = uitofp i32 %e to double
5440 define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
5441 ; SSE2-LABEL: PR43609:
5443 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2,2]
5444 ; SSE2-NEXT: paddq %xmm0, %xmm1
5445 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
5446 ; SSE2-NEXT: movdqa %xmm0, %xmm3
5447 ; SSE2-NEXT: pand %xmm2, %xmm3
5448 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
5449 ; SSE2-NEXT: por %xmm4, %xmm3
5450 ; SSE2-NEXT: psrlq $32, %xmm0
5451 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
5452 ; SSE2-NEXT: por %xmm5, %xmm0
5453 ; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
5454 ; SSE2-NEXT: subpd %xmm6, %xmm0
5455 ; SSE2-NEXT: addpd %xmm3, %xmm0
5456 ; SSE2-NEXT: pand %xmm1, %xmm2
5457 ; SSE2-NEXT: por %xmm4, %xmm2
5458 ; SSE2-NEXT: psrlq $32, %xmm1
5459 ; SSE2-NEXT: por %xmm5, %xmm1
5460 ; SSE2-NEXT: subpd %xmm6, %xmm1
5461 ; SSE2-NEXT: addpd %xmm2, %xmm1
5462 ; SSE2-NEXT: movapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
5463 ; SSE2-NEXT: addpd %xmm2, %xmm0
5464 ; SSE2-NEXT: addpd %xmm2, %xmm1
5465 ; SSE2-NEXT: movupd %xmm0, (%rdi)
5466 ; SSE2-NEXT: movupd %xmm1, 16(%rdi)
5469 ; SSE41-LABEL: PR43609:
5471 ; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [2,2]
5472 ; SSE41-NEXT: paddq %xmm0, %xmm1
5473 ; SSE41-NEXT: pxor %xmm2, %xmm2
5474 ; SSE41-NEXT: movdqa %xmm0, %xmm3
5475 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
5476 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
5477 ; SSE41-NEXT: por %xmm4, %xmm3
5478 ; SSE41-NEXT: psrlq $32, %xmm0
5479 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
5480 ; SSE41-NEXT: por %xmm5, %xmm0
5481 ; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
5482 ; SSE41-NEXT: subpd %xmm6, %xmm0
5483 ; SSE41-NEXT: addpd %xmm3, %xmm0
5484 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
5485 ; SSE41-NEXT: por %xmm4, %xmm2
5486 ; SSE41-NEXT: psrlq $32, %xmm1
5487 ; SSE41-NEXT: por %xmm5, %xmm1
5488 ; SSE41-NEXT: subpd %xmm6, %xmm1
5489 ; SSE41-NEXT: addpd %xmm2, %xmm1
5490 ; SSE41-NEXT: movapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
5491 ; SSE41-NEXT: addpd %xmm2, %xmm0
5492 ; SSE41-NEXT: addpd %xmm2, %xmm1
5493 ; SSE41-NEXT: movupd %xmm0, (%rdi)
5494 ; SSE41-NEXT: movupd %xmm1, 16(%rdi)
5497 ; AVX1-LABEL: PR43609:
5499 ; AVX1-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
5500 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
5501 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
5502 ; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
5503 ; AVX1-NEXT: # xmm4 = mem[0,0]
5504 ; AVX1-NEXT: vpor %xmm4, %xmm3, %xmm3
5505 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
5506 ; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
5507 ; AVX1-NEXT: # xmm5 = mem[0,0]
5508 ; AVX1-NEXT: vpor %xmm5, %xmm0, %xmm0
5509 ; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
5510 ; AVX1-NEXT: # xmm6 = mem[0,0]
5511 ; AVX1-NEXT: vsubpd %xmm6, %xmm0, %xmm0
5512 ; AVX1-NEXT: vaddpd %xmm0, %xmm3, %xmm0
5513 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
5514 ; AVX1-NEXT: vpor %xmm4, %xmm2, %xmm2
5515 ; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
5516 ; AVX1-NEXT: vpor %xmm5, %xmm1, %xmm1
5517 ; AVX1-NEXT: vsubpd %xmm6, %xmm1, %xmm1
5518 ; AVX1-NEXT: vaddpd %xmm1, %xmm2, %xmm1
5519 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
5520 ; AVX1-NEXT: # xmm2 = mem[0,0]
5521 ; AVX1-NEXT: vaddpd %xmm2, %xmm0, %xmm0
5522 ; AVX1-NEXT: vaddpd %xmm2, %xmm1, %xmm1
5523 ; AVX1-NEXT: vmovupd %xmm0, (%rdi)
5524 ; AVX1-NEXT: vmovupd %xmm1, 16(%rdi)
5527 ; AVX2-LABEL: PR43609:
5529 ; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
5530 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
5531 ; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
5532 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
5533 ; AVX2-NEXT: vpor %xmm4, %xmm3, %xmm3
5534 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
5535 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
5536 ; AVX2-NEXT: vpor %xmm5, %xmm0, %xmm0
5537 ; AVX2-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
5538 ; AVX2-NEXT: # xmm6 = mem[0,0]
5539 ; AVX2-NEXT: vsubpd %xmm6, %xmm0, %xmm0
5540 ; AVX2-NEXT: vaddpd %xmm0, %xmm3, %xmm0
5541 ; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
5542 ; AVX2-NEXT: vpor %xmm4, %xmm2, %xmm2
5543 ; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm1
5544 ; AVX2-NEXT: vpor %xmm5, %xmm1, %xmm1
5545 ; AVX2-NEXT: vsubpd %xmm6, %xmm1, %xmm1
5546 ; AVX2-NEXT: vaddpd %xmm1, %xmm2, %xmm1
5547 ; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
5548 ; AVX2-NEXT: # xmm2 = mem[0,0]
5549 ; AVX2-NEXT: vaddpd %xmm2, %xmm0, %xmm0
5550 ; AVX2-NEXT: vaddpd %xmm2, %xmm1, %xmm1
5551 ; AVX2-NEXT: vmovupd %xmm0, (%rdi)
5552 ; AVX2-NEXT: vmovupd %xmm1, 16(%rdi)
5555 ; AVX512F-LABEL: PR43609:
5557 ; AVX512F-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
5558 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
5559 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
5560 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
5561 ; AVX512F-NEXT: vpor %xmm4, %xmm3, %xmm3
5562 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
5563 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
5564 ; AVX512F-NEXT: vpor %xmm5, %xmm0, %xmm0
5565 ; AVX512F-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
5566 ; AVX512F-NEXT: # xmm6 = mem[0,0]
5567 ; AVX512F-NEXT: vsubpd %xmm6, %xmm0, %xmm0
5568 ; AVX512F-NEXT: vaddpd %xmm0, %xmm3, %xmm0
5569 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
5570 ; AVX512F-NEXT: vpor %xmm4, %xmm2, %xmm2
5571 ; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm1
5572 ; AVX512F-NEXT: vpor %xmm5, %xmm1, %xmm1
5573 ; AVX512F-NEXT: vsubpd %xmm6, %xmm1, %xmm1
5574 ; AVX512F-NEXT: vaddpd %xmm1, %xmm2, %xmm1
5575 ; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
5576 ; AVX512F-NEXT: # xmm2 = mem[0,0]
5577 ; AVX512F-NEXT: vaddpd %xmm2, %xmm0, %xmm0
5578 ; AVX512F-NEXT: vaddpd %xmm2, %xmm1, %xmm1
5579 ; AVX512F-NEXT: vmovupd %xmm0, (%rdi)
5580 ; AVX512F-NEXT: vmovupd %xmm1, 16(%rdi)
5581 ; AVX512F-NEXT: retq
5583 ; AVX512VL-LABEL: PR43609:
5584 ; AVX512VL: # %bb.0:
5585 ; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
5586 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
5587 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
5588 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
5589 ; AVX512VL-NEXT: vpor %xmm4, %xmm3, %xmm3
5590 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
5591 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
5592 ; AVX512VL-NEXT: vpor %xmm5, %xmm0, %xmm0
5593 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
5594 ; AVX512VL-NEXT: # xmm6 = mem[0,0]
5595 ; AVX512VL-NEXT: vsubpd %xmm6, %xmm0, %xmm0
5596 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm3, %xmm0
5597 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
5598 ; AVX512VL-NEXT: vpor %xmm4, %xmm2, %xmm2
5599 ; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm1
5600 ; AVX512VL-NEXT: vpor %xmm5, %xmm1, %xmm1
5601 ; AVX512VL-NEXT: vsubpd %xmm6, %xmm1, %xmm1
5602 ; AVX512VL-NEXT: vaddpd %xmm1, %xmm2, %xmm1
5603 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
5604 ; AVX512VL-NEXT: # xmm2 = mem[0,0]
5605 ; AVX512VL-NEXT: vaddpd %xmm2, %xmm0, %xmm0
5606 ; AVX512VL-NEXT: vaddpd %xmm2, %xmm1, %xmm1
5607 ; AVX512VL-NEXT: vmovupd %xmm0, (%rdi)
5608 ; AVX512VL-NEXT: vmovupd %xmm1, 16(%rdi)
5609 ; AVX512VL-NEXT: retq
5611 ; AVX512DQ-LABEL: PR43609:
5612 ; AVX512DQ: # %bb.0:
5613 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5614 ; AVX512DQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
5615 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
5616 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm1, %zmm1
5617 ; AVX512DQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
5618 ; AVX512DQ-NEXT: # xmm2 = mem[0,0]
5619 ; AVX512DQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0
5620 ; AVX512DQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1
5621 ; AVX512DQ-NEXT: vmovupd %xmm0, (%rdi)
5622 ; AVX512DQ-NEXT: vmovupd %xmm1, 16(%rdi)
5623 ; AVX512DQ-NEXT: vzeroupper
5624 ; AVX512DQ-NEXT: retq
5626 ; AVX512VLDQ-LABEL: PR43609:
5627 ; AVX512VLDQ: # %bb.0:
5628 ; AVX512VLDQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
5629 ; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
5630 ; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm1, %xmm1
5631 ; AVX512VLDQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
5632 ; AVX512VLDQ-NEXT: # xmm2 = mem[0,0]
5633 ; AVX512VLDQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0
5634 ; AVX512VLDQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1
5635 ; AVX512VLDQ-NEXT: vmovupd %xmm0, (%rdi)
5636 ; AVX512VLDQ-NEXT: vmovupd %xmm1, 16(%rdi)
5637 ; AVX512VLDQ-NEXT: retq
5638 %step.add.epil = add <2 x i64> %y, <i64 2, i64 2>
5639 %t20 = uitofp <2 x i64> %y to <2 x double>
5640 %t21 = uitofp <2 x i64> %step.add.epil to <2 x double>
5641 %t22 = fadd fast <2 x double> %t20, <double 5.0e-01, double 5.0e-01>
5642 %t23 = fadd fast <2 x double> %t21, <double 5.0e-01, double 5.0e-01>
5643 store <2 x double> %t22, ptr %x, align 8
5644 %t26 = getelementptr inbounds double, ptr %x, i64 2
5645 store <2 x double> %t23, ptr %t26, align 8
5649 attributes #0 = { "unsafe-fp-math"="true" }