1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE4
4 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL
; Two sitofp conversions of <2 x i32> to <2 x float> whose results are
; concatenated into a <4 x float> by the shufflevector.
; The SSE and AVX checks expect the two inputs to be joined first
; (movlhps/vmovlhps) and then converted with a single cvtdq2ps/vcvtdq2ps.
9 define <4 x float> @sitofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
10 ; SSE-LABEL: sitofp_v4i32_v4f32:
12 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
13 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
16 ; AVX-LABEL: sitofp_v4i32_v4f32:
18 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
19 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
21 %s0 = sitofp <2 x i32> %x to <2 x float>
22 %s1 = sitofp <2 x i32> %y to <2 x float>
23 %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Two uitofp conversions of <2 x i32> to <2 x float> whose results are
; concatenated into a <4 x float> by the shufflevector.
; Every prefix expects the inputs to be joined first (punpcklqdq/vmovlhps)
; and one 4-element conversion afterwards:
;   - SSE2/SSE4/AVX1/AVX2: an expanded sequence that splits each element with
;     pand/pblendw and psrld $16, then combines the parts with subps + addps.
;   - AVX512F: vcvtudq2ps on zmm0, followed by vzeroupper.
;   - AVX512VL: vcvtudq2ps directly on xmm0.
27 define <4 x float> @uitofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
28 ; SSE2-LABEL: uitofp_v4i32_v4f32:
30 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
31 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
32 ; SSE2-NEXT: pand %xmm0, %xmm1
33 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34 ; SSE2-NEXT: psrld $16, %xmm0
35 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
36 ; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
37 ; SSE2-NEXT: addps %xmm1, %xmm0
40 ; SSE4-LABEL: uitofp_v4i32_v4f32:
42 ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
43 ; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
44 ; SSE4-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
45 ; SSE4-NEXT: psrld $16, %xmm0
46 ; SSE4-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
47 ; SSE4-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
48 ; SSE4-NEXT: addps %xmm1, %xmm0
51 ; AVX1-LABEL: uitofp_v4i32_v4f32:
53 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
54 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
55 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
56 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
57 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
58 ; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
61 ; AVX2-LABEL: uitofp_v4i32_v4f32:
63 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
64 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
65 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
66 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
67 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928]
68 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
69 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
70 ; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm0
71 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
74 ; AVX512F-LABEL: uitofp_v4i32_v4f32:
76 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
77 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
78 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
79 ; AVX512F-NEXT: vzeroupper
82 ; AVX512VL-LABEL: uitofp_v4i32_v4f32:
84 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
85 ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
87 %s0 = uitofp <2 x i32> %x to <2 x float>
88 %s1 = uitofp <2 x i32> %y to <2 x float>
89 %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Two fptosi conversions of <2 x float> to <2 x i32> whose results are
; concatenated into a <4 x i32> by the shufflevector.
; The SSE and AVX checks expect the inputs to be joined first
; (movlhps/vmovlhps) and then converted with a single cvttps2dq/vcvttps2dq.
93 define <4 x i32> @fptosi_v4f32_v4i32(<2 x float> %x, <2 x float> %y) {
94 ; SSE-LABEL: fptosi_v4f32_v4i32:
96 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
97 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
100 ; AVX-LABEL: fptosi_v4f32_v4i32:
102 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
103 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
105 %s0 = fptosi <2 x float> %x to <2 x i32>
106 %s1 = fptosi <2 x float> %y to <2 x i32>
107 %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Two fptoui conversions of <2 x float> to <2 x i32> whose results are
; concatenated into a <4 x i32> by the shufflevector.
; Every prefix expects the inputs to be joined first (movlhps/vmovlhps) and
; one 4-element conversion afterwards:
;   - SSE/AVX1/AVX2: two cvttps2dq (one on the input, one after a subps of a
;     constant) merged through psrad $31 / pand / por.
;   - AVX512F: vcvttps2udq on zmm0, followed by vzeroupper.
;   - AVX512VL: vcvttps2udq directly on xmm0.
111 define <4 x i32> @fptoui_v4f32_v4i32(<2 x float> %x, <2 x float> %y) {
112 ; SSE-LABEL: fptoui_v4f32_v4i32:
114 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
115 ; SSE-NEXT: cvttps2dq %xmm0, %xmm1
116 ; SSE-NEXT: movdqa %xmm1, %xmm2
117 ; SSE-NEXT: psrad $31, %xmm2
118 ; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
119 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
120 ; SSE-NEXT: pand %xmm2, %xmm0
121 ; SSE-NEXT: por %xmm1, %xmm0
124 ; AVX1-LABEL: fptoui_v4f32_v4i32:
126 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
127 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
128 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
129 ; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
130 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
131 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
132 ; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
135 ; AVX2-LABEL: fptoui_v4f32_v4i32:
137 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
138 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm1
139 ; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
140 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
141 ; AVX2-NEXT: vsubps %xmm3, %xmm0, %xmm0
142 ; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
143 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
144 ; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
147 ; AVX512F-LABEL: fptoui_v4f32_v4i32:
149 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
150 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
151 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
152 ; AVX512F-NEXT: vzeroupper
155 ; AVX512VL-LABEL: fptoui_v4f32_v4i32:
157 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
158 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
159 ; AVX512VL-NEXT: retq
160 %s0 = fptoui <2 x float> %x to <2 x i32>
161 %s1 = fptoui <2 x float> %y to <2 x i32>
162 %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Two sitofp conversions of <2 x i32> to <2 x double> whose results are
; concatenated into a <4 x double> by the shufflevector.
;   - SSE: each input is converted separately with its own cvtdq2pd.
;   - AVX: the inputs are joined with vmovlhps and converted by a single
;     vcvtdq2pd from xmm0 into ymm0.
166 define <4 x double> @sitofp_v4i32_v4f64(<2 x i32> %x, <2 x i32> %y) {
167 ; SSE-LABEL: sitofp_v4i32_v4f64:
169 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
170 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
173 ; AVX-LABEL: sitofp_v4i32_v4f64:
175 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
176 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
178 %s0 = sitofp <2 x i32> %x to <2 x double>
179 %s1 = sitofp <2 x i32> %y to <2 x double>
180 %r = shufflevector <2 x double> %s0, <2 x double> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Two uitofp conversions of <2 x i32> to <2 x double> whose results are
; concatenated into a <4 x double> by the shufflevector.
;   - SSE2/SSE4: each input is handled separately: zero-extend to 64-bit
;     lanes (unpcklps with zero / pmovzxdq), then or + subpd with the
;     4.503599627370496E+15 constant.
;   - AVX1: each input is zero-extended separately, combined with vinsertf128,
;     then a single ymm vorpd + vsubpd.
;   - AVX2: the inputs are joined first (vpunpcklqdq), then one ymm vpmovzxdq,
;     vpor and vsubpd.
;   - AVX512F/AVX512VL: vmovlhps join followed by a single vcvtudq2pd
;     (zmm destination on AVX512F, ymm destination on AVX512VL).
184 define <4 x double> @uitofp_v4i32_v4f64(<2 x i32> %x, <2 x i32> %y) {
185 ; SSE2-LABEL: uitofp_v4i32_v4f64:
187 ; SSE2-NEXT: xorpd %xmm2, %xmm2
188 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
189 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
190 ; SSE2-NEXT: orpd %xmm3, %xmm0
191 ; SSE2-NEXT: subpd %xmm3, %xmm0
192 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
193 ; SSE2-NEXT: orpd %xmm3, %xmm1
194 ; SSE2-NEXT: subpd %xmm3, %xmm1
197 ; SSE4-LABEL: uitofp_v4i32_v4f64:
199 ; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
200 ; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
201 ; SSE4-NEXT: por %xmm2, %xmm0
202 ; SSE4-NEXT: subpd %xmm2, %xmm0
203 ; SSE4-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
204 ; SSE4-NEXT: por %xmm2, %xmm1
205 ; SSE4-NEXT: subpd %xmm2, %xmm1
208 ; AVX1-LABEL: uitofp_v4i32_v4f64:
210 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
211 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
212 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
213 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
214 ; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
215 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
218 ; AVX2-LABEL: uitofp_v4i32_v4f64:
220 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
221 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
222 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
223 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
224 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0
227 ; AVX512F-LABEL: uitofp_v4i32_v4f64:
229 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
230 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
231 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
234 ; AVX512VL-LABEL: uitofp_v4i32_v4f64:
236 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
237 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
238 ; AVX512VL-NEXT: retq
239 %s0 = uitofp <2 x i32> %x to <2 x double>
240 %s1 = uitofp <2 x i32> %y to <2 x double>
241 %r = shufflevector <2 x double> %s0, <2 x double> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Two fptosi conversions of <2 x double> to <2 x i32> whose results are
; concatenated into a <4 x i32> by the shufflevector.
;   - SSE: each input is converted separately with cvttpd2dq and the two
;     results are joined afterwards with unpcklpd.
;   - AVX: the inputs are combined into ymm0 with vinsertf128 and converted by
;     a single vcvttpd2dq (ymm0 -> xmm0), followed by vzeroupper.
245 define <4 x i32> @fptosi_v4f64_v4i32(<2 x double> %x, <2 x double> %y) {
246 ; SSE-LABEL: fptosi_v4f64_v4i32:
248 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
249 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
250 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
253 ; AVX-LABEL: fptosi_v4f64_v4i32:
255 ; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
256 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
257 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
258 ; AVX-NEXT: vzeroupper
260 %s0 = fptosi <2 x double> %x to <2 x i32>
261 %s1 = fptosi <2 x double> %y to <2 x i32>
262 %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Two fptoui conversions of <2 x double> to <2 x i32> whose results are
; concatenated into a <4 x i32> by the shufflevector.
;   - SSE: each input goes through its own expanded sequence (two cvttpd2dq,
;     subpd, psrad $31, pand, por); the results are joined with punpcklqdq.
;   - AVX1/AVX2: the inputs are combined into ymm0 with vinsertf128 and a
;     single expanded sequence is run on the combined vector.
;   - AVX512F/AVX512VL: vinsertf128 followed by a single vcvttpd2udq
;     (zmm source on AVX512F, ymm source on AVX512VL).
266 define <4 x i32> @fptoui_v4f64_v4i32(<2 x double> %x, <2 x double> %y) {
267 ; SSE-LABEL: fptoui_v4f64_v4i32:
269 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [2.147483648E+9,2.147483648E+9]
270 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm3
271 ; SSE-NEXT: subpd %xmm2, %xmm0
272 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm4
273 ; SSE-NEXT: movapd %xmm3, %xmm0
274 ; SSE-NEXT: psrad $31, %xmm0
275 ; SSE-NEXT: pand %xmm4, %xmm0
276 ; SSE-NEXT: por %xmm3, %xmm0
277 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm3
278 ; SSE-NEXT: subpd %xmm2, %xmm1
279 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
280 ; SSE-NEXT: movapd %xmm3, %xmm2
281 ; SSE-NEXT: psrad $31, %xmm2
282 ; SSE-NEXT: pand %xmm1, %xmm2
283 ; SSE-NEXT: por %xmm3, %xmm2
284 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
287 ; AVX1-LABEL: fptoui_v4f64_v4i32:
289 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
290 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
291 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1
292 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
293 ; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
294 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
295 ; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
296 ; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0
297 ; AVX1-NEXT: vzeroupper
300 ; AVX2-LABEL: fptoui_v4f64_v4i32:
302 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
303 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
304 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm1
305 ; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
306 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm3 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
307 ; AVX2-NEXT: vsubpd %ymm3, %ymm0, %ymm0
308 ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
309 ; AVX2-NEXT: vandpd %xmm2, %xmm0, %xmm0
310 ; AVX2-NEXT: vorpd %xmm0, %xmm1, %xmm0
311 ; AVX2-NEXT: vzeroupper
314 ; AVX512F-LABEL: fptoui_v4f64_v4i32:
316 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
317 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
318 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
319 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
320 ; AVX512F-NEXT: vzeroupper
323 ; AVX512VL-LABEL: fptoui_v4f64_v4i32:
325 ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
326 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
327 ; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
328 ; AVX512VL-NEXT: vzeroupper
329 ; AVX512VL-NEXT: retq
330 %s0 = fptoui <2 x double> %x to <2 x i32>
331 %s1 = fptoui <2 x double> %y to <2 x i32>
332 %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; The two halves use different conversion opcodes: %s0 is uitofp while %s1 is
; sitofp, and their results are concatenated by the shufflevector.
; Every prefix expects the halves to be converted separately (an unsigned
; sequence for %x, cvtdq2ps/vcvtdq2ps for %y) and joined only afterwards with
; unpcklpd/vmovlhps; no prefix expects a single combined 4-element conversion.
338 define <4 x float> @mismatch_tofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
339 ; SSE2-LABEL: mismatch_tofp_v4i32_v4f32:
341 ; SSE2-NEXT: xorpd %xmm2, %xmm2
342 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
343 ; SSE2-NEXT: movapd {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
344 ; SSE2-NEXT: orpd %xmm2, %xmm0
345 ; SSE2-NEXT: subpd %xmm2, %xmm0
346 ; SSE2-NEXT: cvtpd2ps %xmm0, %xmm0
347 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
348 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
351 ; SSE4-LABEL: mismatch_tofp_v4i32_v4f32:
353 ; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
354 ; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
355 ; SSE4-NEXT: por %xmm2, %xmm0
356 ; SSE4-NEXT: subpd %xmm2, %xmm0
357 ; SSE4-NEXT: cvtpd2ps %xmm0, %xmm0
358 ; SSE4-NEXT: cvtdq2ps %xmm1, %xmm1
359 ; SSE4-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
362 ; AVX1-LABEL: mismatch_tofp_v4i32_v4f32:
364 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
365 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
366 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
367 ; AVX1-NEXT: vsubpd %xmm2, %xmm0, %xmm0
368 ; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0
369 ; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1
370 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
373 ; AVX2-LABEL: mismatch_tofp_v4i32_v4f32:
375 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
376 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
377 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
378 ; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0
379 ; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0
380 ; AVX2-NEXT: vcvtdq2ps %xmm1, %xmm1
381 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
384 ; AVX512F-LABEL: mismatch_tofp_v4i32_v4f32:
386 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
387 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
388 ; AVX512F-NEXT: vcvtdq2ps %xmm1, %xmm1
389 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
390 ; AVX512F-NEXT: vzeroupper
393 ; AVX512VL-LABEL: mismatch_tofp_v4i32_v4f32:
395 ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
396 ; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1
397 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
398 ; AVX512VL-NEXT: retq
399 %s0 = uitofp <2 x i32> %x to <2 x float>
400 %s1 = sitofp <2 x i32> %y to <2 x float>
401 %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Same pattern as a plain concatenation of two sitofp results, except that the
; second conversion result (%s1) has an additional use: it is stored to %p.
; The SSE and AVX checks expect both halves to be converted separately, %s1 to
; be written out with movlps/vmovlps, and the halves to be joined afterwards
; with movlhps/vmovlhps.
; %p is spelled with the opaque pointer type (ptr) instead of the legacy typed
; pointer syntax; the expected assembly is unchanged by this.
407 define <4 x float> @sitofp_v4i32_v4f32_extra_use(<2 x i32> %x, <2 x i32> %y, ptr %p) {
408 ; SSE-LABEL: sitofp_v4i32_v4f32_extra_use:
410 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
411 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
412 ; SSE-NEXT: movlps %xmm1, (%rdi)
413 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
416 ; AVX-LABEL: sitofp_v4i32_v4f32_extra_use:
418 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
419 ; AVX-NEXT: vcvtdq2ps %xmm1, %xmm1
420 ; AVX-NEXT: vmovlps %xmm1, (%rdi)
421 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
423 %s0 = sitofp <2 x i32> %x to <2 x float>
424 %s1 = sitofp <2 x i32> %y to <2 x float>
425 store <2 x float> %s1, ptr %p
426 %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Each <2 x i64> input is arithmetically shifted right by 48, converted with
; sitofp to <2 x float>, and the two results are concatenated.
; NOTE(review): the name presumably refers to an LLVM bug report - confirm.
;   - SSE/AVX1/AVX2: psrad $16 on each input, shufps picking 32-bit elements
;     1 and 3 of each input, then a single cvtdq2ps.
;   - AVX512F: vpsraq $48 on each input (in zmm registers), vshufps picking
;     elements 0 and 2 of each input, then a single vcvtdq2ps.
;   - AVX512VL: the inputs are combined with vinserti128, shifted once with
;     vpsraq $48, truncated with vpmovqd, then converted by one vcvtdq2ps.
430 define <4 x float> @PR45794(<2 x i64> %x, <2 x i64> %y) {
431 ; SSE-LABEL: PR45794:
433 ; SSE-NEXT: psrad $16, %xmm0
434 ; SSE-NEXT: psrad $16, %xmm1
435 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
436 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
439 ; AVX1-LABEL: PR45794:
441 ; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
442 ; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
443 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
444 ; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
447 ; AVX2-LABEL: PR45794:
449 ; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
450 ; AVX2-NEXT: vpsrad $16, %xmm1, %xmm1
451 ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
452 ; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
455 ; AVX512F-LABEL: PR45794:
457 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
458 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
459 ; AVX512F-NEXT: vpsraq $48, %zmm0, %zmm0
460 ; AVX512F-NEXT: vpsraq $48, %zmm1, %zmm1
461 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
462 ; AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
463 ; AVX512F-NEXT: vzeroupper
466 ; AVX512VL-LABEL: PR45794:
468 ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
469 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
470 ; AVX512VL-NEXT: vpsraq $48, %ymm0, %ymm0
471 ; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
472 ; AVX512VL-NEXT: vcvtdq2ps %xmm0, %xmm0
473 ; AVX512VL-NEXT: vzeroupper
474 ; AVX512VL-NEXT: retq
475 %a0 = ashr <2 x i64> %x, <i64 48, i64 48>
476 %s0 = sitofp <2 x i64> %a0 to <2 x float>
477 %a1 = ashr <2 x i64> %y, <i64 48, i64 48>
478 %s1 = sitofp <2 x i64> %a1 to <2 x float>
479 %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>