; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE,SSE-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE,SSE-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefixes=SSE41,SSE41-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefixes=SSE41,SSE41-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX1,AVX-64,AVX1-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-64,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-64,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-64
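
; These tests check strict (constrained) signed and unsigned integer-to-FP
; lowering for 128-bit vector types across SSE2, SSE4.1, AVX and AVX-512
; feature levels, on both 32-bit and 64-bit targets.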

declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i8(<4 x i8>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i8(<4 x i8>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i16(<4 x i16>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i16(<4 x i16>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i1(<2 x i1>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i8(<2 x i8>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i8(<2 x i8>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i16(<2 x i16>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i16(<2 x i16>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)

define <2 x float> @sitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-LABEL: sitofp_v2i32_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i32_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i32_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

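; Without AVX-512's vcvtudq2ps, an unsigned i32 element is converted via the
; classic double-precision magic-number trick: zero-extend the lane into the
; low half of an f64, OR in the bit pattern of 2^52 (0x4330000000000000,
; printed below as 4.503599627370496E+15) so the integer sits in the f64
; mantissa, then subtract 2^52; a final cvtpd2ps narrows the exact f64 to f32.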
define <2 x float> @uitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-LABEL: uitofp_v2i32_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    subpd %xmm1, %xmm0
; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i32_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT:    orpd %xmm1, %xmm0
; SSE41-NEXT:    subpd %xmm1, %xmm0
; SSE41-NEXT:    cvtpd2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: uitofp_v2i32_v2f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v2i32_v2f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v2i32_v2f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v2i32_v2f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i32_v2f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQVL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

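; There is no packed i64 -> fp conversion before AVX512DQ (vcvtqq2ps), so the
; fallbacks below scalarize: 64-bit targets run cvtsi2ss on each extracted
; element, while 32-bit targets bounce each element through memory and the
; x87 fildll instruction (with a trailing wait for strict FP exception order).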
define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-LABEL: sitofp_v2i64_v2f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fstps (%esp)
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fstps {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: sitofp_v2i64_v2f32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    cvtsi2ss %rax, %xmm1
; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-64-NEXT:    movaps %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: sitofp_v2i64_v2f32:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    pushl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_offset 8
; SSE41-32-NEXT:    .cfi_offset %ebp, -8
; SSE41-32-NEXT:    movl %esp, %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-32-NEXT:    andl $-8, %esp
; SSE41-32-NEXT:    subl $24, %esp
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fstps (%esp)
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fstps {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-32-NEXT:    movl %ebp, %esp
; SSE41-32-NEXT:    popl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: sitofp_v2i64_v2f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    cvtsi2ss %rax, %xmm1
; SSE41-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    xorps %xmm0, %xmm0
; SSE41-64-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE41-64-NEXT:    movaps %xmm1, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: sitofp_v2i64_v2f32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $24, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstps (%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: sitofp_v2i64_v2f32:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX-64-NEXT:    vmovq %xmm0, %rax
; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-64-NEXT:    retq
;
; AVX512DQ-32-LABEL: sitofp_v2i64_v2f32:
; AVX512DQ-32:       # %bb.0:
; AVX512DQ-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT:    vcvtqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT:    vzeroupper
; AVX512DQ-32-NEXT:    retl
;
; AVX512DQ-64-LABEL: sitofp_v2i64_v2f32:
; AVX512DQ-64:       # %bb.0:
; AVX512DQ-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512DQ-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512DQ-64-NEXT:    vmovq %xmm0, %rax
; AVX512DQ-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512DQ-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-64-NEXT:    retq
;
; AVX512DQVL-LABEL: sitofp_v2i64_v2f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtqq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

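; Unsigned i64 elements are harder: cvtsi2ss is signed-only, so values that may
; have the sign bit set are halved with a round-to-odd step (shift right by one,
; OR the low bit back in), converted, and doubled again with an add; cmovns (or
; a blend on AVX1) keeps the direct conversion for non-negative inputs. The
; 32-bit x87 path instead biases the signed fildll result by adding 2^64 from a
; two-entry constant-pool table indexed by the sign bit.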
define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-LABEL: uitofp_v2i64_v2f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; SSE-32-NEXT:    movd %xmm1, %eax
; SSE-32-NEXT:    shrl $31, %eax
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT:    fstps (%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    movd %xmm0, %eax
; SSE-32-NEXT:    shrl $31, %eax
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT:    fstps {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: uitofp_v2i64_v2f32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movdqa %xmm0, %xmm1
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %rcx
; SSE-64-NEXT:    shrq %rcx
; SSE-64-NEXT:    movl %eax, %edx
; SSE-64-NEXT:    andl $1, %edx
; SSE-64-NEXT:    orq %rcx, %rdx
; SSE-64-NEXT:    testq %rax, %rax
; SSE-64-NEXT:    cmovnsq %rax, %rdx
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    cvtsi2ss %rdx, %xmm0
; SSE-64-NEXT:    jns .LBB3_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    addss %xmm0, %xmm0
; SSE-64-NEXT:  .LBB3_2:
; SSE-64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-64-NEXT:    movq %xmm1, %rax
; SSE-64-NEXT:    movq %rax, %rcx
; SSE-64-NEXT:    shrq %rcx
; SSE-64-NEXT:    movl %eax, %edx
; SSE-64-NEXT:    andl $1, %edx
; SSE-64-NEXT:    orq %rcx, %rdx
; SSE-64-NEXT:    testq %rax, %rax
; SSE-64-NEXT:    cmovnsq %rax, %rdx
; SSE-64-NEXT:    xorps %xmm1, %xmm1
; SSE-64-NEXT:    cvtsi2ss %rdx, %xmm1
; SSE-64-NEXT:    jns .LBB3_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    addss %xmm1, %xmm1
; SSE-64-NEXT:  .LBB3_4:
; SSE-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: uitofp_v2i64_v2f32:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    pushl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_offset 8
; SSE41-32-NEXT:    .cfi_offset %ebp, -8
; SSE41-32-NEXT:    movl %esp, %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-32-NEXT:    andl $-8, %esp
; SSE41-32-NEXT:    subl $24, %esp
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE41-32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; SSE41-32-NEXT:    movd %xmm1, %eax
; SSE41-32-NEXT:    shrl $31, %eax
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT:    fstps (%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-32-NEXT:    movd %xmm0, %eax
; SSE41-32-NEXT:    shrl $31, %eax
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT:    fstps {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-32-NEXT:    movl %ebp, %esp
; SSE41-32-NEXT:    popl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: uitofp_v2i64_v2f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movdqa %xmm0, %xmm1
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    movq %rax, %rcx
; SSE41-64-NEXT:    shrq %rcx
; SSE41-64-NEXT:    movl %eax, %edx
; SSE41-64-NEXT:    andl $1, %edx
; SSE41-64-NEXT:    orq %rcx, %rdx
; SSE41-64-NEXT:    testq %rax, %rax
; SSE41-64-NEXT:    cmovnsq %rax, %rdx
; SSE41-64-NEXT:    xorps %xmm0, %xmm0
; SSE41-64-NEXT:    cvtsi2ss %rdx, %xmm0
; SSE41-64-NEXT:    jns .LBB3_2
; SSE41-64-NEXT:  # %bb.1:
; SSE41-64-NEXT:    addss %xmm0, %xmm0
; SSE41-64-NEXT:  .LBB3_2:
; SSE41-64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-64-NEXT:    movq %xmm1, %rax
; SSE41-64-NEXT:    movq %rax, %rcx
; SSE41-64-NEXT:    shrq %rcx
; SSE41-64-NEXT:    movl %eax, %edx
; SSE41-64-NEXT:    andl $1, %edx
; SSE41-64-NEXT:    orq %rcx, %rdx
; SSE41-64-NEXT:    testq %rax, %rax
; SSE41-64-NEXT:    cmovnsq %rax, %rdx
; SSE41-64-NEXT:    xorps %xmm1, %xmm1
; SSE41-64-NEXT:    cvtsi2ss %rdx, %xmm1
; SSE41-64-NEXT:    jns .LBB3_4
; SSE41-64-NEXT:  # %bb.3:
; SSE41-64-NEXT:    addss %xmm1, %xmm1
; SSE41-64-NEXT:  .LBB3_4:
; SSE41-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: uitofp_v2i64_v2f32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $24, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstps (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v2i64_v2f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-64-NEXT:    vpsrlq $1, %xmm0, %xmm2
; AVX1-64-NEXT:    vpor %xmm1, %xmm2, %xmm1
; AVX1-64-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-64-NEXT:    vmovq %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; AVX1-64-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; AVX1-64-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-64-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; AVX1-64-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-64-NEXT:    retq
;
; AVX512F-64-LABEL: uitofp_v2i64_v2f32:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: uitofp_v2i64_v2f32:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-32-LABEL: uitofp_v2i64_v2f32:
; AVX512DQ-32:       # %bb.0:
; AVX512DQ-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT:    vcvtuqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT:    vzeroupper
; AVX512DQ-32-NEXT:    retl
;
; AVX512DQ-64-LABEL: uitofp_v2i64_v2f32:
; AVX512DQ-64:       # %bb.0:
; AVX512DQ-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512DQ-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512DQ-64-NEXT:    vmovq %xmm0, %rax
; AVX512DQ-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512DQ-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-64-NEXT:    retq
;
; AVX512DQVL-LABEL: uitofp_v2i64_v2f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtuqq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

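; Boolean (i1) lanes arrive in the low bit of each i32 element, so signed
; conversion first materializes 0/-1 by shifting the bit into the sign position
; (pslld $31) and arithmetic-shifting it back down (psrad $31).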
define <4 x float> @sitofp_v4i1_v4f32(<4 x i1> %x) #0 {
; SSE-LABEL: sitofp_v4i1_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i1_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pslld $31, %xmm0
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i1_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

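; For the unsigned case, masking each lane down to 0/1 with an AND is enough,
; after which an ordinary signed cvtdq2ps conversion is exact.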
define <4 x float> @uitofp_v4i1_v4f32(<4 x i1> %x) #0 {
; SSE-32-LABEL: uitofp_v4i1_v4f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: uitofp_v4i1_v4f32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: uitofp_v4i1_v4f32:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: uitofp_v4i1_v4f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX1-32-LABEL: uitofp_v4i1_v4f32:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v4i1_v4f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-64-NEXT:    retq
;
; AVX512F-LABEL: uitofp_v4i1_v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: uitofp_v4i1_v4f32:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v4i1_v4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512DQVL-32:       # %bb.0:
; AVX512DQVL-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    retl
;
; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f32:
; AVX512DQVL-64:       # %bb.0:
; AVX512DQVL-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    retq
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

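; Sub-dword elements are widened to i32 first: SSE2 interleaves the bytes or
; words up to dwords and sign-extends with psrad (or interleaves with a zero
; vector for the unsigned variants), while AVX has dedicated pmovsx/pmovzx
; widening instructions.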
define <4 x float> @sitofp_v4i8_v4f32(<4 x i8> %x) #0 {
; SSE-LABEL: sitofp_v4i8_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i8_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT:    psrad $24, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i8_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i8(<4 x i8> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i8_v4f32(<4 x i8> %x) #0 {
; SSE-LABEL: uitofp_v4i8_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v4i8_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v4i8_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i8(<4 x i8> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @sitofp_v4i16_v4f32(<4 x i16> %x) #0 {
; SSE-LABEL: sitofp_v4i16_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i16_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT:    psrad $16, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i16_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i16(<4 x i16> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i16_v4f32(<4 x i16> %x) #0 {
; SSE-LABEL: uitofp_v4i16_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v4i16_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v4i16_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i16(<4 x i16> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @sitofp_v4i32_v4f32(<4 x i32> %x) #0 {
; SSE-LABEL: sitofp_v4i32_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i32_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i32_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

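; A full v4i32 unsigned conversion splits each element into its 16-bit halves,
; embeds each half in the mantissa of a float magic constant (via pand/psrld
; plus por or pblendw), then cancels the magic constants with subps and
; recombines the halves with addps.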
define <4 x float> @uitofp_v4i32_v4f32(<4 x i32> %x) #0 {
; SSE-32-LABEL: uitofp_v4i32_v4f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE-32-NEXT:    pand %xmm0, %xmm1
; SSE-32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; SSE-32-NEXT:    psrld $16, %xmm0
; SSE-32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT:    addps %xmm1, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: uitofp_v4i32_v4f32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE-64-NEXT:    pand %xmm0, %xmm1
; SSE-64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-64-NEXT:    psrld $16, %xmm0
; SSE-64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT:    addps %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: uitofp_v4i32_v4f32:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE41-32-NEXT:    pand %xmm0, %xmm1
; SSE41-32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; SSE41-32-NEXT:    psrld $16, %xmm0
; SSE41-32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT:    addps %xmm1, %xmm0
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: uitofp_v4i32_v4f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE41-64-NEXT:    pand %xmm0, %xmm1
; SSE41-64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-64-NEXT:    psrld $16, %xmm0
; SSE41-64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT:    addps %xmm1, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX1-32-LABEL: uitofp_v4i32_v4f32:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-32-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-32-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v4i32_v4f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-64-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-64-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-64-NEXT:    retq
;
; AVX512F-LABEL: uitofp_v4i32_v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v4i32_v4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v4i32_v4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i32_v4f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <2 x double> @sitofp_v2i1_v2f64(<2 x i1> %x) #0 {
; SSE-LABEL: sitofp_v2i1_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i1_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pslld $31, %xmm0
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i1_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i1(<2 x i1> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @uitofp_v2i1_v2f64(<2 x i1> %x) #0 {
; SSE-32-LABEL: uitofp_v2i1_v2f64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-32-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: uitofp_v2i1_v2f64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: uitofp_v2i1_v2f64:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-32-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: uitofp_v2i1_v2f64:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX1-32-LABEL: uitofp_v2i1_v2f64:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v2i1_v2f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX1-64-NEXT:    retq
;
; AVX512F-LABEL: uitofp_v2i1_v2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v2i1_v2f64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-32-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: uitofp_v2i1_v2f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-64-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v2i1_v2f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v2i1_v2f64:
; AVX512DQVL-32:       # %bb.0:
; AVX512DQVL-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    retl
;
; AVX512DQVL-64-LABEL: uitofp_v2i1_v2f64:
; AVX512DQVL-64:       # %bb.0:
; AVX512DQVL-64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    retq
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @sitofp_v2i8_v2f64(<2 x i8> %x) #0 {
; SSE-LABEL: sitofp_v2i8_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i8_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT:    psrad $24, %xmm0
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i8_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i8(<2 x i8> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @uitofp_v2i8_v2f64(<2 x i8> %x) #0 {
; SSE-LABEL: uitofp_v2i8_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i8_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v2i8_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i8(<2 x i8> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @sitofp_v2i16_v2f64(<2 x i16> %x) #0 {
; SSE-LABEL: sitofp_v2i16_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i16_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT:    psrad $16, %xmm0
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i16_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i16(<2 x i16> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @uitofp_v2i16_v2f64(<2 x i16> %x) #0 {
; SSE-LABEL: uitofp_v2i16_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i16_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v2i16_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i16(<2 x i16> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @sitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; SSE-LABEL: sitofp_v2i32_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i32_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i32_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

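; Same 2^52 magic-number trick as in uitofp_v2i32_v2f32 above, except the
; subtraction already yields the wanted f64 result, so the trailing cvtpd2ps
; disappears.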
define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; SSE-LABEL: uitofp_v2i32_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    subpd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i32_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT:    orpd %xmm1, %xmm0
; SSE41-NEXT:    subpd %xmm1, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: uitofp_v2i32_v2f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v2i32_v2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v2i32_v2f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v2i32_v2f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i32_v2f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtudq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE-32-LABEL: sitofp_v2i64_v2f64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $32, %esp
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fstpl (%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: sitofp_v2i64_v2f64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    cvtsi2sd %rax, %xmm1
; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movapd %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: sitofp_v2i64_v2f64:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    pushl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_offset 8
; SSE41-32-NEXT:    .cfi_offset %ebp, -8
; SSE41-32-NEXT:    movl %esp, %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-32-NEXT:    andl $-8, %esp
; SSE41-32-NEXT:    subl $32, %esp
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fstpl (%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-32-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE41-32-NEXT:    movl %ebp, %esp
; SSE41-32-NEXT:    popl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: sitofp_v2i64_v2f64:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    cvtsi2sd %rax, %xmm1
; SSE41-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    xorps %xmm0, %xmm0
; SSE41-64-NEXT:    cvtsi2sd %rax, %xmm0
; SSE41-64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE41-64-NEXT:    movapd %xmm1, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: sitofp_v2i64_v2f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $32, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: sitofp_v2i64_v2f64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-64-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX-64-NEXT:    vmovq %xmm0, %rax
; AVX-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_v2i64_v2f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v2i64_v2f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtqq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

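; The unsigned i64 -> f64 lowerings mirror uitofp_v2i64_v2f32: halve-and-double
; with a round-to-odd low bit on 64-bit SSE/AVX1 targets, a sign-indexed 2^64
; bias on the 32-bit x87 path, and direct vcvtusi2sd/vcvtuqq2pd once
; AVX512F/AVX512DQ are available.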
define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE-32-LABEL: uitofp_v2i64_v2f64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $32, %esp
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-32-NEXT:    movd %xmm1, %eax
; SSE-32-NEXT:    shrl $31, %eax
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-32-NEXT:    movd %xmm0, %eax
; SSE-32-NEXT:    shrl $31, %eax
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT:    fstpl (%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: uitofp_v2i64_v2f64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movdqa %xmm0, %xmm1
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %rcx
; SSE-64-NEXT:    shrq %rcx
; SSE-64-NEXT:    movl %eax, %edx
; SSE-64-NEXT:    andl $1, %edx
; SSE-64-NEXT:    orq %rcx, %rdx
; SSE-64-NEXT:    testq %rax, %rax
; SSE-64-NEXT:    cmovnsq %rax, %rdx
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    cvtsi2sd %rdx, %xmm0
; SSE-64-NEXT:    jns .LBB21_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    addsd %xmm0, %xmm0
; SSE-64-NEXT:  .LBB21_2:
; SSE-64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-64-NEXT:    movq %xmm1, %rax
; SSE-64-NEXT:    movq %rax, %rcx
; SSE-64-NEXT:    shrq %rcx
; SSE-64-NEXT:    movl %eax, %edx
; SSE-64-NEXT:    andl $1, %edx
; SSE-64-NEXT:    orq %rcx, %rdx
; SSE-64-NEXT:    testq %rax, %rax
; SSE-64-NEXT:    cmovnsq %rax, %rdx
; SSE-64-NEXT:    xorps %xmm1, %xmm1
; SSE-64-NEXT:    cvtsi2sd %rdx, %xmm1
; SSE-64-NEXT:    jns .LBB21_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    addsd %xmm1, %xmm1
; SSE-64-NEXT:  .LBB21_4:
; SSE-64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: uitofp_v2i64_v2f64:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    pushl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_offset 8
; SSE41-32-NEXT:    .cfi_offset %ebp, -8
; SSE41-32-NEXT:    movl %esp, %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-32-NEXT:    andl $-8, %esp
; SSE41-32-NEXT:    subl $32, %esp
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE41-32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE41-32-NEXT:    movd %xmm1, %eax
; SSE41-32-NEXT:    shrl $31, %eax
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-32-NEXT:    movd %xmm0, %eax
; SSE41-32-NEXT:    shrl $31, %eax
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT:    fstpl (%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-32-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE41-32-NEXT:    movl %ebp, %esp
; SSE41-32-NEXT:    popl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: uitofp_v2i64_v2f64:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movdqa %xmm0, %xmm1
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    movq %rax, %rcx
; SSE41-64-NEXT:    shrq %rcx
; SSE41-64-NEXT:    movl %eax, %edx
; SSE41-64-NEXT:    andl $1, %edx
; SSE41-64-NEXT:    orq %rcx, %rdx
; SSE41-64-NEXT:    testq %rax, %rax
; SSE41-64-NEXT:    cmovnsq %rax, %rdx
; SSE41-64-NEXT:    xorps %xmm0, %xmm0
; SSE41-64-NEXT:    cvtsi2sd %rdx, %xmm0
; SSE41-64-NEXT:    jns .LBB21_2
; SSE41-64-NEXT:  # %bb.1:
; SSE41-64-NEXT:    addsd %xmm0, %xmm0
; SSE41-64-NEXT:  .LBB21_2:
; SSE41-64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-64-NEXT:    movq %xmm1, %rax
; SSE41-64-NEXT:    movq %rax, %rcx
; SSE41-64-NEXT:    shrq %rcx
; SSE41-64-NEXT:    movl %eax, %edx
; SSE41-64-NEXT:    andl $1, %edx
; SSE41-64-NEXT:    orq %rcx, %rdx
; SSE41-64-NEXT:    testq %rax, %rax
; SSE41-64-NEXT:    cmovnsq %rax, %rdx
; SSE41-64-NEXT:    xorps %xmm1, %xmm1
; SSE41-64-NEXT:    cvtsi2sd %rdx, %xmm1
; SSE41-64-NEXT:    jns .LBB21_4
; SSE41-64-NEXT:  # %bb.3:
; SSE41-64-NEXT:    addsd %xmm1, %xmm1
; SSE41-64-NEXT:  .LBB21_4:
; SSE41-64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: uitofp_v2i64_v2f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $32, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v2i64_v2f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT:    movq %rax, %rcx
; AVX1-64-NEXT:    shrq %rcx
; AVX1-64-NEXT:    movl %eax, %edx
; AVX1-64-NEXT:    andl $1, %edx
; AVX1-64-NEXT:    orq %rcx, %rdx
; AVX1-64-NEXT:    testq %rax, %rax
; AVX1-64-NEXT:    cmovnsq %rax, %rdx
; AVX1-64-NEXT:    vcvtsi2sd %rdx, %xmm1, %xmm1
; AVX1-64-NEXT:    jns .LBB21_2
; AVX1-64-NEXT:  # %bb.1:
; AVX1-64-NEXT:    vaddsd %xmm1, %xmm1, %xmm1
; AVX1-64-NEXT:  .LBB21_2:
; AVX1-64-NEXT:    vmovq %xmm0, %rax
; AVX1-64-NEXT:    movq %rax, %rcx
; AVX1-64-NEXT:    shrq %rcx
; AVX1-64-NEXT:    movl %eax, %edx
; AVX1-64-NEXT:    andl $1, %edx
; AVX1-64-NEXT:    orq %rcx, %rdx
; AVX1-64-NEXT:    testq %rax, %rax
; AVX1-64-NEXT:    cmovnsq %rax, %rdx
; AVX1-64-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm0
; AVX1-64-NEXT:    jns .LBB21_4
; AVX1-64-NEXT:  # %bb.3:
; AVX1-64-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
; AVX1-64-NEXT:  .LBB21_4:
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-64-NEXT:    retq
;
; AVX512F-64-LABEL: uitofp_v2i64_v2f64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: uitofp_v2i64_v2f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v2i64_v2f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i64_v2f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

attributes #0 = { strictfp }