; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX1,AVX1-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 -O3 | FileCheck %s --check-prefixes=CHECK,AVX2,AVX-32,AVX2-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -O3 | FileCheck %s --check-prefixes=CHECK,AVX2,AVX2-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX-32,AVX512F-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-64
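
; This file exercises strict (constrained) signed and unsigned integer-to-FP
; conversions producing 256-bit vector results, via the
; @llvm.experimental.constrained.sitofp/uitofp intrinsics, across AVX, AVX2
; and AVX-512 feature combinations on both 32-bit and 64-bit x86 targets.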

declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i32(<8 x i32>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)

define <8 x float> @sitofp_v8i1_v8f32(<8 x i1> %x) #0 {
; AVX1-LABEL: sitofp_v8i1_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i1_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i1_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i1_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i1_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i1_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

define <8 x float> @uitofp_v8i1_v8f32(<8 x i1> %x) #0 {
; AVX1-32-LABEL: uitofp_v8i1_v8f32:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-32-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v8i1_v8f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-64-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-32-LABEL: uitofp_v8i1_v8f32:
; AVX2-32:       # %bb.0:
; AVX2-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX2-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-32-NEXT:    retl
;
; AVX2-64-LABEL: uitofp_v8i1_v8f32:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-64-NEXT:    retq
;
; AVX512F-32-LABEL: uitofp_v8i1_v8f32:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512F-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: uitofp_v8i1_v8f32:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-32-LABEL: uitofp_v8i1_v8f32:
; AVX512DQ-32:       # %bb.0:
; AVX512DQ-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQ-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-32-NEXT:    retl
;
; AVX512DQ-64-LABEL: uitofp_v8i1_v8f32:
; AVX512DQ-64:       # %bb.0:
; AVX512DQ-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQ-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-64-NEXT:    retq
;
; AVX512DQVL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-32:       # %bb.0:
; AVX512DQVL-32-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-32-NEXT:    retl
;
; AVX512DQVL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-64:       # %bb.0:
; AVX512DQVL-64-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-64-NEXT:    retq
  %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

define <8 x float> @sitofp_v8i8_v8f32(<8 x i8> %x) #0 {
; AVX1-LABEL: sitofp_v8i8_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i8_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i8_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i8_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i8_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i8_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

define <8 x float> @uitofp_v8i8_v8f32(<8 x i8> %x) #0 {
; AVX1-LABEL: uitofp_v8i8_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v8i8_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i8_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i8_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i8_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i8_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

define <8 x float> @sitofp_v8i16_v8f32(<8 x i16> %x) #0 {
; AVX1-LABEL: sitofp_v8i16_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i16_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i16_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i16_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i16_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i16_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

define <8 x float> @uitofp_v8i16_v8f32(<8 x i16> %x) #0 {
; AVX1-LABEL: uitofp_v8i16_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v8i16_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i16_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i16_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i16_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i16_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

define <8 x float> @sitofp_v8i32_v8f32(<8 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v8i32_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i32(<8 x i32> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

define <8 x float> @uitofp_v8i32_v8f32(<8 x i32> %x) #0 {
; AVX1-32-LABEL: uitofp_v8i32_v8f32:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-32-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-32-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-32-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-32-NEXT:    vcvtdq2ps %ymm1, %ymm1
; AVX1-32-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; AVX1-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX1-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-32-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v8i32_v8f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-64-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-64-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-64-NEXT:    vcvtdq2ps %ymm1, %ymm1
; AVX1-64-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-64-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-LABEL: uitofp_v8i32_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; AVX2-NEXT:    vsubps %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i32_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %ymm0, %ymm0
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i32_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i32_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i32_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtudq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

define <4 x double> @sitofp_v4i1_v4f64(<4 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v4i1_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $31, %xmm0, %xmm0
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @uitofp_v4i1_v4f64(<4 x i1> %x) #0 {
; AVX1-32-LABEL: uitofp_v4i1_v4f64:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v4i1_v4f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-LABEL: uitofp_v4i1_v4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v4i1_v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: uitofp_v4i1_v4f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v4i1_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512DQVL-32:       # %bb.0:
; AVX512DQVL-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512DQVL-32-NEXT:    retl
;
; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f64:
; AVX512DQVL-64:       # %bb.0:
; AVX512DQVL-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512DQVL-64-NEXT:    retq
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @sitofp_v4i8_v4f64(<4 x i8> %x) #0 {
; CHECK-LABEL: sitofp_v4i8_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @uitofp_v4i8_v4f64(<4 x i8> %x) #0 {
; CHECK-LABEL: uitofp_v4i8_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @sitofp_v4i16_v4f64(<4 x i16> %x) #0 {
; CHECK-LABEL: sitofp_v4i16_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @uitofp_v4i16_v4f64(<4 x i16> %x) #0 {
; CHECK-LABEL: uitofp_v4i16_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @sitofp_v4i32_v4f64(<4 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v4i32_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @uitofp_v4i32_v4f64(<4 x i32> %x) #0 {
; AVX1-LABEL: uitofp_v4i32_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v4i32_v4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v4i32_v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v4i32_v4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v4i32_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i32_v4f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtudq2pd %xmm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-LABEL: sitofp_v4i64_v4f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $64, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; AVX-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: sitofp_v4i64_v4f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-64-NEXT:    vmovq %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-64-NEXT:    vmovq %xmm0, %rax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-64-LABEL: sitofp_v4i64_v4f64:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-64-NEXT:    vmovq %xmm1, %rax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX2-64-NEXT:    vmovq %xmm0, %rax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-64-NEXT:    retq
;
; AVX512F-64-LABEL: sitofp_v4i64_v4f64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512F-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-64-NEXT:    vmovq %xmm1, %rax
; AVX512F-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: sitofp_v4i64_v4f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512VL-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT:    vmovq %xmm1, %rax
; AVX512VL-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_v4i64_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v4i64_v4f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtqq2pd %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-LABEL: uitofp_v4i64_v4f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $64, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $1, %xmm1, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm1, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; AVX-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v4i64_v4f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-64-NEXT:    vpextrd $2, %xmm1, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-64-NEXT:    vmovd %xmm1, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-64-NEXT:    vextractps $2, %xmm0, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-64-NEXT:    vmovq %xmm0, %rax
; AVX1-64-NEXT:    movl %eax, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm4
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX1-64-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-64-NEXT:    vpextrd $3, %xmm1, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-64-NEXT:    vpextrd $1, %xmm1, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm1
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX1-64-NEXT:    vpextrd $3, %xmm0, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-64-NEXT:    vpextrd $1, %xmm0, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm0
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-64-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-64-LABEL: uitofp_v4i64_v4f64:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-64-NEXT:    vextractps $3, %xmm1, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-64-NEXT:    vextractps $1, %xmm1, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-64-NEXT:    vextractps $3, %xmm0, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; AVX2-64-NEXT:    vextractps $1, %xmm0, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm4
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX2-64-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX2-64-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [4.294967296E+9,4.294967296E+9,4.294967296E+9,4.294967296E+9]
; AVX2-64-NEXT:    vmulpd %ymm3, %ymm2, %ymm2
; AVX2-64-NEXT:    vextractps $2, %xmm1, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX2-64-NEXT:    vmovd %xmm1, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm1
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX2-64-NEXT:    vextractps $2, %xmm0, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX2-64-NEXT:    vmovq %xmm0, %rax
; AVX2-64-NEXT:    movl %eax, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm0
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX2-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; AVX2-64-NEXT:    retq
;
; AVX512F-64-LABEL: uitofp_v4i64_v4f64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512F-64-NEXT:    vmovq %xmm1, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: uitofp_v4i64_v4f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT:    vmovq %xmm1, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v4i64_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i64_v4f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX-32-LABEL: sitofp_v4i64_v4f32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $48, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstps (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    vzeroupper
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: sitofp_v4i64_v4f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-64-NEXT:    vmovq %xmm0, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-64-NEXT:    vmovq %xmm0, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-64-NEXT:    vzeroupper
; AVX1-64-NEXT:    retq
;
; AVX2-64-LABEL: sitofp_v4i64_v4f32:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX2-64-NEXT:    vmovq %xmm0, %rax
; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX2-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-64-NEXT:    vmovq %xmm0, %rax
; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX2-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-64-NEXT:    vzeroupper
; AVX2-64-NEXT:    retq
;
; AVX512F-64-LABEL: sitofp_v4i64_v4f32:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-64-NEXT:    vzeroupper
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: sitofp_v4i64_v4f32:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512VL-64-NEXT:    vzeroupper
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_v4i64_v4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v4i64_v4f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtqq2ps %ymm0, %xmm0
; AVX512DQVL-NEXT:    vzeroupper
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX-32-LABEL: uitofp_v4i64_v4f32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $48, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstps (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $1, %xmm1, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm1, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    vzeroupper
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v4i64_v4f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpsrlq $1, %xmm0, %xmm1
; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-64-NEXT:    vpsrlq $1, %xmm2, %xmm3
; AVX1-64-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-64-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
; AVX1-64-NEXT:    vorpd %ymm3, %ymm1, %ymm1
; AVX1-64-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-64-NEXT:    vmovq %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-64-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-64-NEXT:    vmovq %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-64-NEXT:    vaddps %xmm1, %xmm1, %xmm3
; AVX1-64-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-64-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-64-NEXT:    vzeroupper
; AVX1-64-NEXT:    retq
;
; AVX2-64-LABEL: uitofp_v4i64_v4f32:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-64-NEXT:    vpand %ymm1, %ymm0, %ymm1
; AVX2-64-NEXT:    vpsrlq $1, %ymm0, %ymm2
; AVX2-64-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-64-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX2-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-64-NEXT:    vmovq %xmm1, %rax
; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX2-64-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX2-64-NEXT:    vmovq %xmm1, %rax
; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX2-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm1
; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX2-64-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; AVX2-64-NEXT:    vextracti128 $1, %ymm0, %xmm3
; AVX2-64-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX2-64-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-64-NEXT:    vzeroupper
; AVX2-64-NEXT:    retq
;
; AVX512F-64-LABEL: uitofp_v4i64_v4f32:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-64-NEXT:    vzeroupper
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: uitofp_v4i64_v4f32:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512VL-64-NEXT:    vzeroupper
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v4i64_v4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i64_v4f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; AVX512DQVL-NEXT:    vzeroupper
; AVX512DQVL-NEXT:    ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

attributes #0 = { strictfp }