1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
3 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX
; Test: signed i64 -> f64 vector conversion of masked input.
; Lane 0 of %a is ANDed with 255 and lane 1 with 65535, then the result is
; converted with sitofp to <2 x double>.
; The expected assembly for every RUN configuration narrows the lanes with a
; shuffle/mask and converts with (v)cvtdq2pd, i.e. a 32-bit integer conversion
; (presumably enabled by the upper bits being known zero after the mask).
9 define <2 x double> @mask_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
10 ; X32-SSE-LABEL: mask_sitofp_2i64_2f64:
12 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
13 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
14 ; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
17 ; X32-AVX-LABEL: mask_sitofp_2i64_2f64:
19 ; X32-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
20 ; X32-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
23 ; X64-SSE-LABEL: mask_sitofp_2i64_2f64:
25 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
26 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
27 ; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
30 ; X64-AVX-LABEL: mask_sitofp_2i64_2f64:
32 ; X64-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
33 ; X64-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; Keep only the low 8 bits of lane 0 and the low 16 bits of lane 1.
35 %and = and <2 x i64> %a, <i64 255, i64 65535>
; Signed conversion of the masked lanes to double.
36 %cvt = sitofp <2 x i64> %and to <2 x double>
; Test: unsigned i64 -> f64 vector conversion of masked input.
; Lane 0 of %a is ANDed with 255 and lane 1 with 65535, then the result is
; converted with uitofp to <2 x double>.
; The expected assembly for every RUN configuration is a shuffle/mask followed
; by (v)cvtdq2pd, the same instruction sequence that is expected for the
; sitofp variant of this test.
40 define <2 x double> @mask_uitofp_2i64_2f64(<2 x i64> %a) nounwind {
41 ; X32-SSE-LABEL: mask_uitofp_2i64_2f64:
43 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
44 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
45 ; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
48 ; X32-AVX-LABEL: mask_uitofp_2i64_2f64:
50 ; X32-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
51 ; X32-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
54 ; X64-SSE-LABEL: mask_uitofp_2i64_2f64:
56 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
57 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
58 ; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
61 ; X64-AVX-LABEL: mask_uitofp_2i64_2f64:
63 ; X64-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
64 ; X64-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; Keep only the low 8 bits of lane 0 and the low 16 bits of lane 1.
66 %and = and <2 x i64> %a, <i64 255, i64 65535>
; Unsigned conversion of the masked lanes to double.
67 %cvt = uitofp <2 x i64> %and to <2 x double>
; Test: signed i64 -> f32 vector conversion of masked input.
; The four lanes of %a are ANDed with 127, 255, 4095 and 65535 respectively,
; then converted with sitofp to <4 x float>.
; The expected assembly gathers the even 32-bit elements with (v)shufps,
; applies the mask with (v)andps and converts with (v)cvtdq2ps. The AVX
; configurations additionally expect vextractf128 before the shuffle and
; vzeroupper after the conversion.
71 define <4 x float> @mask_sitofp_4i64_4f32(<4 x i64> %a) nounwind {
72 ; X32-SSE-LABEL: mask_sitofp_4i64_4f32:
74 ; X32-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
75 ; X32-SSE-NEXT: andps {{\.LCPI.*}}, %xmm0
76 ; X32-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
79 ; X32-AVX-LABEL: mask_sitofp_4i64_4f32:
81 ; X32-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
82 ; X32-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
83 ; X32-AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
84 ; X32-AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
85 ; X32-AVX-NEXT: vzeroupper
88 ; X64-SSE-LABEL: mask_sitofp_4i64_4f32:
90 ; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
91 ; X64-SSE-NEXT: andps {{.*}}(%rip), %xmm0
92 ; X64-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
95 ; X64-AVX-LABEL: mask_sitofp_4i64_4f32:
97 ; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
98 ; X64-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
99 ; X64-AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
100 ; X64-AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
101 ; X64-AVX-NEXT: vzeroupper
; Per-lane masks keep the low 7, 8, 12 and 16 bits respectively.
103 %and = and <4 x i64> %a, <i64 127, i64 255, i64 4095, i64 65535>
; Signed conversion of the masked lanes to float.
104 %cvt = sitofp <4 x i64> %and to <4 x float>
; Test: unsigned i64 -> f32 vector conversion of masked input.
; The four lanes of %a are ANDed with 127, 255, 4095 and 65535 respectively,
; then converted with uitofp to <4 x float>.
; The expected assembly is the same instruction sequence as for the sitofp
; variant: (v)shufps of the even 32-bit elements, (v)andps with the mask and
; (v)cvtdq2ps, with vextractf128 / vzeroupper added in the AVX configurations.
108 define <4 x float> @mask_uitofp_4i64_4f32(<4 x i64> %a) nounwind {
109 ; X32-SSE-LABEL: mask_uitofp_4i64_4f32:
111 ; X32-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
112 ; X32-SSE-NEXT: andps {{\.LCPI.*}}, %xmm0
113 ; X32-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
116 ; X32-AVX-LABEL: mask_uitofp_4i64_4f32:
118 ; X32-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
119 ; X32-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
120 ; X32-AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
121 ; X32-AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
122 ; X32-AVX-NEXT: vzeroupper
125 ; X64-SSE-LABEL: mask_uitofp_4i64_4f32:
127 ; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
128 ; X64-SSE-NEXT: andps {{.*}}(%rip), %xmm0
129 ; X64-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
132 ; X64-AVX-LABEL: mask_uitofp_4i64_4f32:
134 ; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
135 ; X64-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
136 ; X64-AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
137 ; X64-AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
138 ; X64-AVX-NEXT: vzeroupper
; Per-lane masks keep the low 7, 8, 12 and 16 bits respectively.
140 %and = and <4 x i64> %a, <i64 127, i64 255, i64 4095, i64 65535>
; Unsigned conversion of the masked lanes to float.
141 %cvt = uitofp <4 x i64> %and to <4 x float>
; Test: signed i64 -> f64 vector conversion of a clamped input.
; Each lane of %a is clamped to the range [-255, 255] using two
; icmp + select pairs (lower bound first, then upper bound), and the clamped
; value is converted with sitofp to <2 x double>.
; The expected assembly performs the two 64-bit clamps (a pcmpgtd/pcmpeqd
; based compare-and-blend sequence for SSE2, vpcmpgtq + vblendvpd for AVX),
; then selects the low 32 bits of each lane with a [0,2,2,3] shuffle and
; converts with (v)cvtdq2pd.
145 define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
146 ; X32-SSE-LABEL: clamp_sitofp_2i64_2f64:
148 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
149 ; X32-SSE-NEXT: movdqa %xmm0, %xmm2
150 ; X32-SSE-NEXT: pxor %xmm1, %xmm2
151 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [2147483393,4294967295,2147483393,4294967295]
152 ; X32-SSE-NEXT: movdqa %xmm2, %xmm4
153 ; X32-SSE-NEXT: pcmpgtd %xmm3, %xmm4
154 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
155 ; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
156 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
157 ; X32-SSE-NEXT: pand %xmm5, %xmm2
158 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
159 ; X32-SSE-NEXT: por %xmm2, %xmm3
160 ; X32-SSE-NEXT: pand %xmm3, %xmm0
161 ; X32-SSE-NEXT: pandn {{\.LCPI.*}}, %xmm3
162 ; X32-SSE-NEXT: por %xmm0, %xmm3
163 ; X32-SSE-NEXT: pxor %xmm3, %xmm1
164 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,0,2147483903,0]
165 ; X32-SSE-NEXT: movdqa %xmm0, %xmm2
166 ; X32-SSE-NEXT: pcmpgtd %xmm1, %xmm2
167 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
168 ; X32-SSE-NEXT: pcmpeqd %xmm0, %xmm1
169 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
170 ; X32-SSE-NEXT: pand %xmm4, %xmm0
171 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
172 ; X32-SSE-NEXT: por %xmm0, %xmm1
173 ; X32-SSE-NEXT: pand %xmm1, %xmm3
174 ; X32-SSE-NEXT: pandn {{\.LCPI.*}}, %xmm1
175 ; X32-SSE-NEXT: por %xmm3, %xmm1
176 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
177 ; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
180 ; X32-AVX-LABEL: clamp_sitofp_2i64_2f64:
182 ; X32-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967041,4294967295,4294967041,4294967295]
183 ; X32-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
184 ; X32-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
185 ; X32-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [255,0,255,0]
186 ; X32-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
187 ; X32-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
188 ; X32-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
189 ; X32-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
192 ; X64-SSE-LABEL: clamp_sitofp_2i64_2f64:
194 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
195 ; X64-SSE-NEXT: movdqa %xmm0, %xmm2
196 ; X64-SSE-NEXT: pxor %xmm1, %xmm2
197 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067713,18446744071562067713]
198 ; X64-SSE-NEXT: movdqa %xmm2, %xmm4
199 ; X64-SSE-NEXT: pcmpgtd %xmm3, %xmm4
200 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
201 ; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm2
202 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
203 ; X64-SSE-NEXT: pand %xmm5, %xmm2
204 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
205 ; X64-SSE-NEXT: por %xmm2, %xmm3
206 ; X64-SSE-NEXT: pand %xmm3, %xmm0
207 ; X64-SSE-NEXT: pandn {{.*}}(%rip), %xmm3
208 ; X64-SSE-NEXT: por %xmm0, %xmm3
209 ; X64-SSE-NEXT: pxor %xmm3, %xmm1
210 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
211 ; X64-SSE-NEXT: movdqa %xmm0, %xmm2
212 ; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm2
213 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
214 ; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
215 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
216 ; X64-SSE-NEXT: pand %xmm4, %xmm0
217 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
218 ; X64-SSE-NEXT: por %xmm0, %xmm1
219 ; X64-SSE-NEXT: pand %xmm1, %xmm3
220 ; X64-SSE-NEXT: pandn {{.*}}(%rip), %xmm1
221 ; X64-SSE-NEXT: por %xmm3, %xmm1
222 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
223 ; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
226 ; X64-AVX-LABEL: clamp_sitofp_2i64_2f64:
228 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551361,18446744073709551361]
229 ; X64-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
230 ; X64-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
231 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
232 ; X64-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
233 ; X64-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
234 ; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
235 ; X64-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; Lower clamp: lanes below -255 are replaced with -255.
237 %clo = icmp slt <2 x i64> %a, <i64 -255, i64 -255>
238 %lo = select <2 x i1> %clo, <2 x i64> <i64 -255, i64 -255>, <2 x i64> %a
; Upper clamp: lanes above 255 are replaced with 255.
239 %chi = icmp sgt <2 x i64> %lo, <i64 255, i64 255>
240 %hi = select <2 x i1> %chi, <2 x i64> <i64 255, i64 255>, <2 x i64> %lo
; Signed conversion of the clamped lanes to double.
241 %cvt = sitofp <2 x i64> %hi to <2 x double>
242 ret <2 x double> %cvt