1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx | FileCheck %s
3 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
; Signed integer to float conversion of <2 x i8> to <2 x float> (sitofp).
; The default run expects vpsllq/vpsrad/vpshufd ahead of vcvtdq2ps, while the
; run with -x86-experimental-vector-widening-legalization expects vpmovsxbd
; ahead of vcvtdq2ps.
5 define <2 x float> @cvt_v2i8_v2f32(<2 x i8> %src) {
6 ; CHECK-LABEL: cvt_v2i8_v2f32:
8 ; CHECK-NEXT: vpsllq $56, %xmm0, %xmm0
9 ; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0
10 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
11 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
14 ; CHECK-WIDE-LABEL: cvt_v2i8_v2f32:
15 ; CHECK-WIDE: ## %bb.0:
16 ; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm0
17 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
18 ; CHECK-WIDE-NEXT: retl
19 %res = sitofp <2 x i8> %src to <2 x float>
; Signed integer to float conversion of <2 x i16> to <2 x float> (sitofp).
; The default run expects vpsllq/vpsrad/vpshufd ahead of vcvtdq2ps, while the
; run with -x86-experimental-vector-widening-legalization expects vpmovsxwd
; ahead of vcvtdq2ps.
23 define <2 x float> @cvt_v2i16_v2f32(<2 x i16> %src) {
24 ; CHECK-LABEL: cvt_v2i16_v2f32:
26 ; CHECK-NEXT: vpsllq $48, %xmm0, %xmm0
27 ; CHECK-NEXT: vpsrad $16, %xmm0, %xmm0
28 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
29 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
32 ; CHECK-WIDE-LABEL: cvt_v2i16_v2f32:
33 ; CHECK-WIDE: ## %bb.0:
34 ; CHECK-WIDE-NEXT: vpmovsxwd %xmm0, %xmm0
35 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
36 ; CHECK-WIDE-NEXT: retl
37 %res = sitofp <2 x i16> %src to <2 x float>
; Signed integer to float conversion of <2 x i32> to <2 x float> (sitofp).
; The default run expects a vpermilps shuffle ahead of vcvtdq2ps, while the
; run with -x86-experimental-vector-widening-legalization expects vcvtdq2ps
; alone.
41 define <2 x float> @cvt_v2i32_v2f32(<2 x i32> %src) {
42 ; CHECK-LABEL: cvt_v2i32_v2f32:
44 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
45 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
48 ; CHECK-WIDE-LABEL: cvt_v2i32_v2f32:
49 ; CHECK-WIDE: ## %bb.0:
50 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
51 ; CHECK-WIDE-NEXT: retl
52 %res = sitofp <2 x i32> %src to <2 x float>
; Unsigned integer to float conversion of <2 x i8> to <2 x float> (uitofp).
; The default run expects a zero-filling vpshufb ahead of vcvtdq2ps, while the
; run with -x86-experimental-vector-widening-legalization expects vpmovzxbd
; ahead of vcvtdq2ps.
56 define <2 x float> @cvt_v2u8_v2f32(<2 x i8> %src) {
57 ; CHECK-LABEL: cvt_v2u8_v2f32:
59 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
60 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
63 ; CHECK-WIDE-LABEL: cvt_v2u8_v2f32:
64 ; CHECK-WIDE: ## %bb.0:
65 ; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
66 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
67 ; CHECK-WIDE-NEXT: retl
68 %res = uitofp <2 x i8> %src to <2 x float>
; Unsigned integer to float conversion of <2 x i16> to <2 x float> (uitofp).
; The default run expects a zero-filling vpshufb ahead of vcvtdq2ps, while the
; run with -x86-experimental-vector-widening-legalization expects vpmovzxwd
; ahead of vcvtdq2ps.
72 define <2 x float> @cvt_v2u16_v2f32(<2 x i16> %src) {
73 ; CHECK-LABEL: cvt_v2u16_v2f32:
75 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[8,9],zero,zero,xmm0[8,9],zero,zero,xmm0[10,11],zero,zero
76 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
79 ; CHECK-WIDE-LABEL: cvt_v2u16_v2f32:
80 ; CHECK-WIDE: ## %bb.0:
81 ; CHECK-WIDE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
82 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
83 ; CHECK-WIDE-NEXT: retl
84 %res = uitofp <2 x i16> %src to <2 x float>
; Unsigned integer to float conversion of <2 x i32> to <2 x float> (uitofp).
; Both runs expect the same shape of sequence. The 32-bit inputs are placed in
; 64-bit lanes with zeroed upper halves (vxorps + vblendps in the default run,
; vpmovzxdq in the widening-legalization run), OR-ed with a
; 4.503599627370496E+15 constant, that constant is subtracted again with
; vsubpd, and the double result is narrowed with vcvtpd2ps.
88 define <2 x float> @cvt_v2u32_v2f32(<2 x i32> %src) {
89 ; CHECK-LABEL: cvt_v2u32_v2f32:
91 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
92 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
93 ; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
94 ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
95 ; CHECK-NEXT: vsubpd %xmm1, %xmm0, %xmm0
96 ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
99 ; CHECK-WIDE-LABEL: cvt_v2u32_v2f32:
100 ; CHECK-WIDE: ## %bb.0:
101 ; CHECK-WIDE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
102 ; CHECK-WIDE-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
103 ; CHECK-WIDE-NEXT: vpor %xmm1, %xmm0, %xmm0
104 ; CHECK-WIDE-NEXT: vsubpd %xmm1, %xmm0, %xmm0
105 ; CHECK-WIDE-NEXT: vcvtpd2ps %xmm0, %xmm0
106 ; CHECK-WIDE-NEXT: retl
107 %res = uitofp <2 x i32> %src to <2 x float>
; Float to signed integer conversion of <2 x float> to <2 x i8> (fptosi).
; Both runs expect vcvttps2dq first. The default run then expects vpmovsxdq,
; while the run with -x86-experimental-vector-widening-legalization expects a
; vpshufb that gathers bytes 0 and 4.
111 define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
112 ; CHECK-LABEL: cvt_v2f32_v2i8:
114 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
115 ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
118 ; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
119 ; CHECK-WIDE: ## %bb.0:
120 ; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
121 ; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
122 ; CHECK-WIDE-NEXT: retl
123 %res = fptosi <2 x float> %src to <2 x i8>
; Float to signed integer conversion of <2 x float> to <2 x i16> (fptosi).
; Both runs expect vcvttps2dq first. The default run then expects vpmovsxdq,
; while the run with -x86-experimental-vector-widening-legalization expects a
; vpshuflw that gathers words 0 and 2.
127 define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
128 ; CHECK-LABEL: cvt_v2f32_v2i16:
130 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
131 ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
134 ; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
135 ; CHECK-WIDE: ## %bb.0:
136 ; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
137 ; CHECK-WIDE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
138 ; CHECK-WIDE-NEXT: retl
139 %res = fptosi <2 x float> %src to <2 x i16>
; Float to signed integer conversion of <2 x float> to <2 x i32> (fptosi).
; The default run expects vcvttps2dq followed by vpmovzxdq, while the run with
; -x86-experimental-vector-widening-legalization expects vcvttps2dq alone.
143 define <2 x i32> @cvt_v2f32_v2i32(<2 x float> %src) {
144 ; CHECK-LABEL: cvt_v2f32_v2i32:
146 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
147 ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
150 ; CHECK-WIDE-LABEL: cvt_v2f32_v2i32:
151 ; CHECK-WIDE: ## %bb.0:
152 ; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
153 ; CHECK-WIDE-NEXT: retl
154 %res = fptosi <2 x float> %src to <2 x i32>
; Float to unsigned integer conversion of <2 x float> to <2 x i8> (fptoui).
; Both runs expect vcvttps2dq first. The default run then expects vpmovzxdq,
; while the run with -x86-experimental-vector-widening-legalization expects a
; vpshufb that gathers bytes 0 and 4.
158 define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
159 ; CHECK-LABEL: cvt_v2f32_v2u8:
161 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
162 ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
165 ; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
166 ; CHECK-WIDE: ## %bb.0:
167 ; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
168 ; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
169 ; CHECK-WIDE-NEXT: retl
170 %res = fptoui <2 x float> %src to <2 x i8>
; Float to unsigned integer conversion of <2 x float> to <2 x i16> (fptoui).
; Both runs expect vcvttps2dq first. The default run then expects vpmovzxdq,
; while the run with -x86-experimental-vector-widening-legalization expects a
; vpshuflw that gathers words 0 and 2.
174 define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
175 ; CHECK-LABEL: cvt_v2f32_v2u16:
177 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
178 ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
181 ; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:
182 ; CHECK-WIDE: ## %bb.0:
183 ; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
184 ; CHECK-WIDE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
185 ; CHECK-WIDE-NEXT: retl
186 %res = fptoui <2 x float> %src to <2 x i16>
; Float to unsigned integer conversion of <2 x float> to <2 x i32> (fptoui).
; The default run expects a per-element scalar sequence. Each element is
; compared against a constant loaded from memory (vucomiss), conditionally
; reduced by it (vsubss), spilled to the stack, converted through x87
; flds/fisttpll, and the results are reassembled with vmovd/vpinsrd.
; The run with -x86-experimental-vector-widening-legalization expects a fully
; vectorized sequence. It compares against a 2.14748365E+9 splat (vcmpltps),
; converts both the original and the constant-subtracted input with
; vcvttps2dq, XORs the latter with a constant-pool value, and selects between
; the two results with vblendvps.
190 define <2 x i32> @cvt_v2f32_v2u32(<2 x float> %src) {
191 ; CHECK-LABEL: cvt_v2f32_v2u32:
193 ; CHECK-NEXT: subl $36, %esp
194 ; CHECK-NEXT: .cfi_def_cfa_offset 40
195 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
196 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
197 ; CHECK-NEXT: vucomiss %xmm1, %xmm2
198 ; CHECK-NEXT: jb LBB11_2
199 ; CHECK-NEXT: ## %bb.1:
200 ; CHECK-NEXT: vsubss %xmm1, %xmm2, %xmm2
201 ; CHECK-NEXT: LBB11_2:
202 ; CHECK-NEXT: vmovss %xmm2, (%esp)
203 ; CHECK-NEXT: flds (%esp)
204 ; CHECK-NEXT: fisttpll (%esp)
205 ; CHECK-NEXT: setae %al
206 ; CHECK-NEXT: movzbl %al, %eax
207 ; CHECK-NEXT: shll $31, %eax
208 ; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax
209 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
210 ; CHECK-NEXT: jb LBB11_4
211 ; CHECK-NEXT: ## %bb.3:
212 ; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0
213 ; CHECK-NEXT: LBB11_4:
214 ; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
215 ; CHECK-NEXT: flds {{[0-9]+}}(%esp)
216 ; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
217 ; CHECK-NEXT: setae %cl
218 ; CHECK-NEXT: movzbl %cl, %ecx
219 ; CHECK-NEXT: shll $31, %ecx
220 ; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx
221 ; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
222 ; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
223 ; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
224 ; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
225 ; CHECK-NEXT: addl $36, %esp
228 ; CHECK-WIDE-LABEL: cvt_v2f32_v2u32:
229 ; CHECK-WIDE: ## %bb.0:
230 ; CHECK-WIDE-NEXT: vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
231 ; CHECK-WIDE-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
232 ; CHECK-WIDE-NEXT: vsubps %xmm1, %xmm0, %xmm1
233 ; CHECK-WIDE-NEXT: vcvttps2dq %xmm1, %xmm1
234 ; CHECK-WIDE-NEXT: vxorps LCPI11_1, %xmm1, %xmm1
235 ; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
236 ; CHECK-WIDE-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
237 ; CHECK-WIDE-NEXT: retl
238 %res = fptoui <2 x float> %src to <2 x i32>
242 define <32 x i8> @PR40146(<4 x i64> %x) {
243 ; CHECK-LABEL: PR40146:
245 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
246 ; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
247 ; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
248 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
251 ; CHECK-WIDE-LABEL: PR40146:
252 ; CHECK-WIDE: ## %bb.0:
253 ; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
254 ; CHECK-WIDE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
255 ; CHECK-WIDE-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
256 ; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
257 ; CHECK-WIDE-NEXT: retl
258 %perm = shufflevector <4 x i64> %x, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
259 %t1 = bitcast <4 x i64> %perm to <32 x i8>
260 %t2 = shufflevector <32 x i8> %t1, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 16, i32 48, i32 17, i32 48, i32 18, i32 48, i32 19, i32 48, i32 20, i32 48, i32 21, i32 48, i32 22, i32 48, i32 23, i32 48>