; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X64

; Verify that the backend correctly folds a sign/zero extend of a vector whose
; elements are all constant values or UNDEFs.
; The backend should be able to optimize all the test functions below into
; simple loads of the result from the constant pool, because the resulting
; vector is known at compile time.
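;
; For example (an illustrative sketch, not an additional autogenerated test),
; sign extending the constant vector <i8 0, i8 -1, i8 2, i8 -3>:
;   %r = sext <4 x i8> <i8 0, i8 -1, i8 2, i8 -3> to <4 x i16>
; is known at compile time to be <i16 0, i16 -1, i16 2, i16 -3>, so instead of
; emitting a vector sign-extend the backend can materialize the result with a
; single constant-pool load (the vmovddup seen in the checks below).
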
define <4 x i16> @test_sext_4i8_4i16() {
; X32-LABEL: test_sext_4i8_4i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = [0,65535,2,65533,0,65535,2,65533]
; X32-NEXT:    # xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = [0,65535,2,65533,0,65535,2,65533]
; X64-NEXT:    # xmm0 = mem[0,0]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i16>
  ret <4 x i16> %5
}

define <4 x i16> @test_sext_4i8_4i16_undef() {
; X32-LABEL: test_sext_4i8_4i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = [0,65535,0,65533,0,65535,0,65533]
; X32-NEXT:    # xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = [0,65535,0,65533,0,65535,0,65533]
; X64-NEXT:    # xmm0 = mem[0,0]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 undef, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i16>
  ret <4 x i16> %5
}

define <4 x i32> @test_sext_4i8_4i32() {
; X32-LABEL: test_sext_4i8_4i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i32>
  ret <4 x i32> %5
}

define <4 x i32> @test_sext_4i8_4i32_undef() {
; X32-LABEL: test_sext_4i8_4i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,0,4294967293]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,0,4294967293]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 undef, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i32>
  ret <4 x i32> %5
}

define <4 x i64> @test_sext_4i8_4i64() {
; X32-LABEL: test_sext_4i8_4i64:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,2,0,4294967293,4294967295]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,2,18446744073709551613]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_sext_4i8_4i64_undef() {
; X32-LABEL: test_sext_4i8_4i64_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,0,0,4294967293,4294967295]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i64_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,0,18446744073709551613]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 undef, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <8 x i16> @test_sext_8i8_8i16() {
; X32-LABEL: test_sext_8i8_8i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,2,65533,4,65531,6,65529]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,2,65533,4,65531,6,65529]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = sext <8 x i8> %8 to <8 x i16>
  ret <8 x i16> %9
}

define <8 x i32> @test_sext_8i8_8i32() {
; X32-LABEL: test_sext_8i8_8i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,2,4294967293,4,4294967291,6,4294967289]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,2,4294967293,4,4294967291,6,4294967289]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = sext <8 x i8> %8 to <8 x i32>
  ret <8 x i32> %9
}

define <8 x i16> @test_sext_8i8_8i16_undef() {
; X32-LABEL: test_sext_8i8_8i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,0,65533,0,65531,0,65529]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,0,65533,0,65531,0,65529]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 undef, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 undef, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 undef, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 undef, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = sext <8 x i8> %8 to <8 x i16>
  ret <8 x i16> %9
}

define <8 x i32> @test_sext_8i8_8i32_undef() {
; X32-LABEL: test_sext_8i8_8i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,0,4,0,6,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,0,4,0,6,0]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 undef, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 undef, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = sext <8 x i8> %8 to <8 x i32>
  ret <8 x i32> %9
}

define <4 x i16> @test_zext_4i8_4i16() {
; X32-LABEL: test_zext_4i8_4i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = [0,255,2,253,0,255,2,253]
; X32-NEXT:    # xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = [0,255,2,253,0,255,2,253]
; X64-NEXT:    # xmm0 = mem[0,0]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = zext <4 x i8> %4 to <4 x i16>
  ret <4 x i16> %5
}

define <4 x i32> @test_zext_4i8_4i32() {
; X32-LABEL: test_zext_4i8_4i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = zext <4 x i8> %4 to <4 x i32>
  ret <4 x i32> %5
}

define <4 x i64> @test_zext_4i8_4i64() {
; X32-LABEL: test_zext_4i8_4i64:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,253,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = zext <4 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i16> @test_zext_4i8_4i16_undef() {
; X32-LABEL: test_zext_4i8_4i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = [0,255,0,253,0,255,0,253]
; X32-NEXT:    # xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = [0,255,0,253,0,255,0,253]
; X64-NEXT:    # xmm0 = mem[0,0]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 undef, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = zext <4 x i8> %4 to <4 x i16>
  ret <4 x i16> %5
}

define <4 x i32> @test_zext_4i8_4i32_undef() {
; X32-LABEL: test_zext_4i8_4i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 undef, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 undef, i32 3
  %5 = zext <4 x i8> %4 to <4 x i32>
  ret <4 x i32> %5
}

define <4 x i64> @test_zext_4i8_4i64_undef() {
; X32-LABEL: test_zext_4i8_4i64_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i64_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,0]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 undef, i32 3
  %5 = zext <4 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <8 x i16> @test_zext_8i8_8i16() {
; X32-LABEL: test_zext_8i8_8i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = zext <8 x i8> %8 to <8 x i16>
  ret <8 x i16> %9
}

define <8 x i32> @test_zext_8i8_8i32() {
; X32-LABEL: test_zext_8i8_8i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = zext <8 x i8> %8 to <8 x i32>
  ret <8 x i32> %9
}

define <8 x i16> @test_zext_8i8_8i16_undef() {
; X32-LABEL: test_zext_8i8_8i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 undef, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 undef, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 undef, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 undef, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = zext <8 x i8> %8 to <8 x i16>
  ret <8 x i16> %9
}

define <8 x i32> @test_zext_8i8_8i32_undef() {
; X32-LABEL: test_zext_8i8_8i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 undef, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = zext <8 x i8> %8 to <8 x i32>
  ret <8 x i32> %9
}