1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X64
5 ; Verify that the backend correctly folds a sign/zero extension of a vector
6 ; whose elements are all constant values or undef.
7 ; The backend should be able to optimize every test function below into a
8 ; simple load of the result from the constant pool, because the resulting
9 ; vector is known at compile time.
; Sign-extends the constant <4 x i8> <0, -1, 2, -3>, built lane by lane with
; insertelement, to <4 x i16>. Both targets are expected to emit a single
; vmovaps of the constant <0,65535,2,65533,u,u,u,u>: -1 and -3 show up as their
; unsigned 16-bit encodings, and the upper four lanes of xmm0 are printed as
; u (undef).
11 define <4 x i16> @test_sext_4i8_4i16() {
12 ; X32-LABEL: test_sext_4i8_4i16:
14 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
17 ; X64-LABEL: test_sext_4i8_4i16:
19 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
21 %1 = insertelement <4 x i8> undef, i8 0, i32 0
22 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
23 %3 = insertelement <4 x i8> %2, i8 2, i32 2
24 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
25 %5 = sext <4 x i8> %4 to <4 x i16>
; Same as the <4 x i8> -> <4 x i16> sign-extension test, but lanes 0 and 2 of
; the source are undef. The expected constant keeps those lanes as u and holds
; 65535 / 65533 (the 16-bit encodings of -1 / -3) in lanes 1 and 3.
29 define <4 x i16> @test_sext_4i8_4i16_undef() {
30 ; X32-LABEL: test_sext_4i8_4i16_undef:
32 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
35 ; X64-LABEL: test_sext_4i8_4i16_undef:
37 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
39 %1 = insertelement <4 x i8> undef, i8 undef, i32 0
40 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
41 %3 = insertelement <4 x i8> %2, i8 undef, i32 2
42 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
43 %5 = sext <4 x i8> %4 to <4 x i16>
; Sign-extends the constant <4 x i8> <0, -1, 2, -3> to <4 x i32>. Both targets
; are expected to emit a single vmovaps of [0,4294967295,2,4294967293], i.e.
; the negative lanes printed as their unsigned 32-bit encodings.
47 define <4 x i32> @test_sext_4i8_4i32() {
48 ; X32-LABEL: test_sext_4i8_4i32:
50 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
53 ; X64-LABEL: test_sext_4i8_4i32:
55 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
57 %1 = insertelement <4 x i8> undef, i8 0, i32 0
58 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
59 %3 = insertelement <4 x i8> %2, i8 2, i32 2
60 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
61 %5 = sext <4 x i8> %4 to <4 x i32>
; <4 x i8> -> <4 x i32> sign extension where source lanes 0 and 2 are undef.
; The expected constant leaves lanes 0 and 2 as u and holds the 32-bit
; encodings of -1 and -3 in lanes 1 and 3.
65 define <4 x i32> @test_sext_4i8_4i32_undef() {
66 ; X32-LABEL: test_sext_4i8_4i32_undef:
68 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
71 ; X64-LABEL: test_sext_4i8_4i32_undef:
73 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
75 %1 = insertelement <4 x i8> undef, i8 undef, i32 0
76 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
77 %3 = insertelement <4 x i8> %2, i8 undef, i32 2
78 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
79 %5 = sext <4 x i8> %4 to <4 x i32>
; Sign-extends the constant <4 x i8> <0, -1, 2, -3> to <4 x i64>; the result
; lives in ymm0. The two targets print the same constant differently: the i686
; checks list eight 32-bit values (low half, then high half of each i64 lane),
; while the x86_64 checks list four 64-bit values.
83 define <4 x i64> @test_sext_4i8_4i64() {
84 ; X32-LABEL: test_sext_4i8_4i64:
86 ; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,2,0,4294967293,4294967295]
89 ; X64-LABEL: test_sext_4i8_4i64:
91 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,2,18446744073709551613]
93 %1 = insertelement <4 x i8> undef, i8 0, i32 0
94 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
95 %3 = insertelement <4 x i8> %2, i8 2, i32 2
96 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
97 %5 = sext <4 x i8> %4 to <4 x i64>
; <4 x i8> -> <4 x i64> sign extension where source lanes 0 and 2 are undef.
; On i686 each undef i64 lane is printed as two u entries (the constant is
; listed as 32-bit halves); on x86_64 it is a single u per lane.
101 define <4 x i64> @test_sext_4i8_4i64_undef() {
102 ; X32-LABEL: test_sext_4i8_4i64_undef:
104 ; X32-NEXT: vmovaps {{.*#+}} ymm0 = <u,u,4294967295,4294967295,u,u,4294967293,4294967295>
107 ; X64-LABEL: test_sext_4i8_4i64_undef:
109 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = <u,18446744073709551615,u,18446744073709551613>
111 %1 = insertelement <4 x i8> undef, i8 undef, i32 0
112 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
113 %3 = insertelement <4 x i8> %2, i8 undef, i32 2
114 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
115 %5 = sext <4 x i8> %4 to <4 x i64>
; Sign-extends the constant <8 x i8> <0, -1, 2, -3, 4, -5, 6, -7> to
; <8 x i16>. Both targets are expected to emit a single vmovaps of the fully
; defined constant, with the negative lanes printed as unsigned 16-bit values.
119 define <8 x i16> @test_sext_8i8_8i16() {
120 ; X32-LABEL: test_sext_8i8_8i16:
122 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,65535,2,65533,4,65531,6,65529]
125 ; X64-LABEL: test_sext_8i8_8i16:
127 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,65535,2,65533,4,65531,6,65529]
129 %1 = insertelement <8 x i8> undef, i8 0, i32 0
130 %2 = insertelement <8 x i8> %1, i8 -1, i32 1
131 %3 = insertelement <8 x i8> %2, i8 2, i32 2
132 %4 = insertelement <8 x i8> %3, i8 -3, i32 3
133 %5 = insertelement <8 x i8> %4, i8 4, i32 4
134 %6 = insertelement <8 x i8> %5, i8 -5, i32 5
135 %7 = insertelement <8 x i8> %6, i8 6, i32 6
136 %8 = insertelement <8 x i8> %7, i8 -7, i32 7
137 %9 = sext <8 x i8> %8 to <8 x i16>
; Sign-extends the constant <8 x i8> <0, -1, 2, -3, 4, -5, 6, -7> to
; <8 x i32>; the 256-bit result is expected as a single vmovaps into ymm0,
; with the negative lanes printed as unsigned 32-bit values.
141 define <8 x i32> @test_sext_8i8_8i32() {
142 ; X32-LABEL: test_sext_8i8_8i32:
144 ; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,4294967295,2,4294967293,4,4294967291,6,4294967289]
147 ; X64-LABEL: test_sext_8i8_8i32:
149 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,4294967295,2,4294967293,4,4294967291,6,4294967289]
151 %1 = insertelement <8 x i8> undef, i8 0, i32 0
152 %2 = insertelement <8 x i8> %1, i8 -1, i32 1
153 %3 = insertelement <8 x i8> %2, i8 2, i32 2
154 %4 = insertelement <8 x i8> %3, i8 -3, i32 3
155 %5 = insertelement <8 x i8> %4, i8 4, i32 4
156 %6 = insertelement <8 x i8> %5, i8 -5, i32 5
157 %7 = insertelement <8 x i8> %6, i8 6, i32 6
158 %8 = insertelement <8 x i8> %7, i8 -7, i32 7
159 %9 = sext <8 x i8> %8 to <8 x i32>
; <8 x i8> -> <8 x i16> sign extension where the even source lanes are undef
; and the odd lanes hold -1, -3, -5, -7. The expected constant keeps the even
; lanes as u and holds the 16-bit encodings of the negatives in the odd lanes.
163 define <8 x i16> @test_sext_8i8_8i16_undef() {
164 ; X32-LABEL: test_sext_8i8_8i16_undef:
166 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,65531,u,65529>
169 ; X64-LABEL: test_sext_8i8_8i16_undef:
171 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,65531,u,65529>
173 %1 = insertelement <8 x i8> undef, i8 undef, i32 0
174 %2 = insertelement <8 x i8> %1, i8 -1, i32 1
175 %3 = insertelement <8 x i8> %2, i8 undef, i32 2
176 %4 = insertelement <8 x i8> %3, i8 -3, i32 3
177 %5 = insertelement <8 x i8> %4, i8 undef, i32 4
178 %6 = insertelement <8 x i8> %5, i8 -5, i32 5
179 %7 = insertelement <8 x i8> %6, i8 undef, i32 6
180 %8 = insertelement <8 x i8> %7, i8 -7, i32 7
181 %9 = sext <8 x i8> %8 to <8 x i16>
; <8 x i8> -> <8 x i32> sign extension where the odd source lanes are undef
; and the even lanes hold the non-negative values 0, 2, 4, 6. The expected
; ymm0 constant keeps the odd lanes as u.
185 define <8 x i32> @test_sext_8i8_8i32_undef() {
186 ; X32-LABEL: test_sext_8i8_8i32_undef:
188 ; X32-NEXT: vmovaps {{.*#+}} ymm0 = <0,u,2,u,4,u,6,u>
191 ; X64-LABEL: test_sext_8i8_8i32_undef:
193 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = <0,u,2,u,4,u,6,u>
195 %1 = insertelement <8 x i8> undef, i8 0, i32 0
196 %2 = insertelement <8 x i8> %1, i8 undef, i32 1
197 %3 = insertelement <8 x i8> %2, i8 2, i32 2
198 %4 = insertelement <8 x i8> %3, i8 undef, i32 3
199 %5 = insertelement <8 x i8> %4, i8 4, i32 4
200 %6 = insertelement <8 x i8> %5, i8 undef, i32 5
201 %7 = insertelement <8 x i8> %6, i8 6, i32 6
202 %8 = insertelement <8 x i8> %7, i8 undef, i32 7
203 %9 = sext <8 x i8> %8 to <8 x i32>
; Zero-extends the constant <4 x i8> <0, -1, 2, -3> to <4 x i16>. The i8
; values -1 and -3 are the bit patterns 255 and 253, so the expected constant
; is <0,255,2,253,u,u,u,u>; the upper four lanes of xmm0 are printed as u.
207 define <4 x i16> @test_zext_4i8_4i16() {
208 ; X32-LABEL: test_zext_4i8_4i16:
210 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = <0,255,2,253,u,u,u,u>
213 ; X64-LABEL: test_zext_4i8_4i16:
215 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = <0,255,2,253,u,u,u,u>
217 %1 = insertelement <4 x i8> undef, i8 0, i32 0
218 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
219 %3 = insertelement <4 x i8> %2, i8 2, i32 2
220 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
221 %5 = zext <4 x i8> %4 to <4 x i16>
; Zero-extends the constant <4 x i8> <0, -1, 2, -3> to <4 x i32>. Both targets
; are expected to emit a single vmovaps of [0,255,2,253].
225 define <4 x i32> @test_zext_4i8_4i32() {
226 ; X32-LABEL: test_zext_4i8_4i32:
228 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,2,253]
231 ; X64-LABEL: test_zext_4i8_4i32:
233 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,2,253]
235 %1 = insertelement <4 x i8> undef, i8 0, i32 0
236 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
237 %3 = insertelement <4 x i8> %2, i8 2, i32 2
238 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
239 %5 = zext <4 x i8> %4 to <4 x i32>
; Zero-extends the constant <4 x i8> <0, -1, 2, -3> to <4 x i64>; the result
; lives in ymm0. The i686 checks list the constant as eight 32-bit values
; (each lane's low half followed by a zero high half), while the x86_64 checks
; list four 64-bit values.
243 define <4 x i64> @test_zext_4i8_4i64() {
244 ; X32-LABEL: test_zext_4i8_4i64:
246 ; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,253,0]
249 ; X64-LABEL: test_zext_4i8_4i64:
251 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,2,253]
253 %1 = insertelement <4 x i8> undef, i8 0, i32 0
254 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
255 %3 = insertelement <4 x i8> %2, i8 2, i32 2
256 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
257 %5 = zext <4 x i8> %4 to <4 x i64>
; <4 x i8> -> <4 x i16> zero extension where source lanes 0 and 2 are undef.
; Unlike the sign-extension variants, the expected constant has 0 (not u) in
; the lanes whose source was undef; only the upper four lanes of xmm0 stay u.
261 define <4 x i16> @test_zext_4i8_4i16_undef() {
262 ; X32-LABEL: test_zext_4i8_4i16_undef:
264 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = <0,255,0,253,u,u,u,u>
267 ; X64-LABEL: test_zext_4i8_4i16_undef:
269 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = <0,255,0,253,u,u,u,u>
271 %1 = insertelement <4 x i8> undef, i8 undef, i32 0
272 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
273 %3 = insertelement <4 x i8> %2, i8 undef, i32 2
274 %4 = insertelement <4 x i8> %3, i8 -3, i32 3
275 %5 = zext <4 x i8> %4 to <4 x i16>
; <4 x i8> -> <4 x i32> zero extension where source lanes 1 and 3 are undef
; and lanes 0 and 2 hold 0 and 2. The expected constant is [0,0,2,0]: the
; lanes whose source was undef are materialized as 0.
279 define <4 x i32> @test_zext_4i8_4i32_undef() {
280 ; X32-LABEL: test_zext_4i8_4i32_undef:
282 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,2,0]
285 ; X64-LABEL: test_zext_4i8_4i32_undef:
287 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,2,0]
289 %1 = insertelement <4 x i8> undef, i8 0, i32 0
290 %2 = insertelement <4 x i8> %1, i8 undef, i32 1
291 %3 = insertelement <4 x i8> %2, i8 2, i32 2
292 %4 = insertelement <4 x i8> %3, i8 undef, i32 3
293 %5 = zext <4 x i8> %4 to <4 x i32>
; <4 x i8> -> <4 x i64> zero extension of <undef, -1, 2, undef>. The lanes
; whose source was undef are expected as 0. The i686 checks list the constant
; as eight 32-bit values (low half, then high half of each lane); the x86_64
; checks list four 64-bit values.
297 define <4 x i64> @test_zext_4i8_4i64_undef() {
298 ; X32-LABEL: test_zext_4i8_4i64_undef:
300 ; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,0,0]
303 ; X64-LABEL: test_zext_4i8_4i64_undef:
305 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,2,0]
307 %1 = insertelement <4 x i8> undef, i8 undef, i32 0
308 %2 = insertelement <4 x i8> %1, i8 -1, i32 1
309 %3 = insertelement <4 x i8> %2, i8 2, i32 2
310 %4 = insertelement <4 x i8> %3, i8 undef, i32 3
311 %5 = zext <4 x i8> %4 to <4 x i64>
; Zero-extends the constant <8 x i8> <0, -1, 2, -3, 4, -5, 6, -7> to
; <8 x i16>. The negative i8 values are the bit patterns 255, 253, 251, 249,
; so both targets expect a single vmovaps of [0,255,2,253,4,251,6,249].
315 define <8 x i16> @test_zext_8i8_8i16() {
316 ; X32-LABEL: test_zext_8i8_8i16:
318 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
321 ; X64-LABEL: test_zext_8i8_8i16:
323 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
325 %1 = insertelement <8 x i8> undef, i8 0, i32 0
326 %2 = insertelement <8 x i8> %1, i8 -1, i32 1
327 %3 = insertelement <8 x i8> %2, i8 2, i32 2
328 %4 = insertelement <8 x i8> %3, i8 -3, i32 3
329 %5 = insertelement <8 x i8> %4, i8 4, i32 4
330 %6 = insertelement <8 x i8> %5, i8 -5, i32 5
331 %7 = insertelement <8 x i8> %6, i8 6, i32 6
332 %8 = insertelement <8 x i8> %7, i8 -7, i32 7
333 %9 = zext <8 x i8> %8 to <8 x i16>
; Zero-extends the constant <8 x i8> <0, -1, 2, -3, 4, -5, 6, -7> to
; <8 x i32>; the 256-bit result is expected as a single vmovaps of
; [0,255,2,253,4,251,6,249] into ymm0.
337 define <8 x i32> @test_zext_8i8_8i32() {
338 ; X32-LABEL: test_zext_8i8_8i32:
340 ; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
343 ; X64-LABEL: test_zext_8i8_8i32:
345 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
347 %1 = insertelement <8 x i8> undef, i8 0, i32 0
348 %2 = insertelement <8 x i8> %1, i8 -1, i32 1
349 %3 = insertelement <8 x i8> %2, i8 2, i32 2
350 %4 = insertelement <8 x i8> %3, i8 -3, i32 3
351 %5 = insertelement <8 x i8> %4, i8 4, i32 4
352 %6 = insertelement <8 x i8> %5, i8 -5, i32 5
353 %7 = insertelement <8 x i8> %6, i8 6, i32 6
354 %8 = insertelement <8 x i8> %7, i8 -7, i32 7
355 %9 = zext <8 x i8> %8 to <8 x i32>
; <8 x i8> -> <8 x i16> zero extension where the even source lanes are undef
; and the odd lanes hold -1, -3, -5, -7. The expected constant is fully
; defined: the lanes whose source was undef are 0, the odd lanes are 255, 253,
; 251, 249.
359 define <8 x i16> @test_zext_8i8_8i16_undef() {
360 ; X32-LABEL: test_zext_8i8_8i16_undef:
362 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
365 ; X64-LABEL: test_zext_8i8_8i16_undef:
367 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
369 %1 = insertelement <8 x i8> undef, i8 undef, i32 0
370 %2 = insertelement <8 x i8> %1, i8 -1, i32 1
371 %3 = insertelement <8 x i8> %2, i8 undef, i32 2
372 %4 = insertelement <8 x i8> %3, i8 -3, i32 3
373 %5 = insertelement <8 x i8> %4, i8 undef, i32 4
374 %6 = insertelement <8 x i8> %5, i8 -5, i32 5
375 %7 = insertelement <8 x i8> %6, i8 undef, i32 6
376 %8 = insertelement <8 x i8> %7, i8 -7, i32 7
377 %9 = zext <8 x i8> %8 to <8 x i16>
; <8 x i8> -> <8 x i32> zero extension of <0, undef, 2, -3, 4, undef, 6, undef>.
; Note the undef lanes are 1, 5 and 7 (lane 3 holds -3, i.e. 253). The expected
; ymm0 constant is [0,0,2,253,4,0,6,0], with 0 in the lanes whose source was
; undef.
381 define <8 x i32> @test_zext_8i8_8i32_undef() {
382 ; X32-LABEL: test_zext_8i8_8i32_undef:
384 ; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
387 ; X64-LABEL: test_zext_8i8_8i32_undef:
389 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
391 %1 = insertelement <8 x i8> undef, i8 0, i32 0
392 %2 = insertelement <8 x i8> %1, i8 undef, i32 1
393 %3 = insertelement <8 x i8> %2, i8 2, i32 2
394 %4 = insertelement <8 x i8> %3, i8 -3, i32 3
395 %5 = insertelement <8 x i8> %4, i8 4, i32 4
396 %6 = insertelement <8 x i8> %5, i8 undef, i32 5
397 %7 = insertelement <8 x i8> %6, i8 6, i32 6
398 %8 = insertelement <8 x i8> %7, i8 undef, i32 7
399 %9 = zext <8 x i8> %8 to <8 x i32>