1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
; Zero-extend lane 0 of the <4 x i32> argument to i64 and insert it into lane 0
; of an undef <2 x i64>. Expected codegen: zero v1, then zip1 v0 with v1 as .4s.
4 define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
5 ; CHECK-LABEL: extract0_i32_zext_insert0_i64_undef:
7 ; CHECK-NEXT: movi v1.2d, #0000000000000000
8 ; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s
10 %e = extractelement <4 x i32> %x, i32 0
11 %z = zext i32 %e to i64
12 %r = insertelement <2 x i64> undef, i64 %z, i32 0
; Zero-extend lane 0 of the <4 x i32> argument to i64 and insert it into lane 0
; of a zeroinitializer <2 x i64>. Expected codegen: fmov the lane to w8, zero
; v0, then insert x8 into v0.d[0].
16 define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
17 ; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero:
19 ; CHECK-NEXT: fmov w8, s0
20 ; CHECK-NEXT: movi v0.2d, #0000000000000000
21 ; CHECK-NEXT: mov v0.d[0], x8
23 %e = extractelement <4 x i32> %x, i32 0
24 %z = zext i32 %e to i64
25 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
; Zero-extend lane 1 of the <4 x i32> argument to i64 and insert it into lane 0
; of an undef <2 x i64>. Expected codegen: zip1 v0 with itself, zero v1, then
; ext v0/v1 at byte offset 12.
29 define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
30 ; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
32 ; CHECK-NEXT: zip1 v0.4s, v0.4s, v0.4s
33 ; CHECK-NEXT: movi v1.2d, #0000000000000000
34 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
36 %e = extractelement <4 x i32> %x, i32 1
37 %z = zext i32 %e to i64
38 %r = insertelement <2 x i64> undef, i64 %z, i32 0
; Zero-extend lane 1 of the <4 x i32> argument to i64 and insert it into lane 0
; of a zeroinitializer <2 x i64>. Expected codegen: move v0.s[1] to w8, zero
; v0, then insert x8 into v0.d[0].
42 define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
43 ; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero:
45 ; CHECK-NEXT: mov w8, v0.s[1]
46 ; CHECK-NEXT: movi v0.2d, #0000000000000000
47 ; CHECK-NEXT: mov v0.d[0], x8
49 %e = extractelement <4 x i32> %x, i32 1
50 %z = zext i32 %e to i64
51 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
; Zero-extend lane 2 of the <4 x i32> argument to i64 and insert it into lane 0
; of an undef <2 x i64>. Expected codegen: uzp1 v0 with itself, zero v1, then
; ext v0/v1 at byte offset 12.
55 define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
56 ; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef:
58 ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
59 ; CHECK-NEXT: movi v1.2d, #0000000000000000
60 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
62 %e = extractelement <4 x i32> %x, i32 2
63 %z = zext i32 %e to i64
64 %r = insertelement <2 x i64> undef, i64 %z, i32 0
; Zero-extend lane 2 of the <4 x i32> argument to i64 and insert it into lane 0
; of a zeroinitializer <2 x i64>. Expected codegen: move v0.s[2] to w8, zero
; v0, then insert x8 into v0.d[0].
68 define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
69 ; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero:
71 ; CHECK-NEXT: mov w8, v0.s[2]
72 ; CHECK-NEXT: movi v0.2d, #0000000000000000
73 ; CHECK-NEXT: mov v0.d[0], x8
75 %e = extractelement <4 x i32> %x, i32 2
76 %z = zext i32 %e to i64
77 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
; Zero-extend lane 3 of the <4 x i32> argument to i64 and insert it into lane 0
; of an undef <2 x i64>. Expected codegen: zero v1, then ext v0/v1 at byte
; offset 12.
81 define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
82 ; CHECK-LABEL: extract3_i32_zext_insert0_i64_undef:
84 ; CHECK-NEXT: movi v1.2d, #0000000000000000
85 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
87 %e = extractelement <4 x i32> %x, i32 3
88 %z = zext i32 %e to i64
89 %r = insertelement <2 x i64> undef, i64 %z, i32 0
; Zero-extend lane 3 of the <4 x i32> argument to i64 and insert it into lane 0
; of a zeroinitializer <2 x i64>. Expected codegen: move v0.s[3] to w8, zero
; v0, then insert x8 into v0.d[0].
93 define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
94 ; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero:
96 ; CHECK-NEXT: mov w8, v0.s[3]
97 ; CHECK-NEXT: movi v0.2d, #0000000000000000
98 ; CHECK-NEXT: mov v0.d[0], x8
100 %e = extractelement <4 x i32> %x, i32 3
101 %z = zext i32 %e to i64
102 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
; Zero-extend lane 0 of the <4 x i32> argument to i64 and insert it into lane 1
; of an undef <2 x i64>. Expected codegen: zero v1, zip1 v0 with v1 into v1,
; then ext v0/v1 at byte offset 8.
106 define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
107 ; CHECK-LABEL: extract0_i32_zext_insert1_i64_undef:
109 ; CHECK-NEXT: movi v1.2d, #0000000000000000
110 ; CHECK-NEXT: zip1 v1.4s, v0.4s, v1.4s
111 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #8
113 %e = extractelement <4 x i32> %x, i32 0
114 %z = zext i32 %e to i64
115 %r = insertelement <2 x i64> undef, i64 %z, i32 1
; Zero-extend lane 0 of the <4 x i32> argument to i64 and insert it into lane 1
; of a zeroinitializer <2 x i64>. Expected codegen: fmov the lane to w8, zero
; v0, then insert x8 into v0.d[1].
119 define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
120 ; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero:
122 ; CHECK-NEXT: fmov w8, s0
123 ; CHECK-NEXT: movi v0.2d, #0000000000000000
124 ; CHECK-NEXT: mov v0.d[1], x8
126 %e = extractelement <4 x i32> %x, i32 0
127 %z = zext i32 %e to i64
128 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
; Zero-extend lane 1 of the <4 x i32> argument to i64 and insert it into lane 1
; of an undef <2 x i64>. Expected codegen: ext v0 with itself at byte offset 8,
; zero v1, then ext v0/v1 at byte offset 4.
132 define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
133 ; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef:
135 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
136 ; CHECK-NEXT: movi v1.2d, #0000000000000000
137 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #4
139 %e = extractelement <4 x i32> %x, i32 1
140 %z = zext i32 %e to i64
141 %r = insertelement <2 x i64> undef, i64 %z, i32 1
; Zero-extend lane 1 of the <4 x i32> argument to i64 and insert it into lane 1
; of a zeroinitializer <2 x i64>. Expected codegen: move v0.s[1] to w8, zero
; v0, then insert x8 into v0.d[1].
145 define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
146 ; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero:
148 ; CHECK-NEXT: mov w8, v0.s[1]
149 ; CHECK-NEXT: movi v0.2d, #0000000000000000
150 ; CHECK-NEXT: mov v0.d[1], x8
152 %e = extractelement <4 x i32> %x, i32 1
153 %z = zext i32 %e to i64
154 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
; Zero-extend lane 2 of the <4 x i32> argument to i64 and insert it into lane 1
; of an undef <2 x i64>. Expected codegen: a single write of wzr into v0.s[3].
158 define <2 x i64> @extract2_i32_zext_insert1_i64_undef(<4 x i32> %x) {
159 ; CHECK-LABEL: extract2_i32_zext_insert1_i64_undef:
161 ; CHECK-NEXT: mov v0.s[3], wzr
163 %e = extractelement <4 x i32> %x, i32 2
164 %z = zext i32 %e to i64
165 %r = insertelement <2 x i64> undef, i64 %z, i32 1
; Zero-extend lane 2 of the <4 x i32> argument to i64 and insert it into lane 1
; of a zeroinitializer <2 x i64>. Expected codegen: move v0.s[2] to w8, zero
; v0, then insert x8 into v0.d[1].
169 define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) {
170 ; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero:
172 ; CHECK-NEXT: mov w8, v0.s[2]
173 ; CHECK-NEXT: movi v0.2d, #0000000000000000
174 ; CHECK-NEXT: mov v0.d[1], x8
176 %e = extractelement <4 x i32> %x, i32 2
177 %z = zext i32 %e to i64
178 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
; Zero-extend lane 3 of the <4 x i32> argument to i64 and insert it into lane 1
; of an undef <2 x i64>. Expected codegen: zero v1, then ext v0/v1 at byte
; offset 4.
182 define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
183 ; CHECK-LABEL: extract3_i32_zext_insert1_i64_undef:
185 ; CHECK-NEXT: movi v1.2d, #0000000000000000
186 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #4
188 %e = extractelement <4 x i32> %x, i32 3
189 %z = zext i32 %e to i64
190 %r = insertelement <2 x i64> undef, i64 %z, i32 1
; Zero-extend lane 3 of the <4 x i32> argument to i64 and insert it into lane 1
; of a zeroinitializer <2 x i64>. Expected codegen: move v0.s[3] to w8, zero
; v0, then insert x8 into v0.d[1].
194 define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
195 ; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero:
197 ; CHECK-NEXT: mov w8, v0.s[3]
198 ; CHECK-NEXT: movi v0.2d, #0000000000000000
199 ; CHECK-NEXT: mov v0.d[1], x8
201 %e = extractelement <4 x i32> %x, i32 3
202 %z = zext i32 %e to i64
203 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
; Zero-extend lane 0 of the <8 x i16> argument to i64 and insert it into lane 0
; of an undef <2 x i64>. Expected codegen: umov v0.h[0] to w8, mask x8 with
; 0xffff, then fmov d0, x8.
207 define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
208 ; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef:
210 ; CHECK-NEXT: umov w8, v0.h[0]
211 ; CHECK-NEXT: and x8, x8, #0xffff
212 ; CHECK-NEXT: fmov d0, x8
214 %e = extractelement <8 x i16> %x, i32 0
215 %z = zext i16 %e to i64
216 %r = insertelement <2 x i64> undef, i64 %z, i32 0
; Zero-extend lane 0 of the <8 x i16> argument to i64 and insert it into lane 0
; of a zeroinitializer <2 x i64>. Expected codegen: umov v0.h[0] to w8, mask x8
; with 0xffff, zero v0, then insert x8 into v0.d[0].
220 define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
221 ; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero:
223 ; CHECK-NEXT: umov w8, v0.h[0]
224 ; CHECK-NEXT: and x8, x8, #0xffff
225 ; CHECK-NEXT: movi v0.2d, #0000000000000000
226 ; CHECK-NEXT: mov v0.d[0], x8
228 %e = extractelement <8 x i16> %x, i32 0
229 %z = zext i16 %e to i64
230 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
; Zero-extend lane 1 of the <8 x i16> argument to i64 and insert it into lane 0
; of an undef <2 x i64>. Expected codegen: umov v0.h[1] to w8, mask x8 with
; 0xffff, then fmov d0, x8.
234 define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
235 ; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef:
237 ; CHECK-NEXT: umov w8, v0.h[1]
238 ; CHECK-NEXT: and x8, x8, #0xffff
239 ; CHECK-NEXT: fmov d0, x8
241 %e = extractelement <8 x i16> %x, i32 1
242 %z = zext i16 %e to i64
243 %r = insertelement <2 x i64> undef, i64 %z, i32 0
; Zero-extend lane 1 of the <8 x i16> argument to i64 and insert it into lane 0
; of a zeroinitializer <2 x i64>. Expected codegen: umov v0.h[1] to w8, mask x8
; with 0xffff, zero v0, then insert x8 into v0.d[0].
247 define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
248 ; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero:
250 ; CHECK-NEXT: umov w8, v0.h[1]
251 ; CHECK-NEXT: and x8, x8, #0xffff
252 ; CHECK-NEXT: movi v0.2d, #0000000000000000
253 ; CHECK-NEXT: mov v0.d[0], x8
255 %e = extractelement <8 x i16> %x, i32 1
256 %z = zext i16 %e to i64
257 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
; Zero-extend lane 2 of the <8 x i16> argument to i64 and insert it into lane 0
; of an undef <2 x i64>. Expected codegen: umov v0.h[2] to w8, mask x8 with
; 0xffff, then fmov d0, x8.
261 define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
262 ; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef:
264 ; CHECK-NEXT: umov w8, v0.h[2]
265 ; CHECK-NEXT: and x8, x8, #0xffff
266 ; CHECK-NEXT: fmov d0, x8
268 %e = extractelement <8 x i16> %x, i32 2
269 %z = zext i16 %e to i64
270 %r = insertelement <2 x i64> undef, i64 %z, i32 0
; Zero-extend lane 2 of the <8 x i16> argument to i64 and insert it into lane 0
; of a zeroinitializer <2 x i64>. Expected codegen: umov v0.h[2] to w8, mask x8
; with 0xffff, zero v0, then insert x8 into v0.d[0].
274 define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
275 ; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero:
277 ; CHECK-NEXT: umov w8, v0.h[2]
278 ; CHECK-NEXT: and x8, x8, #0xffff
279 ; CHECK-NEXT: movi v0.2d, #0000000000000000
280 ; CHECK-NEXT: mov v0.d[0], x8
282 %e = extractelement <8 x i16> %x, i32 2
283 %z = zext i16 %e to i64
284 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
; Zero-extend lane 3 of the <8 x i16> argument to i64 and insert it into lane 0
; of an undef <2 x i64>. Expected codegen: umov v0.h[3] to w8, mask x8 with
; 0xffff, then fmov d0, x8.
288 define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
289 ; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef:
291 ; CHECK-NEXT: umov w8, v0.h[3]
292 ; CHECK-NEXT: and x8, x8, #0xffff
293 ; CHECK-NEXT: fmov d0, x8
295 %e = extractelement <8 x i16> %x, i32 3
296 %z = zext i16 %e to i64
297 %r = insertelement <2 x i64> undef, i64 %z, i32 0
; Zero-extend lane 3 of the <8 x i16> argument to i64 and insert it into lane 0
; of a zeroinitializer <2 x i64>. Expected codegen: umov v0.h[3] to w8, mask x8
; with 0xffff, zero v0, then insert x8 into v0.d[0].
301 define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
302 ; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero:
304 ; CHECK-NEXT: umov w8, v0.h[3]
305 ; CHECK-NEXT: and x8, x8, #0xffff
306 ; CHECK-NEXT: movi v0.2d, #0000000000000000
307 ; CHECK-NEXT: mov v0.d[0], x8
309 %e = extractelement <8 x i16> %x, i32 3
310 %z = zext i16 %e to i64
311 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
; Zero-extend lane 0 of the <8 x i16> argument to i64 and insert it into lane 1
; of an undef <2 x i64>. Expected codegen: umov v0.h[0] to w8, mask x8 with
; 0xffff, then dup x8 into v0.2d.
315 define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
316 ; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef:
318 ; CHECK-NEXT: umov w8, v0.h[0]
319 ; CHECK-NEXT: and x8, x8, #0xffff
320 ; CHECK-NEXT: dup v0.2d, x8
322 %e = extractelement <8 x i16> %x, i32 0
323 %z = zext i16 %e to i64
324 %r = insertelement <2 x i64> undef, i64 %z, i32 1
; Zero-extend lane 0 of the <8 x i16> argument to i64 and insert it into lane 1
; of a zeroinitializer <2 x i64>. Expected codegen: umov v0.h[0] to w8, mask x8
; with 0xffff, zero v0, then insert x8 into v0.d[1].
328 define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
329 ; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero:
331 ; CHECK-NEXT: umov w8, v0.h[0]
332 ; CHECK-NEXT: and x8, x8, #0xffff
333 ; CHECK-NEXT: movi v0.2d, #0000000000000000
334 ; CHECK-NEXT: mov v0.d[1], x8
336 %e = extractelement <8 x i16> %x, i32 0
337 %z = zext i16 %e to i64
338 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
; Zero-extend lane 1 of the <8 x i16> argument to i64 and insert it into lane 1
; of an undef <2 x i64>. Expected codegen: umov v0.h[1] to w8, mask x8 with
; 0xffff, then dup x8 into v0.2d.
342 define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
343 ; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef:
345 ; CHECK-NEXT: umov w8, v0.h[1]
346 ; CHECK-NEXT: and x8, x8, #0xffff
347 ; CHECK-NEXT: dup v0.2d, x8
349 %e = extractelement <8 x i16> %x, i32 1
350 %z = zext i16 %e to i64
351 %r = insertelement <2 x i64> undef, i64 %z, i32 1
; Zero-extend lane 1 of the <8 x i16> argument to i64 and insert it into lane 1
; of a zeroinitializer <2 x i64>. Expected codegen: umov v0.h[1] to w8, mask x8
; with 0xffff, zero v0, then insert x8 into v0.d[1].
355 define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
356 ; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero:
358 ; CHECK-NEXT: umov w8, v0.h[1]
359 ; CHECK-NEXT: and x8, x8, #0xffff
360 ; CHECK-NEXT: movi v0.2d, #0000000000000000
361 ; CHECK-NEXT: mov v0.d[1], x8
363 %e = extractelement <8 x i16> %x, i32 1
364 %z = zext i16 %e to i64
365 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
; Zero-extend lane 2 of the <8 x i16> argument to i64 and insert it into lane 1
; of an undef <2 x i64>. Expected codegen: umov v0.h[2] to w8, mask x8 with
; 0xffff, then dup x8 into v0.2d.
369 define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
370 ; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef:
372 ; CHECK-NEXT: umov w8, v0.h[2]
373 ; CHECK-NEXT: and x8, x8, #0xffff
374 ; CHECK-NEXT: dup v0.2d, x8
376 %e = extractelement <8 x i16> %x, i32 2
377 %z = zext i16 %e to i64
378 %r = insertelement <2 x i64> undef, i64 %z, i32 1
; Zero-extend lane 2 of the <8 x i16> argument to i64 and insert it into lane 1
; of a zeroinitializer <2 x i64>. Expected codegen: umov v0.h[2] to w8, mask x8
; with 0xffff, zero v0, then insert x8 into v0.d[1].
382 define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
383 ; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero:
385 ; CHECK-NEXT: umov w8, v0.h[2]
386 ; CHECK-NEXT: and x8, x8, #0xffff
387 ; CHECK-NEXT: movi v0.2d, #0000000000000000
388 ; CHECK-NEXT: mov v0.d[1], x8
390 %e = extractelement <8 x i16> %x, i32 2
391 %z = zext i16 %e to i64
392 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
; Zero-extend lane 3 of the <8 x i16> argument to i64 and insert it into lane 1
; of an undef <2 x i64>. Expected codegen: umov v0.h[3] to w8, mask x8 with
; 0xffff, then dup x8 into v0.2d.
396 define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
397 ; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef:
399 ; CHECK-NEXT: umov w8, v0.h[3]
400 ; CHECK-NEXT: and x8, x8, #0xffff
401 ; CHECK-NEXT: dup v0.2d, x8
403 %e = extractelement <8 x i16> %x, i32 3
404 %z = zext i16 %e to i64
405 %r = insertelement <2 x i64> undef, i64 %z, i32 1
; Zero-extend lane 3 of the <8 x i16> argument to i64 and insert it into lane 1
; of a zeroinitializer <2 x i64>. Expected codegen: umov v0.h[3] to w8, mask x8
; with 0xffff, zero v0, then insert x8 into v0.d[1].
409 define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
410 ; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero:
412 ; CHECK-NEXT: umov w8, v0.h[3]
413 ; CHECK-NEXT: and x8, x8, #0xffff
414 ; CHECK-NEXT: movi v0.2d, #0000000000000000
415 ; CHECK-NEXT: mov v0.d[1], x8
417 %e = extractelement <8 x i16> %x, i32 3
418 %z = zext i16 %e to i64
419 %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
423 ; This would crash because we did not expect to create
424 ; a shuffle for a vector where the source operand is
425 ; not the same size as the result.
426 ; TODO: Should we handle this pattern? I.e., is moving to/from
427 ; registers the optimal code?
; Zero-extend lane 2 of the <4 x i16> argument to i32 and insert it into lane 0
; of an undef <4 x i32>, so the result vector is wider than the source vector.
; Expected codegen: umov v0.h[2] to w8, then fmov s0, w8.
429 define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
430 ; CHECK-LABEL: larger_bv_than_source:
432 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
433 ; CHECK-NEXT: umov w8, v0.h[2]
434 ; CHECK-NEXT: fmov s0, w8
436 %t1 = extractelement <4 x i16> %t0, i32 2
437 %vgetq_lane = zext i16 %t1 to i32
438 %t2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0