1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
; test_shuf1: 16-lane shuffle of two <16 x i32> inputs.
; Mask indices 0-15 select lanes of %x and indices 16-31 select lanes of %y.
; The mask used here is the concatenation of the four 4-lane masks exercised
; individually by test_shuf2, test_shuf3, test_shuf4 and test_shuf5.
; The autogenerated assertions below pin the expected AArch64 instruction
; sequence for the whole 16-lane result.
4 define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) {
5 ; CHECK-LABEL: test_shuf1:
7 ; CHECK-NEXT: ext v3.16b, v6.16b, v1.16b, #4
8 ; CHECK-NEXT: uzp1 v5.4s, v1.4s, v0.4s
9 ; CHECK-NEXT: uzp2 v16.4s, v2.4s, v4.4s
10 ; CHECK-NEXT: dup v17.4s, v4.s[0]
11 ; CHECK-NEXT: trn2 v4.4s, v1.4s, v3.4s
12 ; CHECK-NEXT: mov v17.s[0], v6.s[3]
13 ; CHECK-NEXT: trn2 v1.4s, v5.4s, v1.4s
14 ; CHECK-NEXT: rev64 v3.4s, v7.4s
15 ; CHECK-NEXT: trn1 v2.4s, v16.4s, v2.4s
16 ; CHECK-NEXT: mov v4.s[0], v7.s[1]
17 ; CHECK-NEXT: ext v1.16b, v0.16b, v1.16b, #12
18 ; CHECK-NEXT: mov v3.d[0], v17.d[0]
19 ; CHECK-NEXT: mov v2.s[3], v7.s[0]
20 ; CHECK-NEXT: mov v0.16b, v4.16b
22 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 29, i32 26, i32 7, i32 4, i32 3, i32 6, i32 5, i32 2, i32 9, i32 8, i32 17, i32 28, i32 27, i32 16, i32 31, i32 30>
; test_shuf2: narrowing shuffle, two <16 x i32> inputs -> <4 x i32> result.
; Mask <29, 26, 7, 4> yields { %y[13], %y[10], %x[7], %x[4] }
; (indices 16-31 address %y, indices 0-15 address %x).
; Same lanes as result elements 0-3 of test_shuf1.
26 define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) {
27 ; CHECK-LABEL: test_shuf2:
29 ; CHECK-NEXT: zip2 v0.4s, v7.4s, v6.4s
30 ; CHECK-NEXT: trn2 v2.4s, v7.4s, v0.4s
31 ; CHECK-NEXT: ext v0.16b, v1.16b, v1.16b, #4
32 ; CHECK-NEXT: mov v0.d[0], v2.d[0]
34 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 29, i32 26, i32 7, i32 4>
; test_shuf3: narrowing shuffle, two <16 x i32> inputs -> <4 x i32> result.
; Mask <3, 6, 5, 2> yields { %x[3], %x[6], %x[5], %x[2] }; every index is
; below 16, so only %x contributes lanes.
; Same lanes as result elements 4-7 of test_shuf1.
38 define <4 x i32> @test_shuf3(<16 x i32> %x, <16 x i32> %y) {
39 ; CHECK-LABEL: test_shuf3:
41 ; CHECK-NEXT: uzp1 v2.4s, v1.4s, v0.4s
42 ; CHECK-NEXT: trn2 v1.4s, v2.4s, v1.4s
43 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
45 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 3, i32 6, i32 5, i32 2>
; test_shuf4: narrowing shuffle, two <16 x i32> inputs -> <4 x i32> result.
; Mask <9, 8, 17, 28> yields { %x[9], %x[8], %y[1], %y[12] }.
; Same lanes as result elements 8-11 of test_shuf1.
49 define <4 x i32> @test_shuf4(<16 x i32> %x, <16 x i32> %y) {
50 ; CHECK-LABEL: test_shuf4:
52 ; CHECK-NEXT: uzp2 v0.4s, v2.4s, v4.4s
53 ; CHECK-NEXT: trn1 v0.4s, v0.4s, v2.4s
54 ; CHECK-NEXT: mov v0.s[3], v7.s[0]
56 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 9, i32 8, i32 17, i32 28>
; test_shuf5: narrowing shuffle, two <16 x i32> inputs -> <4 x i32> result.
; Mask <27, 16, 31, 30> yields { %y[11], %y[0], %y[15], %y[14] }; every index
; is 16 or above, so only %y contributes lanes.
; Same lanes as result elements 12-15 of test_shuf1.
60 define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) {
61 ; CHECK-LABEL: test_shuf5:
63 ; CHECK-NEXT: ext v1.16b, v6.16b, v4.16b, #12
64 ; CHECK-NEXT: rev64 v0.4s, v7.4s
65 ; CHECK-NEXT: mov v0.d[0], v1.d[0]
67 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 27, i32 16, i32 31, i32 30>
; test1503: <4 x i32> two-input shuffle; the function name spells the mask.
; Mask <1, 5, 0, 3> yields { %a[1], %b[1], %a[0], %a[3] }
; (indices 0-3 address %a, indices 4-7 address %b).
71 define <4 x i32> @test1503(<4 x i32> %a, <4 x i32> %b)
72 ; CHECK-LABEL: test1503:
74 ; CHECK-NEXT: zip1 v1.4s, v0.4s, v1.4s
75 ; CHECK-NEXT: ext v1.16b, v1.16b, v0.16b, #8
76 ; CHECK-NEXT: mov v1.s[3], v0.s[3]
77 ; CHECK-NEXT: mov v0.16b, v1.16b
80 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 3>
; test4366: <4 x i32> two-input shuffle; the function name spells the mask.
; Mask <4, 3, 6, 6> yields { %b[0], %a[3], %b[2], %b[2] }
; (indices 0-3 address %a, indices 4-7 address %b).
84 define <4 x i32> @test4366(<4 x i32> %a, <4 x i32> %b)
85 ; CHECK-LABEL: test4366:
87 ; CHECK-NEXT: trn1 v1.4s, v1.4s, v1.4s
88 ; CHECK-NEXT: mov v1.s[1], v0.s[3]
89 ; CHECK-NEXT: mov v0.16b, v1.16b
92 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
; test7367: <4 x i32> two-input shuffle; the function name spells the mask.
; Mask <7, 3, 6, 7> yields { %b[3], %a[3], %b[2], %b[3] }
; (indices 0-3 address %a, indices 4-7 address %b).
96 define <4 x i32> @test7367(<4 x i32> %a, <4 x i32> %b)
97 ; CHECK-LABEL: test7367:
99 ; CHECK-NEXT: mov v2.16b, v1.16b
100 ; CHECK-NEXT: mov v2.d[0], v0.d[1]
101 ; CHECK-NEXT: mov v2.s[0], v1.s[3]
102 ; CHECK-NEXT: mov v0.16b, v2.16b
105 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 3, i32 6, i32 7>
; test4045: <4 x i32> two-input shuffle; the function name spells the mask.
; Mask <4, 0, 4, 5> yields { %b[0], %a[0], %b[0], %b[1] }
; (indices 0-3 address %a, indices 4-7 address %b).
109 define <4 x i32> @test4045(<4 x i32> %a, <4 x i32> %b)
110 ; CHECK-LABEL: test4045:
112 ; CHECK-NEXT: trn1 v0.4s, v1.4s, v0.4s
113 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
116 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 4, i32 5>
; test0067: <4 x i32> two-input shuffle; the function name spells the mask.
; Mask <0, 0, 6, 7> yields { %a[0], %a[0], %b[2], %b[3] }
; (indices 0-3 address %a, indices 4-7 address %b).
120 define <4 x i32> @test0067(<4 x i32> %a, <4 x i32> %b)
121 ; CHECK-LABEL: test0067:
123 ; CHECK-NEXT: trn1 v0.4s, v0.4s, v0.4s
124 ; CHECK-NEXT: mov v0.d[1], v1.d[1]
127 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
; test_shuf6: <4 x i32> two-input shuffle with pairwise-duplicated lanes.
; Mask <0, 0, 7, 7> yields { %a[0], %a[0], %b[3], %b[3] }.
; The expected sequence first inserts %b[3] into lane 2 of the %a register and
; then duplicates the even lanes with trn1.
131 define <4 x i32> @test_shuf6(<4 x i32> %a, <4 x i32> %b)
132 ; CHECK-LABEL: test_shuf6:
134 ; CHECK-NEXT: mov v0.s[2], v1.s[3]
135 ; CHECK-NEXT: trn1 v0.4s, v0.4s, v0.4s
138 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
; test_shuf7: same mask as test_shuf6, <0, 0, 7, 7>, applied to 64-bit
; <4 x i16> vectors: { %a[0], %a[0], %b[3], %b[3] }.
; The expected sequence mirrors test_shuf6 using .h lanes and the 4h
; arrangement; the two "kill" lines are register-liveness annotations printed
; by llc as assembly comments.
142 define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
143 ; CHECK-LABEL: test_shuf7:
145 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
146 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
147 ; CHECK-NEXT: mov v0.h[2], v1.h[3]
148 ; CHECK-NEXT: trn1 v0.4h, v0.4h, v0.4h
151 %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
; test_shuf8: <8 x i8> two-input shuffle.
; Mask <0,0,0,0,8,8,8,8> yields %a[0] in lanes 0-3 and %b[0] in lanes 4-7.
; The expected sequence packs %b into the upper 64 bits of the %a register,
; loads an index vector from constant-pool entry .LCPI12_0 and performs a
; single-register tbl lookup.
155 define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
156 ; CHECK-LABEL: test_shuf8:
158 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
159 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
160 ; CHECK-NEXT: adrp x8, .LCPI12_0
161 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
162 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI12_0]
163 ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
166 %r = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
; test_shuf9: <8 x i16> two-input shuffle.
; Mask <0,0,0,0,8,8,8,8> yields %a[0] in lanes 0-3 and %b[0] in lanes 4-7.
; The expected sequence loads an index vector from constant-pool entry
; .LCPI13_0 and performs a two-register tbl lookup over { v0, v1 }.
170 define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
171 ; CHECK-LABEL: test_shuf9:
173 ; CHECK-NEXT: adrp x8, .LCPI13_0
174 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
175 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
176 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
177 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
180 %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
; test_shuf10: <16 x i8> shuffle whose mask only references the first operand.
; Mask indices are 0 (lanes 0-7) and 8 (lanes 8-15); both are below 16, so the
; result is %a[0] repeated eight times followed by %a[8] repeated eight times
; and %b contributes no lanes.
; The expected sequence is a single-register tbl on v0 with indices loaded
; from constant-pool entry .LCPI14_0.
184 define <16 x i8> @test_shuf10(<16 x i8> %a, <16 x i8> %b)
185 ; CHECK-LABEL: test_shuf10:
187 ; CHECK-NEXT: adrp x8, .LCPI14_0
188 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
189 ; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
192 %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
; test_shuf11: <8 x half> two-input shuffle (floating-point element type).
; Mask <0,0,0,0,8,8,8,8> yields %a[0] in lanes 0-3 and %b[0] in lanes 4-7,
; the same mask as test_shuf9.
; The expected sequence is a two-register tbl with indices loaded from
; constant-pool entry .LCPI15_0.
196 define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
197 ; CHECK-LABEL: test_shuf11:
199 ; CHECK-NEXT: adrp x8, .LCPI15_0
200 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
201 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
202 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
203 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
206 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
; test_shuf12: <8 x half> two-input shuffle.
; Mask <0,0,0,0,0,8,1,15> yields
; { %a[0], %a[0], %a[0], %a[0], %a[0], %b[0], %a[1], %b[7] }.
; The expected sequence is a two-register tbl with indices loaded from
; constant-pool entry .LCPI16_0.
210 define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
211 ; CHECK-LABEL: test_shuf12:
213 ; CHECK-NEXT: adrp x8, .LCPI16_0
214 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
215 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
216 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
217 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
220 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
; test_shuf13: <8 x half> two-input shuffle.
; Mask <1,2,0,0,0,8,1,15> yields
; { %a[1], %a[2], %a[0], %a[0], %a[0], %b[0], %a[1], %b[7] }.
; Differs from test_shuf12 only in the first two mask elements.
; The expected sequence is a two-register tbl with indices loaded from
; constant-pool entry .LCPI17_0.
224 define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
225 ; CHECK-LABEL: test_shuf13:
227 ; CHECK-NEXT: adrp x8, .LCPI17_0
228 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
229 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0]
230 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
231 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
234 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
; test_shuf14: <8 x half> two-input shuffle.
; Mask <1,2,1,1,0,8,1,15> yields
; { %a[1], %a[2], %a[1], %a[1], %a[0], %b[0], %a[1], %b[7] }.
; Differs from test_shuf13 only in mask elements 2 and 3.
; The expected sequence is a two-register tbl with indices loaded from
; constant-pool entry .LCPI18_0.
238 define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
239 ; CHECK-LABEL: test_shuf14:
241 ; CHECK-NEXT: adrp x8, .LCPI18_0
242 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
243 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
244 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
245 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
248 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 1, i32 1, i32 0, i32 8, i32 1, i32 15>
; test_shuf15: <8 x half> two-input shuffle.
; Mask <1,2,7,2,0,3,2,15> yields
; { %a[1], %a[2], %a[7], %a[2], %a[0], %a[3], %a[2], %b[7] };
; only the last lane is taken from %b.
; The expected sequence is a two-register tbl with indices loaded from
; constant-pool entry .LCPI19_0.
252 define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
253 ; CHECK-LABEL: test_shuf15:
255 ; CHECK-NEXT: adrp x8, .LCPI19_0
256 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
257 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0]
258 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
259 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
262 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 7, i32 2, i32 0, i32 3, i32 2, i32 15>
; extract_shuffle: shuffle + extract + zero-extend + shift chain.
;   %a moves lanes 4-7 of %j into lanes 0-3 (remaining lanes are poison),
;   %b keeps lanes 0-3 of %a as a <4 x i16>,
;   %c zero-extends each lane to i32,
;   %d shifts each lane left by 3.
; The assertion below expects the chain to fold into a single ushll2 with an
; immediate of 3 on the 8h source register.
; NOTE(review): %k is not referenced by any line visible here, and the return
; is not shown; presumably %d is the returned value - confirm.
266 define <4 x i32> @extract_shuffle(<8 x i16> %j, <4 x i16> %k) {
267 ; CHECK-LABEL: extract_shuffle:
269 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #3
271 %a = shufflevector <8 x i16> %j, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
272 %b = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
273 %c = zext <4 x i16> %b to <4 x i32>
274 %d = shl <4 x i32> %c, <i32 3, i32 3, i32 3, i32 3>