1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; tbl1, 8-byte result: a single call to llvm.aarch64.neon.tbl1.v8i8 on the
; <16 x i8> %A and the <8 x i8> %B. Both llc invocations at the top of the
; file share one expectation here: a lone tbl.8b whose braced register list
; holds exactly one register.
5 define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
6 ; CHECK-LABEL: tbl1_8b:
8 ; CHECK-NEXT: tbl.8b v0, { v0 }, v1
10 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
; tbl1, 16-byte result: same shape as the 8-byte tbl1 test, but %B and the
; result are <16 x i8> and the intrinsic is llvm.aarch64.neon.tbl1.v16i8.
; Both llc invocations are expected to emit one tbl.16b with a one-register
; list.
14 define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
15 ; CHECK-LABEL: tbl1_16b:
17 ; CHECK-NEXT: tbl.16b v0, { v0 }, v1
19 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
; tbl2, 8-byte result: llvm.aarch64.neon.tbl2.v8i8 on two <16 x i8> operands
; (%A, %B) and the <8 x i8> %C. The two llc invocations have separate
; expectations; in the visible lines they differ only in the order of the
; "kill" annotations for q0/q1. Both end in a tbl.8b over { v0, v1 }.
23 define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
24 ; CHECK-SD-LABEL: tbl2_8b:
26 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
27 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
28 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2
31 ; CHECK-GI-LABEL: tbl2_8b:
33 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
34 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
35 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2
37 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
; tbl2, 16-byte result: llvm.aarch64.neon.tbl2.v16i8 on %A, %B and the
; <16 x i8> %C. Each llc invocation expects a single tbl.16b over { v0, v1 };
; the per-invocation expectations differ only in the order of the q0/q1
; "kill" annotations.
41 define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
42 ; CHECK-SD-LABEL: tbl2_16b:
44 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
45 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
46 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2
49 ; CHECK-GI-LABEL: tbl2_16b:
51 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
52 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
53 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
55 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
; tbl3, 8-byte result: llvm.aarch64.neon.tbl3.v8i8 on three <16 x i8>
; operands (%A, %B, %C) and the <8 x i8> %D. Each llc invocation expects one
; tbl.8b over { v0, v1, v2 }; the q0..q2 "kill" annotations appear in
; descending order for the invocation without -global-isel and ascending
; order for the one with it.
59 define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
60 ; CHECK-SD-LABEL: tbl3_8b:
62 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
63 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
64 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
65 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
68 ; CHECK-GI-LABEL: tbl3_8b:
70 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
71 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
72 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
73 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
75 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
; tbl3, 16-byte result: llvm.aarch64.neon.tbl3.v16i8 on %A, %B, %C and the
; <16 x i8> %D. Each llc invocation expects one tbl.16b over { v0, v1, v2 };
; only the order of the q0..q2 "kill" annotations differs between them.
79 define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
80 ; CHECK-SD-LABEL: tbl3_16b:
82 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
83 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
84 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
85 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
88 ; CHECK-GI-LABEL: tbl3_16b:
90 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
91 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
92 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
93 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
95 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
; tbl4, 8-byte result: llvm.aarch64.neon.tbl4.v8i8 on four <16 x i8>
; operands (%A..%D) and the <8 x i8> %E. Each llc invocation expects one
; tbl.8b over { v0, v1, v2, v3 }; the q0..q3 "kill" annotations are listed in
; descending order without -global-isel and ascending order with it.
99 define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
100 ; CHECK-SD-LABEL: tbl4_8b:
101 ; CHECK-SD: // %bb.0:
102 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
103 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
104 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
105 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
106 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
109 ; CHECK-GI-LABEL: tbl4_8b:
110 ; CHECK-GI: // %bb.0:
111 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
112 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
113 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
114 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
115 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
117 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
; tbl4, 16-byte result: llvm.aarch64.neon.tbl4.v16i8 on %A..%D and the
; <16 x i8> %E. Each llc invocation expects one tbl.16b over
; { v0, v1, v2, v3 }; only the order of the q0..q3 "kill" annotations differs.
121 define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
122 ; CHECK-SD-LABEL: tbl4_16b:
123 ; CHECK-SD: // %bb.0:
124 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
125 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
126 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
127 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
128 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
131 ; CHECK-GI-LABEL: tbl4_16b:
132 ; CHECK-GI: // %bb.0:
133 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
134 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
135 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
136 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
137 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
139 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
; Shuffle of two 8-byte tbl2 results. The IR issues two
; llvm.aarch64.neon.tbl2.v8i8 calls (on %a,%b and on %c,%d) with the same
; constant mask <0, 4, 8, 12, -1, -1, -1, -1>, then a shufflevector that keeps
; elements 0-3 of the first result and elements 4-7 of the second (indices
; 12-15).
;
; The constant-pool expectations come first, followed by the function.
;  * Without -global-isel: one 8-byte mask constant is loaded once and used by
;    both tbl.8b instructions; the halves are joined with a mov.s lane move.
;  * With -global-isel: two constants are expected. .LCPI8_1 is the tbl2 mask;
;    .LCPI8_0 (0,1,2,3,12,13,14,15) mirrors the shufflevector indices and feeds
;    a third tbl after the two results are joined with mov.d.
143 ; CHECK-SD-LABEL: .LCPI8_0:
144 ; CHECK-SD: .byte 0 // 0x0
145 ; CHECK-SD-NEXT: .byte 4 // 0x4
146 ; CHECK-SD-NEXT: .byte 8 // 0x8
147 ; CHECK-SD-NEXT: .byte 12 // 0xc
148 ; CHECK-SD-NEXT: .byte 255 // 0xff
149 ; CHECK-SD-NEXT: .byte 255 // 0xff
150 ; CHECK-SD-NEXT: .byte 255 // 0xff
151 ; CHECK-SD-NEXT: .byte 255 // 0xff
153 ; CHECK-GI-LABEL: .LCPI8_0:
154 ; CHECK-GI: .byte 0 // 0x0
155 ; CHECK-GI-NEXT: .byte 1 // 0x1
156 ; CHECK-GI-NEXT: .byte 2 // 0x2
157 ; CHECK-GI-NEXT: .byte 3 // 0x3
158 ; CHECK-GI-NEXT: .byte 12 // 0xc
159 ; CHECK-GI-NEXT: .byte 13 // 0xd
160 ; CHECK-GI-NEXT: .byte 14 // 0xe
161 ; CHECK-GI-NEXT: .byte 15 // 0xf
162 ; CHECK-GI-LABEL: .LCPI8_1:
163 ; CHECK-GI: .byte 0 // 0x0
164 ; CHECK-GI-NEXT: .byte 4 // 0x4
165 ; CHECK-GI-NEXT: .byte 8 // 0x8
166 ; CHECK-GI-NEXT: .byte 12 // 0xc
167 ; CHECK-GI-NEXT: .byte 255 // 0xff
168 ; CHECK-GI-NEXT: .byte 255 // 0xff
169 ; CHECK-GI-NEXT: .byte 255 // 0xff
170 ; CHECK-GI-NEXT: .byte 255 // 0xff
172 define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
173 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
174 ; CHECK-SD: // %bb.0:
175 ; CHECK-SD-NEXT: adrp x8, .LCPI8_0
176 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
177 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
178 ; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
179 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
180 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
181 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4
182 ; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4
183 ; CHECK-SD-NEXT: mov.s v0[1], v1[1]
184 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
187 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
188 ; CHECK-GI: // %bb.0:
189 ; CHECK-GI-NEXT: adrp x8, .LCPI8_1
190 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
191 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
192 ; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1]
193 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
194 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
195 ; CHECK-GI-NEXT: adrp x8, .LCPI8_0
196 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4
197 ; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4
198 ; CHECK-GI-NEXT: mov.d v0[1], v1[0]
199 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
200 ; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
201 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
203 %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
204 %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
205 %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; Shuffle of two 16-byte tbl2 results, both masks constant. The IR issues two
; llvm.aarch64.neon.tbl2.v16i8 calls (on %a,%b and on %c,%d) with the mask
; <0, 4, ..., 28, -1 x 8>, then a shufflevector that concatenates the low
; eight elements of each result (indices 0-7 and 16-23).
;
;  * Without -global-isel: the whole sequence is expected to become a single
;    tbl.16b over { v0, v1, v2, v3 } with one 16-byte constant mask
;    0, 4, 8, ..., 60.
;  * With -global-isel: the two tbl2 operations stay separate and a third
;    tbl.16b applies the shuffle; .LCPI9_1 is the tbl2 mask and .LCPI9_0
;    mirrors the shufflevector indices.
209 ; CHECK-SD-LABEL: .LCPI9_0:
210 ; CHECK-SD-NEXT: .byte 0 // 0x0
211 ; CHECK-SD-NEXT: .byte 4 // 0x4
212 ; CHECK-SD-NEXT: .byte 8 // 0x8
213 ; CHECK-SD-NEXT: .byte 12 // 0xc
214 ; CHECK-SD-NEXT: .byte 16 // 0x10
215 ; CHECK-SD-NEXT: .byte 20 // 0x14
216 ; CHECK-SD-NEXT: .byte 24 // 0x18
217 ; CHECK-SD-NEXT: .byte 28 // 0x1c
218 ; CHECK-SD-NEXT: .byte 32 // 0x20
219 ; CHECK-SD-NEXT: .byte 36 // 0x24
220 ; CHECK-SD-NEXT: .byte 40 // 0x28
221 ; CHECK-SD-NEXT: .byte 44 // 0x2c
222 ; CHECK-SD-NEXT: .byte 48 // 0x30
223 ; CHECK-SD-NEXT: .byte 52 // 0x34
224 ; CHECK-SD-NEXT: .byte 56 // 0x38
225 ; CHECK-SD-NEXT: .byte 60 // 0x3c
227 ;CHECK-GI-LABEL: .LCPI9_0:
228 ;CHECK-GI: .byte 0 // 0x0
229 ;CHECK-GI-NEXT: .byte 1 // 0x1
230 ;CHECK-GI-NEXT: .byte 2 // 0x2
231 ;CHECK-GI-NEXT: .byte 3 // 0x3
232 ;CHECK-GI-NEXT: .byte 4 // 0x4
233 ;CHECK-GI-NEXT: .byte 5 // 0x5
234 ;CHECK-GI-NEXT: .byte 6 // 0x6
235 ;CHECK-GI-NEXT: .byte 7 // 0x7
236 ;CHECK-GI-NEXT: .byte 16 // 0x10
237 ;CHECK-GI-NEXT: .byte 17 // 0x11
238 ;CHECK-GI-NEXT: .byte 18 // 0x12
239 ;CHECK-GI-NEXT: .byte 19 // 0x13
240 ;CHECK-GI-NEXT: .byte 20 // 0x14
241 ;CHECK-GI-NEXT: .byte 21 // 0x15
242 ;CHECK-GI-NEXT: .byte 22 // 0x16
243 ;CHECK-GI-NEXT: .byte 23 // 0x17
244 ;CHECK-GI-LABEL: .LCPI9_1:
245 ;CHECK-GI: .byte 0 // 0x0
246 ;CHECK-GI-NEXT: .byte 4 // 0x4
247 ;CHECK-GI-NEXT: .byte 8 // 0x8
248 ;CHECK-GI-NEXT: .byte 12 // 0xc
249 ;CHECK-GI-NEXT: .byte 16 // 0x10
250 ;CHECK-GI-NEXT: .byte 20 // 0x14
251 ;CHECK-GI-NEXT: .byte 24 // 0x18
252 ;CHECK-GI-NEXT: .byte 28 // 0x1c
253 ;CHECK-GI-NEXT: .byte 255 // 0xff
254 ;CHECK-GI-NEXT: .byte 255 // 0xff
255 ;CHECK-GI-NEXT: .byte 255 // 0xff
256 ;CHECK-GI-NEXT: .byte 255 // 0xff
257 ;CHECK-GI-NEXT: .byte 255 // 0xff
258 ;CHECK-GI-NEXT: .byte 255 // 0xff
259 ;CHECK-GI-NEXT: .byte 255 // 0xff
260 ;CHECK-GI-NEXT: .byte 255 // 0xff
262 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
263 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
264 ; CHECK-SD: // %bb.0:
265 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
266 ; CHECK-SD-NEXT: adrp x8, .LCPI9_0
267 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
268 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
269 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
270 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
271 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
274 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
275 ; CHECK-GI: // %bb.0:
276 ; CHECK-GI-NEXT: adrp x8, .LCPI9_1
277 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
278 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
279 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1]
280 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
281 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
282 ; CHECK-GI-NEXT: adrp x8, .LCPI9_0
283 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
284 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
285 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
286 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
288 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
289 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
290 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Shuffle of two 16-byte tbl2 results where the FIRST tbl2 mask is not a
; constant. The mask %ins.15 is built with insertelement: elements 0-7 are the
; i8 argument %v and elements 8-15 are -1. The second tbl2 uses the constant
; mask <0, 4, ..., 28, -1 x 8>. The shufflevector concatenates the low eight
; elements of each result (indices 0-7 and 16-23).
;
;  * Without -global-isel: a single tbl.16b over { v0, v1, v2, v3 } is still
;    expected. Its mask is assembled lane by lane in v4: lanes 0-7 from w0,
;    lanes 8-15 from the immediates 32, 36, ..., 60. No constant-pool entry is
;    checked for this invocation.
;  * With -global-isel: the non-constant mask is built in v4 (lanes 8-15 set to
;    255), both tbl2 operations are kept, and a third tbl.16b applies the
;    shuffle using .LCPI10_0; .LCPI10_1 holds the constant tbl2 mask.
294 ; CHECK-GI-LABEL: .LCPI10_0:
295 ; CHECK-GI: .byte 0 // 0x0
296 ; CHECK-GI-NEXT: .byte 1 // 0x1
297 ; CHECK-GI-NEXT: .byte 2 // 0x2
298 ; CHECK-GI-NEXT: .byte 3 // 0x3
299 ; CHECK-GI-NEXT: .byte 4 // 0x4
300 ; CHECK-GI-NEXT: .byte 5 // 0x5
301 ; CHECK-GI-NEXT: .byte 6 // 0x6
302 ; CHECK-GI-NEXT: .byte 7 // 0x7
303 ; CHECK-GI-NEXT: .byte 16 // 0x10
304 ; CHECK-GI-NEXT: .byte 17 // 0x11
305 ; CHECK-GI-NEXT: .byte 18 // 0x12
306 ; CHECK-GI-NEXT: .byte 19 // 0x13
307 ; CHECK-GI-NEXT: .byte 20 // 0x14
308 ; CHECK-GI-NEXT: .byte 21 // 0x15
309 ; CHECK-GI-NEXT: .byte 22 // 0x16
310 ; CHECK-GI-NEXT: .byte 23 // 0x17
311 ; CHECK-GI-LABEL: .LCPI10_1:
312 ; CHECK-GI: .byte 0 // 0x0
313 ; CHECK-GI-NEXT: .byte 4 // 0x4
314 ; CHECK-GI-NEXT: .byte 8 // 0x8
315 ; CHECK-GI-NEXT: .byte 12 // 0xc
316 ; CHECK-GI-NEXT: .byte 16 // 0x10
317 ; CHECK-GI-NEXT: .byte 20 // 0x14
318 ; CHECK-GI-NEXT: .byte 24 // 0x18
319 ; CHECK-GI-NEXT: .byte 28 // 0x1c
320 ; CHECK-GI-NEXT: .byte 255 // 0xff
321 ; CHECK-GI-NEXT: .byte 255 // 0xff
322 ; CHECK-GI-NEXT: .byte 255 // 0xff
323 ; CHECK-GI-NEXT: .byte 255 // 0xff
324 ; CHECK-GI-NEXT: .byte 255 // 0xff
325 ; CHECK-GI-NEXT: .byte 255 // 0xff
326 ; CHECK-GI-NEXT: .byte 255 // 0xff
327 ; CHECK-GI-NEXT: .byte 255 // 0xff
329 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
330 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
331 ; CHECK-SD: // %bb.0:
332 ; CHECK-SD-NEXT: fmov s4, w0
333 ; CHECK-SD-NEXT: mov w8, #32 // =0x20
334 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
335 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
336 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
337 ; CHECK-SD-NEXT: mov.b v4[1], w0
338 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
339 ; CHECK-SD-NEXT: mov.b v4[2], w0
340 ; CHECK-SD-NEXT: mov.b v4[3], w0
341 ; CHECK-SD-NEXT: mov.b v4[4], w0
342 ; CHECK-SD-NEXT: mov.b v4[5], w0
343 ; CHECK-SD-NEXT: mov.b v4[6], w0
344 ; CHECK-SD-NEXT: mov.b v4[7], w0
345 ; CHECK-SD-NEXT: mov.b v4[8], w8
346 ; CHECK-SD-NEXT: mov w8, #36 // =0x24
347 ; CHECK-SD-NEXT: mov.b v4[9], w8
348 ; CHECK-SD-NEXT: mov w8, #40 // =0x28
349 ; CHECK-SD-NEXT: mov.b v4[10], w8
350 ; CHECK-SD-NEXT: mov w8, #44 // =0x2c
351 ; CHECK-SD-NEXT: mov.b v4[11], w8
352 ; CHECK-SD-NEXT: mov w8, #48 // =0x30
353 ; CHECK-SD-NEXT: mov.b v4[12], w8
354 ; CHECK-SD-NEXT: mov w8, #52 // =0x34
355 ; CHECK-SD-NEXT: mov.b v4[13], w8
356 ; CHECK-SD-NEXT: mov w8, #56 // =0x38
357 ; CHECK-SD-NEXT: mov.b v4[14], w8
358 ; CHECK-SD-NEXT: mov w8, #60 // =0x3c
359 ; CHECK-SD-NEXT: mov.b v4[15], w8
360 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
363 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
364 ; CHECK-GI: // %bb.0:
365 ; CHECK-GI-NEXT: fmov s4, w0
366 ; CHECK-GI-NEXT: mov w8, #255 // =0xff
367 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
368 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
369 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
370 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
371 ; CHECK-GI-NEXT: mov.b v4[1], w0
372 ; CHECK-GI-NEXT: mov.b v4[2], w0
373 ; CHECK-GI-NEXT: mov.b v4[3], w0
374 ; CHECK-GI-NEXT: mov.b v4[4], w0
375 ; CHECK-GI-NEXT: mov.b v4[5], w0
376 ; CHECK-GI-NEXT: mov.b v4[6], w0
377 ; CHECK-GI-NEXT: mov.b v4[7], w0
378 ; CHECK-GI-NEXT: mov.b v4[8], w8
379 ; CHECK-GI-NEXT: mov.b v4[9], w8
380 ; CHECK-GI-NEXT: mov.b v4[10], w8
381 ; CHECK-GI-NEXT: mov.b v4[11], w8
382 ; CHECK-GI-NEXT: mov.b v4[12], w8
383 ; CHECK-GI-NEXT: mov.b v4[13], w8
384 ; CHECK-GI-NEXT: mov.b v4[14], w8
385 ; CHECK-GI-NEXT: mov.b v4[15], w8
386 ; CHECK-GI-NEXT: adrp x8, .LCPI10_1
387 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI10_1]
388 ; CHECK-GI-NEXT: adrp x8, .LCPI10_0
389 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
390 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5
391 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI10_0]
392 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
394 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
395 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
396 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
397 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
398 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
399 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
400 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
401 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
402 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
403 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
404 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
405 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
406 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
407 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
408 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
409 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
410 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
411 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
412 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Variant of the non-constant first-mask test with a less regular mask and
; shuffle. The first tbl2 mask %ins.15 is: elements 0-7 = 1, elements 8-11 =
; -1, elements 12 and 13 = %v, element 14 = -1, element 15 = %v. The second
; tbl2 uses the constant mask <0, 4, ..., 28, -1 x 8>. The shufflevector takes
; indices 0-6 and 15 from the first result and 16-22 and 31 from the second.
;
;  * Without -global-isel: a single tbl.16b over { v0, v1, v2, v3 } is
;    expected, with the mask assembled in v4: lanes 0-6 hold 1, lane 7 is
;    written from w0, lanes 8-14 hold 32, 36, ..., 56 and lane 15 holds 31.
;    NOTE(review): lane 15 corresponds to element 15 (-1) of the second mask;
;    31 looks like that value plus 32 - confirm against the lowering code.
;  * With -global-isel: the first mask is materialised in v4 as written in the
;    IR, both tbl2 operations are kept, and a third tbl.16b applies the
;    shuffle using .LCPI11_0 (which mirrors the shufflevector indices);
;    .LCPI11_1 holds the constant tbl2 mask.
416 ; CHECK-GI-LABEL: .LCPI11_0:
417 ; CHECK-GI: .byte 0 // 0x0
418 ; CHECK-GI-NEXT: .byte 1 // 0x1
419 ; CHECK-GI-NEXT: .byte 2 // 0x2
420 ; CHECK-GI-NEXT: .byte 3 // 0x3
421 ; CHECK-GI-NEXT: .byte 4 // 0x4
422 ; CHECK-GI-NEXT: .byte 5 // 0x5
423 ; CHECK-GI-NEXT: .byte 6 // 0x6
424 ; CHECK-GI-NEXT: .byte 15 // 0xf
425 ; CHECK-GI-NEXT: .byte 16 // 0x10
426 ; CHECK-GI-NEXT: .byte 17 // 0x11
427 ; CHECK-GI-NEXT: .byte 18 // 0x12
428 ; CHECK-GI-NEXT: .byte 19 // 0x13
429 ; CHECK-GI-NEXT: .byte 20 // 0x14
430 ; CHECK-GI-NEXT: .byte 21 // 0x15
431 ; CHECK-GI-NEXT: .byte 22 // 0x16
432 ; CHECK-GI-NEXT: .byte 31 // 0x1f
433 ; CHECK-GI-LABEL: .LCPI11_1:
434 ; CHECK-GI: .byte 0 // 0x0
435 ; CHECK-GI-NEXT: .byte 4 // 0x4
436 ; CHECK-GI-NEXT: .byte 8 // 0x8
437 ; CHECK-GI-NEXT: .byte 12 // 0xc
438 ; CHECK-GI-NEXT: .byte 16 // 0x10
439 ; CHECK-GI-NEXT: .byte 20 // 0x14
440 ; CHECK-GI-NEXT: .byte 24 // 0x18
441 ; CHECK-GI-NEXT: .byte 28 // 0x1c
442 ; CHECK-GI-NEXT: .byte 255 // 0xff
443 ; CHECK-GI-NEXT: .byte 255 // 0xff
444 ; CHECK-GI-NEXT: .byte 255 // 0xff
445 ; CHECK-GI-NEXT: .byte 255 // 0xff
446 ; CHECK-GI-NEXT: .byte 255 // 0xff
447 ; CHECK-GI-NEXT: .byte 255 // 0xff
448 ; CHECK-GI-NEXT: .byte 255 // 0xff
449 ; CHECK-GI-NEXT: .byte 255 // 0xff
451 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
452 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
453 ; CHECK-SD: // %bb.0:
454 ; CHECK-SD-NEXT: mov w8, #1 // =0x1
455 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
456 ; CHECK-SD-NEXT: fmov s4, w8
457 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
458 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
459 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
460 ; CHECK-SD-NEXT: mov.b v4[1], w8
461 ; CHECK-SD-NEXT: mov.b v4[2], w8
462 ; CHECK-SD-NEXT: mov.b v4[3], w8
463 ; CHECK-SD-NEXT: mov.b v4[4], w8
464 ; CHECK-SD-NEXT: mov.b v4[5], w8
465 ; CHECK-SD-NEXT: mov.b v4[6], w8
466 ; CHECK-SD-NEXT: mov w8, #32 // =0x20
467 ; CHECK-SD-NEXT: mov.b v4[7], w0
468 ; CHECK-SD-NEXT: mov.b v4[8], w8
469 ; CHECK-SD-NEXT: mov w8, #36 // =0x24
470 ; CHECK-SD-NEXT: mov.b v4[9], w8
471 ; CHECK-SD-NEXT: mov w8, #40 // =0x28
472 ; CHECK-SD-NEXT: mov.b v4[10], w8
473 ; CHECK-SD-NEXT: mov w8, #44 // =0x2c
474 ; CHECK-SD-NEXT: mov.b v4[11], w8
475 ; CHECK-SD-NEXT: mov w8, #48 // =0x30
476 ; CHECK-SD-NEXT: mov.b v4[12], w8
477 ; CHECK-SD-NEXT: mov w8, #52 // =0x34
478 ; CHECK-SD-NEXT: mov.b v4[13], w8
479 ; CHECK-SD-NEXT: mov w8, #56 // =0x38
480 ; CHECK-SD-NEXT: mov.b v4[14], w8
481 ; CHECK-SD-NEXT: mov w8, #31 // =0x1f
482 ; CHECK-SD-NEXT: mov.b v4[15], w8
483 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
486 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
487 ; CHECK-GI: // %bb.0:
488 ; CHECK-GI-NEXT: mov w8, #1 // =0x1
489 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
490 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
491 ; CHECK-GI-NEXT: fmov s4, w8
492 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
493 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
494 ; CHECK-GI-NEXT: mov.b v4[1], w8
495 ; CHECK-GI-NEXT: mov.b v4[2], w8
496 ; CHECK-GI-NEXT: mov.b v4[3], w8
497 ; CHECK-GI-NEXT: mov.b v4[4], w8
498 ; CHECK-GI-NEXT: mov.b v4[5], w8
499 ; CHECK-GI-NEXT: mov.b v4[6], w8
500 ; CHECK-GI-NEXT: mov.b v4[7], w8
501 ; CHECK-GI-NEXT: mov w8, #255 // =0xff
502 ; CHECK-GI-NEXT: mov.b v4[8], w8
503 ; CHECK-GI-NEXT: mov.b v4[9], w8
504 ; CHECK-GI-NEXT: mov.b v4[10], w8
505 ; CHECK-GI-NEXT: mov.b v4[11], w8
506 ; CHECK-GI-NEXT: mov.b v4[12], w0
507 ; CHECK-GI-NEXT: mov.b v4[13], w0
508 ; CHECK-GI-NEXT: mov.b v4[14], w8
509 ; CHECK-GI-NEXT: adrp x8, .LCPI11_1
510 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI11_1]
511 ; CHECK-GI-NEXT: adrp x8, .LCPI11_0
512 ; CHECK-GI-NEXT: mov.b v4[15], w0
513 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
514 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5
515 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
516 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
518 %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
519 %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
520 %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
521 %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
522 %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
523 %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
524 %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
525 %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
526 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
527 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
528 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
529 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
530 %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
531 %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
532 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
533 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
534 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
535 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
536 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
; Shuffle of two 16-byte tbl2 results where the SECOND tbl2 mask is not a
; constant. %t1 looks up %c,%d with the constant mask <0, 4, ..., 28, -1 x 8>;
; %t2 looks up %a,%b with %ins.15, whose elements 0-7 are the i8 argument %v
; and whose elements 8-15 are -1. The shufflevector concatenates the low
; eight elements of each result (indices 0-7 and 16-23).
;
;  * Without -global-isel: no four-register tbl is expected here. The output
;    keeps two tbl.16b instructions on two-register lists and joins the low
;    halves with mov.d; the constant mask comes from .LCPI12_0 and the
;    variable mask starts as an all-ones movi with lanes 0-7 overwritten
;    from w0.
;  * With -global-isel: both tbl2 operations are kept and a third tbl.16b
;    applies the shuffle using .LCPI12_0; .LCPI12_1 holds the constant tbl2
;    mask.
540 ; CHECK-SD-LABEL: .LCPI12_0:
541 ; CHECK-SD: .byte 0 // 0x0
542 ; CHECK-SD-NEXT: .byte 4 // 0x4
543 ; CHECK-SD-NEXT: .byte 8 // 0x8
544 ; CHECK-SD-NEXT: .byte 12 // 0xc
545 ; CHECK-SD-NEXT: .byte 16 // 0x10
546 ; CHECK-SD-NEXT: .byte 20 // 0x14
547 ; CHECK-SD-NEXT: .byte 24 // 0x18
548 ; CHECK-SD-NEXT: .byte 28 // 0x1c
549 ; CHECK-SD-NEXT: .byte 255 // 0xff
550 ; CHECK-SD-NEXT: .byte 255 // 0xff
551 ; CHECK-SD-NEXT: .byte 255 // 0xff
552 ; CHECK-SD-NEXT: .byte 255 // 0xff
553 ; CHECK-SD-NEXT: .byte 255 // 0xff
554 ; CHECK-SD-NEXT: .byte 255 // 0xff
555 ; CHECK-SD-NEXT: .byte 255 // 0xff
556 ; CHECK-SD-NEXT: .byte 255 // 0xff
558 ; CHECK-GI-LABEL: .LCPI12_0:
559 ; CHECK-GI: .byte 0 // 0x0
560 ; CHECK-GI-NEXT: .byte 1 // 0x1
561 ; CHECK-GI-NEXT: .byte 2 // 0x2
562 ; CHECK-GI-NEXT: .byte 3 // 0x3
563 ; CHECK-GI-NEXT: .byte 4 // 0x4
564 ; CHECK-GI-NEXT: .byte 5 // 0x5
565 ; CHECK-GI-NEXT: .byte 6 // 0x6
566 ; CHECK-GI-NEXT: .byte 7 // 0x7
567 ; CHECK-GI-NEXT: .byte 16 // 0x10
568 ; CHECK-GI-NEXT: .byte 17 // 0x11
569 ; CHECK-GI-NEXT: .byte 18 // 0x12
570 ; CHECK-GI-NEXT: .byte 19 // 0x13
571 ; CHECK-GI-NEXT: .byte 20 // 0x14
572 ; CHECK-GI-NEXT: .byte 21 // 0x15
573 ; CHECK-GI-NEXT: .byte 22 // 0x16
574 ; CHECK-GI-NEXT: .byte 23 // 0x17
575 ; CHECK-GI-LABEL: .LCPI12_1:
576 ; CHECK-GI: .byte 0 // 0x0
577 ; CHECK-GI-NEXT: .byte 4 // 0x4
578 ; CHECK-GI-NEXT: .byte 8 // 0x8
579 ; CHECK-GI-NEXT: .byte 12 // 0xc
580 ; CHECK-GI-NEXT: .byte 16 // 0x10
581 ; CHECK-GI-NEXT: .byte 20 // 0x14
582 ; CHECK-GI-NEXT: .byte 24 // 0x18
583 ; CHECK-GI-NEXT: .byte 28 // 0x1c
584 ; CHECK-GI-NEXT: .byte 255 // 0xff
585 ; CHECK-GI-NEXT: .byte 255 // 0xff
586 ; CHECK-GI-NEXT: .byte 255 // 0xff
587 ; CHECK-GI-NEXT: .byte 255 // 0xff
588 ; CHECK-GI-NEXT: .byte 255 // 0xff
589 ; CHECK-GI-NEXT: .byte 255 // 0xff
590 ; CHECK-GI-NEXT: .byte 255 // 0xff
591 ; CHECK-GI-NEXT: .byte 255 // 0xff
593 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
594 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
595 ; CHECK-SD: // %bb.0:
596 ; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff
597 ; CHECK-SD-NEXT: adrp x8, .LCPI12_0
598 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
599 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
600 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0]
601 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
602 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
603 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5
604 ; CHECK-SD-NEXT: mov.b v4[0], w0
605 ; CHECK-SD-NEXT: mov.b v4[1], w0
606 ; CHECK-SD-NEXT: mov.b v4[2], w0
607 ; CHECK-SD-NEXT: mov.b v4[3], w0
608 ; CHECK-SD-NEXT: mov.b v4[4], w0
609 ; CHECK-SD-NEXT: mov.b v4[5], w0
610 ; CHECK-SD-NEXT: mov.b v4[6], w0
611 ; CHECK-SD-NEXT: mov.b v4[7], w0
612 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v4
613 ; CHECK-SD-NEXT: mov.d v2[1], v0[0]
614 ; CHECK-SD-NEXT: mov.16b v0, v2
617 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
618 ; CHECK-GI: // %bb.0:
619 ; CHECK-GI-NEXT: fmov s4, w0
620 ; CHECK-GI-NEXT: mov w8, #255 // =0xff
621 ; CHECK-GI-NEXT: adrp x9, .LCPI12_1
622 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
623 ; CHECK-GI-NEXT: ldr q5, [x9, :lo12:.LCPI12_1]
624 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
625 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
626 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
627 ; CHECK-GI-NEXT: mov.b v4[1], w0
628 ; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v5
629 ; CHECK-GI-NEXT: mov.b v4[2], w0
630 ; CHECK-GI-NEXT: mov.b v4[3], w0
631 ; CHECK-GI-NEXT: mov.b v4[4], w0
632 ; CHECK-GI-NEXT: mov.b v4[5], w0
633 ; CHECK-GI-NEXT: mov.b v4[6], w0
634 ; CHECK-GI-NEXT: mov.b v4[7], w0
635 ; CHECK-GI-NEXT: mov.b v4[8], w8
636 ; CHECK-GI-NEXT: mov.b v4[9], w8
637 ; CHECK-GI-NEXT: mov.b v4[10], w8
638 ; CHECK-GI-NEXT: mov.b v4[11], w8
639 ; CHECK-GI-NEXT: mov.b v4[12], w8
640 ; CHECK-GI-NEXT: mov.b v4[13], w8
641 ; CHECK-GI-NEXT: mov.b v4[14], w8
642 ; CHECK-GI-NEXT: mov.b v4[15], w8
643 ; CHECK-GI-NEXT: adrp x8, .LCPI12_0
644 ; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v4
645 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
646 ; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0
648 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
649 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
650 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
651 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
652 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
653 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
654 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
655 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
656 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
657 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
658 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
659 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
660 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
661 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
662 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
663 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
664 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
665 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
666 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
670 ; CHECK-SD-LABEL: .LCPI13_0:
671 ; CHECK-SD: .byte 0 // 0x0
672 ; CHECK-SD-NEXT: .byte 4 // 0x4
673 ; CHECK-SD-NEXT: .byte 8 // 0x8
674 ; CHECK-SD-NEXT: .byte 12 // 0xc
675 ; CHECK-SD-NEXT: .byte 16 // 0x10
676 ; CHECK-SD-NEXT: .byte 20 // 0x14
677 ; CHECK-SD-NEXT: .byte 24 // 0x18
678 ; CHECK-SD-NEXT: .byte 28 // 0x1c
679 ; CHECK-SD-NEXT: .byte 255 // 0xff
680 ; CHECK-SD-NEXT: .byte 255 // 0xff
681 ; CHECK-SD-NEXT: .byte 255 // 0xff
682 ; CHECK-SD-NEXT: .byte 255 // 0xff
683 ; CHECK-SD-NEXT: .byte 255 // 0xff
684 ; CHECK-SD-NEXT: .byte 255 // 0xff
685 ; CHECK-SD-NEXT: .byte 255 // 0xff
686 ; CHECK-SD-NEXT: .byte 255 // 0xff
687 ; CHECK-SD-LABEL: .LCPI13_1:
688 ; CHECK-SD: .byte 0 // 0x0
689 ; CHECK-SD-NEXT: .byte 1 // 0x1
690 ; CHECK-SD-NEXT: .byte 2 // 0x2
691 ; CHECK-SD-NEXT: .byte 3 // 0x3
692 ; CHECK-SD-NEXT: .byte 4 // 0x4
693 ; CHECK-SD-NEXT: .byte 5 // 0x5
694 ; CHECK-SD-NEXT: .byte 6 // 0x6
695 ; CHECK-SD-NEXT: .byte 7 // 0x7
696 ; CHECK-SD-NEXT: .byte 16 // 0x10
697 ; CHECK-SD-NEXT: .byte 17 // 0x11
698 ; CHECK-SD-NEXT: .byte 18 // 0x12
699 ; CHECK-SD-NEXT: .byte 19 // 0x13
700 ; CHECK-SD-NEXT: .byte 20 // 0x14
701 ; CHECK-SD-NEXT: .byte 21 // 0x15
702 ; CHECK-SD-NEXT: .byte 30 // 0x1e
703 ; CHECK-SD-NEXT: .byte 31 // 0x1f
705 ; CHECK-GI-LABEL: .LCPI13_0:
706 ; CHECK-GI: .byte 0 // 0x0
707 ; CHECK-GI-NEXT: .byte 1 // 0x1
708 ; CHECK-GI-NEXT: .byte 2 // 0x2
709 ; CHECK-GI-NEXT: .byte 3 // 0x3
710 ; CHECK-GI-NEXT: .byte 4 // 0x4
711 ; CHECK-GI-NEXT: .byte 5 // 0x5
712 ; CHECK-GI-NEXT: .byte 6 // 0x6
713 ; CHECK-GI-NEXT: .byte 7 // 0x7
714 ; CHECK-GI-NEXT: .byte 16 // 0x10
715 ; CHECK-GI-NEXT: .byte 17 // 0x11
716 ; CHECK-GI-NEXT: .byte 18 // 0x12
717 ; CHECK-GI-NEXT: .byte 19 // 0x13
718 ; CHECK-GI-NEXT: .byte 20 // 0x14
719 ; CHECK-GI-NEXT: .byte 21 // 0x15
720 ; CHECK-GI-NEXT: .byte 30 // 0x1e
721 ; CHECK-GI-NEXT: .byte 31 // 0x1f
722 ; CHECK-GI-LABEL: .LCPI13_1:
723 ; CHECK-GI: .byte 0 // 0x0
724 ; CHECK-GI-NEXT: .byte 4 // 0x4
725 ; CHECK-GI-NEXT: .byte 8 // 0x8
726 ; CHECK-GI-NEXT: .byte 12 // 0xc
727 ; CHECK-GI-NEXT: .byte 16 // 0x10
728 ; CHECK-GI-NEXT: .byte 20 // 0x14
729 ; CHECK-GI-NEXT: .byte 24 // 0x18
730 ; CHECK-GI-NEXT: .byte 28 // 0x1c
731 ; CHECK-GI-NEXT: .byte 255 // 0xff
732 ; CHECK-GI-NEXT: .byte 255 // 0xff
733 ; CHECK-GI-NEXT: .byte 255 // 0xff
734 ; CHECK-GI-NEXT: .byte 255 // 0xff
735 ; CHECK-GI-NEXT: .byte 255 // 0xff
736 ; CHECK-GI-NEXT: .byte 255 // 0xff
737 ; CHECK-GI-NEXT: .byte 255 // 0xff
738 ; CHECK-GI-NEXT: .byte 255 // 0xff
; The first tbl2 (%t1) uses a constant mask; the second tbl2 (%t2) uses a mask
; assembled at run time in which lanes 0-7 and 14-15 hold %v and lanes 8-13
; hold the constant -1. The shufflevector takes result lanes 0-7 from %t1 and
; result lanes 8-15 from %t2 lanes 0-5, 14 and 15.
; For both SelectionDAG and GlobalISel the expected output keeps the two
; 2-register tbl lookups and adds a third tbl for the shuffle, rather than
; using one 4-register tbl.
740 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
741 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
742 ; CHECK-SD: // %bb.0:
743 ; CHECK-SD-NEXT: dup.16b v4, w0
744 ; CHECK-SD-NEXT: mov w8, #255 // =0xff
745 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
746 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
747 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
748 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
749 ; CHECK-SD-NEXT: mov.b v4[8], w8
750 ; CHECK-SD-NEXT: mov.b v4[9], w8
751 ; CHECK-SD-NEXT: mov.b v4[10], w8
752 ; CHECK-SD-NEXT: mov.b v4[11], w8
753 ; CHECK-SD-NEXT: mov.b v4[12], w8
754 ; CHECK-SD-NEXT: mov.b v4[13], w8
755 ; CHECK-SD-NEXT: adrp x8, .LCPI13_0
756 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI13_0]
757 ; CHECK-SD-NEXT: adrp x8, .LCPI13_1
758 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5
759 ; CHECK-SD-NEXT: tbl.16b v3, { v0, v1 }, v4
760 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI13_1]
761 ; CHECK-SD-NEXT: tbl.16b v0, { v2, v3 }, v0
764 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
765 ; CHECK-GI: // %bb.0:
766 ; CHECK-GI-NEXT: fmov s4, w0
767 ; CHECK-GI-NEXT: mov w8, #255 // =0xff
768 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
769 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
770 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
771 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
772 ; CHECK-GI-NEXT: mov.b v4[1], w0
773 ; CHECK-GI-NEXT: mov.b v4[2], w0
774 ; CHECK-GI-NEXT: mov.b v4[3], w0
775 ; CHECK-GI-NEXT: mov.b v4[4], w0
776 ; CHECK-GI-NEXT: mov.b v4[5], w0
777 ; CHECK-GI-NEXT: mov.b v4[6], w0
778 ; CHECK-GI-NEXT: mov.b v4[7], w0
779 ; CHECK-GI-NEXT: mov.b v4[8], w8
780 ; CHECK-GI-NEXT: mov.b v4[9], w8
781 ; CHECK-GI-NEXT: mov.b v4[10], w8
782 ; CHECK-GI-NEXT: mov.b v4[11], w8
783 ; CHECK-GI-NEXT: mov.b v4[12], w8
784 ; CHECK-GI-NEXT: mov.b v4[13], w8
785 ; CHECK-GI-NEXT: adrp x8, .LCPI13_1
786 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI13_1]
787 ; CHECK-GI-NEXT: adrp x8, .LCPI13_0
788 ; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v5
789 ; CHECK-GI-NEXT: mov.b v4[14], w0
790 ; CHECK-GI-NEXT: mov.b v4[15], w0
791 ; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v4
792 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
793 ; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0
; Build the run-time mask for the second tbl2 one lane at a time.
795 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
796 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
797 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
798 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
799 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
800 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
801 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
802 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
803 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
804 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
805 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
806 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
807 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
808 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
809 %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
810 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
; Constant-mask lookup into { %c, %d }, then run-time-mask lookup into { %a, %b }.
811 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
812 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
813 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
817 ; CHECK-SD-LABEL: .LCPI14_0:
818 ; CHECK-SD: .byte 0 // 0x0
819 ; CHECK-SD-NEXT: .byte 4 // 0x4
820 ; CHECK-SD-NEXT: .byte 52 // 0x34
821 ; CHECK-SD-NEXT: .byte 12 // 0xc
822 ; CHECK-SD-NEXT: .byte 16 // 0x10
823 ; CHECK-SD-NEXT: .byte 20 // 0x14
824 ; CHECK-SD-NEXT: .byte 24 // 0x18
825 ; CHECK-SD-NEXT: .byte 28 // 0x1c
826 ; CHECK-SD-NEXT: .byte 32 // 0x20
827 ; CHECK-SD-NEXT: .byte 36 // 0x24
828 ; CHECK-SD-NEXT: .byte 40 // 0x28
829 ; CHECK-SD-NEXT: .byte 44 // 0x2c
830 ; CHECK-SD-NEXT: .byte 48 // 0x30
831 ; CHECK-SD-NEXT: .byte 52 // 0x34
832 ; CHECK-SD-NEXT: .byte 56 // 0x38
833 ; CHECK-SD-NEXT: .byte 60 // 0x3c
835 ; CHECK-GI-LABEL: .LCPI14_0:
836 ; CHECK-GI: .byte 0 // 0x0
837 ; CHECK-GI-NEXT: .byte 1 // 0x1
838 ; CHECK-GI-NEXT: .byte 21 // 0x15
839 ; CHECK-GI-NEXT: .byte 3 // 0x3
840 ; CHECK-GI-NEXT: .byte 4 // 0x4
841 ; CHECK-GI-NEXT: .byte 5 // 0x5
842 ; CHECK-GI-NEXT: .byte 6 // 0x6
843 ; CHECK-GI-NEXT: .byte 7 // 0x7
844 ; CHECK-GI-NEXT: .byte 16 // 0x10
845 ; CHECK-GI-NEXT: .byte 17 // 0x11
846 ; CHECK-GI-NEXT: .byte 18 // 0x12
847 ; CHECK-GI-NEXT: .byte 19 // 0x13
848 ; CHECK-GI-NEXT: .byte 20 // 0x14
849 ; CHECK-GI-NEXT: .byte 21 // 0x15
850 ; CHECK-GI-NEXT: .byte 22 // 0x16
851 ; CHECK-GI-NEXT: .byte 23 // 0x17
852 ; CHECK-GI-LABEL: .LCPI14_1:
853 ; CHECK-GI: .byte 0 // 0x0
854 ; CHECK-GI-NEXT: .byte 4 // 0x4
855 ; CHECK-GI-NEXT: .byte 8 // 0x8
856 ; CHECK-GI-NEXT: .byte 12 // 0xc
857 ; CHECK-GI-NEXT: .byte 16 // 0x10
858 ; CHECK-GI-NEXT: .byte 20 // 0x14
859 ; CHECK-GI-NEXT: .byte 24 // 0x18
860 ; CHECK-GI-NEXT: .byte 28 // 0x1c
861 ; CHECK-GI-NEXT: .byte 255 // 0xff
862 ; CHECK-GI-NEXT: .byte 255 // 0xff
863 ; CHECK-GI-NEXT: .byte 255 // 0xff
864 ; CHECK-GI-NEXT: .byte 255 // 0xff
865 ; CHECK-GI-NEXT: .byte 255 // 0xff
866 ; CHECK-GI-NEXT: .byte 255 // 0xff
867 ; CHECK-GI-NEXT: .byte 255 // 0xff
868 ; CHECK-GI-NEXT: .byte 255 // 0xff
; Both tbl2 calls use the same constant mask. The shufflevector is "mixed":
; result lane 2 comes from %t2 lane 5 (index 21) in the middle of lanes that
; otherwise come from %t1, and result lanes 8-15 are %t2 lanes 0-7.
; SelectionDAG is expected to fold the whole sequence into one 4-register tbl;
; GlobalISel is expected to emit two 2-register tbl lookups plus a third tbl
; for the shuffle.
870 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
871 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
872 ; CHECK-SD: // %bb.0:
873 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
874 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0
875 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
876 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0]
877 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
878 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
879 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
882 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
883 ; CHECK-GI: // %bb.0:
884 ; CHECK-GI-NEXT: adrp x8, .LCPI14_1
885 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
886 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
887 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1]
888 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
889 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
890 ; CHECK-GI-NEXT: adrp x8, .LCPI14_0
891 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
892 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
893 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
894 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
896 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
897 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Index 21 in lane 2 is the only entry that breaks the "low half from %t1,
; high half from %t2" pattern.
898 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
902 ; CHECK-SD-LABEL: .LCPI15_0:
903 ; CHECK-SD: .byte 0 // 0x0
904 ; CHECK-SD-NEXT: .byte 4 // 0x4
905 ; CHECK-SD-NEXT: .byte 52 // 0x34
906 ; CHECK-SD-NEXT: .byte 12 // 0xc
907 ; CHECK-SD-NEXT: .byte 16 // 0x10
908 ; CHECK-SD-NEXT: .byte 20 // 0x14
909 ; CHECK-SD-NEXT: .byte 24 // 0x18
910 ; CHECK-SD-NEXT: .byte 28 // 0x1c
911 ; CHECK-SD-NEXT: .byte 32 // 0x20
912 ; CHECK-SD-NEXT: .byte 36 // 0x24
913 ; CHECK-SD-NEXT: .byte 40 // 0x28
914 ; CHECK-SD-NEXT: .byte 44 // 0x2c
915 ; CHECK-SD-NEXT: .byte 48 // 0x30
916 ; CHECK-SD-NEXT: .byte 52 // 0x34
917 ; CHECK-SD-NEXT: .byte 56 // 0x38
918 ; CHECK-SD-NEXT: .byte 60 // 0x3c
920 ; CHECK-GI-LABEL: .LCPI15_0:
921 ; CHECK-GI: .byte 0 // 0x0
922 ; CHECK-GI-NEXT: .byte 1 // 0x1
923 ; CHECK-GI-NEXT: .byte 21 // 0x15
924 ; CHECK-GI-NEXT: .byte 3 // 0x3
925 ; CHECK-GI-NEXT: .byte 4 // 0x4
926 ; CHECK-GI-NEXT: .byte 5 // 0x5
927 ; CHECK-GI-NEXT: .byte 6 // 0x6
928 ; CHECK-GI-NEXT: .byte 7 // 0x7
929 ; CHECK-GI-NEXT: .byte 16 // 0x10
930 ; CHECK-GI-NEXT: .byte 17 // 0x11
931 ; CHECK-GI-NEXT: .byte 18 // 0x12
932 ; CHECK-GI-NEXT: .byte 19 // 0x13
933 ; CHECK-GI-NEXT: .byte 20 // 0x14
934 ; CHECK-GI-NEXT: .byte 21 // 0x15
935 ; CHECK-GI-NEXT: .byte 22 // 0x16
936 ; CHECK-GI-NEXT: .byte 23 // 0x17
937 ; CHECK-GI-LABEL: .LCPI15_1:
938 ; CHECK-GI: .byte 0 // 0x0
939 ; CHECK-GI-NEXT: .byte 4 // 0x4
940 ; CHECK-GI-NEXT: .byte 8 // 0x8
941 ; CHECK-GI-NEXT: .byte 12 // 0xc
942 ; CHECK-GI-NEXT: .byte 16 // 0x10
943 ; CHECK-GI-NEXT: .byte 20 // 0x14
944 ; CHECK-GI-NEXT: .byte 24 // 0x18
945 ; CHECK-GI-NEXT: .byte 28 // 0x1c
946 ; CHECK-GI-NEXT: .byte 255 // 0xff
947 ; CHECK-GI-NEXT: .byte 255 // 0xff
948 ; CHECK-GI-NEXT: .byte 255 // 0xff
949 ; CHECK-GI-NEXT: .byte 255 // 0xff
950 ; CHECK-GI-NEXT: .byte 255 // 0xff
951 ; CHECK-GI-NEXT: .byte 255 // 0xff
952 ; CHECK-GI-NEXT: .byte 255 // 0xff
953 ; CHECK-GI-NEXT: .byte 255 // 0xff
954 ; CHECK-GI-LABEL: .LCPI15_2:
955 ; CHECK-GI: .byte 0 // 0x0
956 ; CHECK-GI-NEXT: .byte 4 // 0x4
957 ; CHECK-GI-NEXT: .byte 8 // 0x8
958 ; CHECK-GI-NEXT: .byte 12 // 0xc
959 ; CHECK-GI-NEXT: .byte 16 // 0x10
960 ; CHECK-GI-NEXT: .byte 20 // 0x14
961 ; CHECK-GI-NEXT: .byte 24 // 0x18
962 ; CHECK-GI-NEXT: .byte 28 // 0x1c
963 ; CHECK-GI-NEXT: .byte 0 // 0x0
964 ; CHECK-GI-NEXT: .byte 255 // 0xff
965 ; CHECK-GI-NEXT: .byte 255 // 0xff
966 ; CHECK-GI-NEXT: .byte 255 // 0xff
967 ; CHECK-GI-NEXT: .byte 255 // 0xff
968 ; CHECK-GI-NEXT: .byte 255 // 0xff
969 ; CHECK-GI-NEXT: .byte 255 // 0xff
970 ; CHECK-GI-NEXT: .byte 255 // 0xff
; The two tbl2 calls use different constant masks: the first mask has 0 in
; lane 8 where the second has -1. The shufflevector does not select lane 8 of
; %t1 (it uses %t1 lanes 0, 1 and 3-7), so the differing lane is never read.
; SelectionDAG is expected to fold the sequence into one 4-register tbl;
; GlobalISel is expected to load both masks separately and emit three tbl
; instructions.
972 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
973 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
974 ; CHECK-SD: // %bb.0:
975 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
976 ; CHECK-SD-NEXT: adrp x8, .LCPI15_0
977 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
978 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0]
979 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
980 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
981 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
984 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
985 ; CHECK-GI: // %bb.0:
986 ; CHECK-GI-NEXT: adrp x8, .LCPI15_2
987 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
988 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
989 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2]
990 ; CHECK-GI-NEXT: adrp x8, .LCPI15_1
991 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
992 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
993 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1]
994 ; CHECK-GI-NEXT: adrp x8, .LCPI15_0
995 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
996 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5
997 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
998 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; Lane 8 of the first mask is 0 here; the second mask keeps -1 in lanes 8-15.
1000 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1001 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1002 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1006 ; CHECK-SD-LABEL: .LCPI16_0:
1007 ; CHECK-SD: .byte 0 // 0x0
1008 ; CHECK-SD-NEXT: .byte 4 // 0x4
1009 ; CHECK-SD-NEXT: .byte 52 // 0x34
1010 ; CHECK-SD-NEXT: .byte 12 // 0xc
1011 ; CHECK-SD-NEXT: .byte 16 // 0x10
1012 ; CHECK-SD-NEXT: .byte 20 // 0x14
1013 ; CHECK-SD-NEXT: .byte 24 // 0x18
1014 ; CHECK-SD-NEXT: .byte 28 // 0x1c
1015 ; CHECK-SD-NEXT: .byte 32 // 0x20
1016 ; CHECK-SD-NEXT: .byte 36 // 0x24
1017 ; CHECK-SD-NEXT: .byte 40 // 0x28
1018 ; CHECK-SD-NEXT: .byte 44 // 0x2c
1019 ; CHECK-SD-NEXT: .byte 48 // 0x30
1020 ; CHECK-SD-NEXT: .byte 52 // 0x34
1021 ; CHECK-SD-NEXT: .byte 56 // 0x38
1022 ; CHECK-SD-NEXT: .byte 60 // 0x3c
1024 ; CHECK-GI-LABEL: .LCPI16_0:
1025 ; CHECK-GI: .byte 0 // 0x0
1026 ; CHECK-GI-NEXT: .byte 1 // 0x1
1027 ; CHECK-GI-NEXT: .byte 21 // 0x15
1028 ; CHECK-GI-NEXT: .byte 3 // 0x3
1029 ; CHECK-GI-NEXT: .byte 4 // 0x4
1030 ; CHECK-GI-NEXT: .byte 5 // 0x5
1031 ; CHECK-GI-NEXT: .byte 6 // 0x6
1032 ; CHECK-GI-NEXT: .byte 7 // 0x7
1033 ; CHECK-GI-NEXT: .byte 16 // 0x10
1034 ; CHECK-GI-NEXT: .byte 17 // 0x11
1035 ; CHECK-GI-NEXT: .byte 18 // 0x12
1036 ; CHECK-GI-NEXT: .byte 19 // 0x13
1037 ; CHECK-GI-NEXT: .byte 20 // 0x14
1038 ; CHECK-GI-NEXT: .byte 21 // 0x15
1039 ; CHECK-GI-NEXT: .byte 22 // 0x16
1040 ; CHECK-GI-NEXT: .byte 23 // 0x17
1041 ; CHECK-GI-LABEL: .LCPI16_1:
1042 ; CHECK-GI: .byte 0 // 0x0
1043 ; CHECK-GI-NEXT: .byte 4 // 0x4
1044 ; CHECK-GI-NEXT: .byte 8 // 0x8
1045 ; CHECK-GI-NEXT: .byte 12 // 0xc
1046 ; CHECK-GI-NEXT: .byte 16 // 0x10
1047 ; CHECK-GI-NEXT: .byte 20 // 0x14
1048 ; CHECK-GI-NEXT: .byte 24 // 0x18
1049 ; CHECK-GI-NEXT: .byte 28 // 0x1c
1050 ; CHECK-GI-NEXT: .byte 0 // 0x0
1051 ; CHECK-GI-NEXT: .byte 255 // 0xff
1052 ; CHECK-GI-NEXT: .byte 255 // 0xff
1053 ; CHECK-GI-NEXT: .byte 255 // 0xff
1054 ; CHECK-GI-NEXT: .byte 255 // 0xff
1055 ; CHECK-GI-NEXT: .byte 255 // 0xff
1056 ; CHECK-GI-NEXT: .byte 255 // 0xff
1057 ; CHECK-GI-NEXT: .byte 255 // 0xff
1058 ; CHECK-GI-LABEL: .LCPI16_2:
1059 ; CHECK-GI: .byte 0 // 0x0
1060 ; CHECK-GI-NEXT: .byte 4 // 0x4
1061 ; CHECK-GI-NEXT: .byte 8 // 0x8
1062 ; CHECK-GI-NEXT: .byte 12 // 0xc
1063 ; CHECK-GI-NEXT: .byte 16 // 0x10
1064 ; CHECK-GI-NEXT: .byte 20 // 0x14
1065 ; CHECK-GI-NEXT: .byte 24 // 0x18
1066 ; CHECK-GI-NEXT: .byte 28 // 0x1c
1067 ; CHECK-GI-NEXT: .byte 255 // 0xff
1068 ; CHECK-GI-NEXT: .byte 255 // 0xff
1069 ; CHECK-GI-NEXT: .byte 255 // 0xff
1070 ; CHECK-GI-NEXT: .byte 255 // 0xff
1071 ; CHECK-GI-NEXT: .byte 255 // 0xff
1072 ; CHECK-GI-NEXT: .byte 255 // 0xff
1073 ; CHECK-GI-NEXT: .byte 255 // 0xff
1074 ; CHECK-GI-NEXT: .byte 255 // 0xff
; The two tbl2 calls use different constant masks: the second mask has 0 in
; lane 8 where the first has -1. The shufflevector reads only lanes 0-7 of
; %t2 (indices 16-23 and 21), so the differing lane is never read.
; SelectionDAG is expected to fold the sequence into one 4-register tbl;
; GlobalISel is expected to load both masks separately and emit three tbl
; instructions.
1076 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
1077 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
1078 ; CHECK-SD: // %bb.0:
1079 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1080 ; CHECK-SD-NEXT: adrp x8, .LCPI16_0
1081 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1082 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
1083 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1084 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1085 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
1086 ; CHECK-SD-NEXT: ret
1088 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
1089 ; CHECK-GI: // %bb.0:
1090 ; CHECK-GI-NEXT: adrp x8, .LCPI16_2
1091 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
1092 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
1093 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
1094 ; CHECK-GI-NEXT: adrp x8, .LCPI16_1
1095 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1096 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
1097 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1]
1098 ; CHECK-GI-NEXT: adrp x8, .LCPI16_0
1099 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
1100 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5
1101 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
1102 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
1103 ; CHECK-GI-NEXT: ret
; Lane 8 of the second mask is 0 here; the first mask keeps -1 in lanes 8-15.
1104 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1105 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1106 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Declarations of the AArch64 NEON tbl1-tbl4 intrinsics. Each takes one to
; four <16 x i8> table operands followed by an index vector, and comes in a
; <8 x i8> and a <16 x i8> variant whose result type matches the index type.
1110 declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
1111 declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1112 declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1113 declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1114 declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1115 declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1116 declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1117 declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; tbx1 with <8 x i8> result and index: both selectors share one expectation,
; a single tbx.8b with the one-register table { v1 } and index register v2.
1119 define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
1120 ; CHECK-LABEL: tbx1_8b:
1122 ; CHECK-NEXT: tbx.8b v0, { v1 }, v2
1124 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
; tbx1 with <16 x i8> result and index: both selectors share one expectation,
; a single tbx.16b with the one-register table { v1 } and index register v2.
1128 define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
1129 ; CHECK-LABEL: tbx1_16b:
1131 ; CHECK-NEXT: tbx.16b v0, { v1 }, v2
1133 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
; tbx2 with <8 x i8> result and index: expected to select one tbx.8b with the
; two-register table { v1, v2 } and index register v3. The SelectionDAG and
; GlobalISel expectations differ only in the order of the kill annotations
; for the $q1_q2 register tuple.
1137 define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
1138 ; CHECK-SD-LABEL: tbx2_8b:
1139 ; CHECK-SD: // %bb.0:
1140 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
1141 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
1142 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3
1143 ; CHECK-SD-NEXT: ret
1145 ; CHECK-GI-LABEL: tbx2_8b:
1146 ; CHECK-GI: // %bb.0:
1147 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
1148 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
1149 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3
1150 ; CHECK-GI-NEXT: ret
1151 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
; tbx2 with <16 x i8> result and index: expected to select one tbx.16b with
; the two-register table { v1, v2 } and index register v3. The SelectionDAG
; and GlobalISel expectations differ only in the order of the kill
; annotations for the $q1_q2 register tuple.
1155 define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
1156 ; CHECK-SD-LABEL: tbx2_16b:
1157 ; CHECK-SD: // %bb.0:
1158 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
1159 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
1160 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3
1161 ; CHECK-SD-NEXT: ret
1163 ; CHECK-GI-LABEL: tbx2_16b:
1164 ; CHECK-GI: // %bb.0:
1165 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
1166 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
1167 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3
1168 ; CHECK-GI-NEXT: ret
1169 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
; tbx3 with <8 x i8> result and index: expected to select one tbx.8b with the
; three-register table { v1, v2, v3 } and index register v4. The SelectionDAG
; and GlobalISel expectations differ only in the order of the kill
; annotations for the $q1_q2_q3 register tuple.
1173 define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
1174 ; CHECK-SD-LABEL: tbx3_8b:
1175 ; CHECK-SD: // %bb.0:
1176 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
1177 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
1178 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
1179 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
1180 ; CHECK-SD-NEXT: ret
1182 ; CHECK-GI-LABEL: tbx3_8b:
1183 ; CHECK-GI: // %bb.0:
1184 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
1185 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
1186 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
1187 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
1188 ; CHECK-GI-NEXT: ret
1189 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
; tbx3 with <16 x i8> result and index: expected to select one tbx.16b with
; the three-register table { v1, v2, v3 } and index register v4. The
; SelectionDAG and GlobalISel expectations differ only in the order of the
; kill annotations for the $q1_q2_q3 register tuple.
1193 define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
1194 ; CHECK-SD-LABEL: tbx3_16b:
1195 ; CHECK-SD: // %bb.0:
1196 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
1197 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
1198 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
1199 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
1200 ; CHECK-SD-NEXT: ret
1202 ; CHECK-GI-LABEL: tbx3_16b:
1203 ; CHECK-GI: // %bb.0:
1204 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
1205 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
1206 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
1207 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
1208 ; CHECK-GI-NEXT: ret
1209 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
; tbx4 with <8 x i8> result and index: expected to select one tbx.8b with the
; four-register table { v1, v2, v3, v4 } and index register v5. The
; SelectionDAG and GlobalISel expectations differ only in the order of the
; kill annotations for the $q1_q2_q3_q4 register tuple.
1213 define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
1214 ; CHECK-SD-LABEL: tbx4_8b:
1215 ; CHECK-SD: // %bb.0:
1216 ; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1217 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1218 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1219 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1220 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
1221 ; CHECK-SD-NEXT: ret
1223 ; CHECK-GI-LABEL: tbx4_8b:
1224 ; CHECK-GI: // %bb.0:
1225 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1226 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1227 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1228 ; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1229 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
1230 ; CHECK-GI-NEXT: ret
1231 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
; tbx4 with <16 x i8> result and index: expected to select one tbx.16b with
; the four-register table { v1, v2, v3, v4 } and index register v5. The
; SelectionDAG and GlobalISel expectations differ only in the order of the
; kill annotations for the $q1_q2_q3_q4 register tuple.
1235 define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
1236 ; CHECK-SD-LABEL: tbx4_16b:
1237 ; CHECK-SD: // %bb.0:
1238 ; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1239 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1240 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1241 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1242 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
1243 ; CHECK-SD-NEXT: ret
1245 ; CHECK-GI-LABEL: tbx4_16b:
1246 ; CHECK-GI: // %bb.0:
1247 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1248 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1249 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1250 ; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1251 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
1252 ; CHECK-GI-NEXT: ret
1253 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
; Declarations of the AArch64 NEON tbx1-tbx4 intrinsics. Each takes a leading
; vector operand of the result type, then one to four <16 x i8> table
; operands, then an index vector; each comes in a <8 x i8> and a <16 x i8>
; variant.
1257 declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1258 declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1259 declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1260 declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1261 declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1262 declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1263 declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1264 declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone