1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Calls the tbl1 intrinsic with one 16-byte table (%A) and an 8-byte index
; vector (%B). Both llc runs share the common prefix here and expect a single
; one-register tbl.8b.
5 define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
6 ; CHECK-LABEL: tbl1_8b:
8 ; CHECK-NEXT: tbl.8b v0, { v0 }, v1
10 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
; Calls the tbl1 intrinsic with one 16-byte table (%A) and a 16-byte index
; vector (%B). Both llc runs share the common prefix here and expect a single
; one-register tbl.16b.
14 define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
15 ; CHECK-LABEL: tbl1_16b:
17 ; CHECK-NEXT: tbl.16b v0, { v0 }, v1
19 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
; Calls the tbl2 intrinsic with a two-register table (%A, %B) and an 8-byte
; index vector (%C). Both runs expect the same tbl.8b over { v0, v1 }; they are
; checked under separate prefixes because the order of the register-tuple
; "kill" comments differs between the default and -global-isel runs.
23 define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
24 ; CHECK-SD-LABEL: tbl2_8b:
26 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
27 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
28 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2
31 ; CHECK-GI-LABEL: tbl2_8b:
33 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
34 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
35 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2
37 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
; Calls the tbl2 intrinsic with a two-register table (%A, %B) and a 16-byte
; index vector (%C). Both runs expect the same tbl.16b over { v0, v1 }; only the
; order of the register-tuple "kill" comments differs between the two runs.
41 define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
42 ; CHECK-SD-LABEL: tbl2_16b:
44 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
45 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
46 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2
49 ; CHECK-GI-LABEL: tbl2_16b:
51 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
52 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
53 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
55 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
; Calls the tbl3 intrinsic with a three-register table (%A, %B, %C) and an
; 8-byte index vector (%D). Both runs expect the same tbl.8b over
; { v0, v1, v2 }; only the order of the "kill" comments differs between runs.
59 define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
60 ; CHECK-SD-LABEL: tbl3_8b:
62 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
63 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
64 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
65 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
68 ; CHECK-GI-LABEL: tbl3_8b:
70 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
71 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
72 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
73 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
75 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
; Calls the tbl3 intrinsic with a three-register table (%A, %B, %C) and a
; 16-byte index vector (%D). Both runs expect the same tbl.16b over
; { v0, v1, v2 }; only the order of the "kill" comments differs between runs.
79 define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
80 ; CHECK-SD-LABEL: tbl3_16b:
82 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
83 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
84 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
85 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
88 ; CHECK-GI-LABEL: tbl3_16b:
90 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
91 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
92 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
93 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
95 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
; Calls the tbl4 intrinsic with a four-register table (%A, %B, %C, %D) and an
; 8-byte index vector (%E). Both runs expect the same tbl.8b over
; { v0, v1, v2, v3 }; only the order of the "kill" comments differs between runs.
99 define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
100 ; CHECK-SD-LABEL: tbl4_8b:
101 ; CHECK-SD: // %bb.0:
102 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
103 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
104 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
105 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
106 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
109 ; CHECK-GI-LABEL: tbl4_8b:
110 ; CHECK-GI: // %bb.0:
111 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
112 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
113 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
114 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
115 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
117 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
; Calls the tbl4 intrinsic with a four-register table (%A, %B, %C, %D) and a
; 16-byte index vector (%E). Both runs expect the same tbl.16b over
; { v0, v1, v2, v3 }; only the order of the "kill" comments differs between runs.
121 define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
122 ; CHECK-SD-LABEL: tbl4_16b:
123 ; CHECK-SD: // %bb.0:
124 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
125 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
126 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
127 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
128 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
131 ; CHECK-GI-LABEL: tbl4_16b:
132 ; CHECK-GI: // %bb.0:
133 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
134 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
135 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
136 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
137 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
139 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
143 ; CHECK-SD-LABEL: .LCPI8_0:
144 ; CHECK-SD: .byte 0 // 0x0
145 ; CHECK-SD-NEXT: .byte 4 // 0x4
146 ; CHECK-SD-NEXT: .byte 8 // 0x8
147 ; CHECK-SD-NEXT: .byte 12 // 0xc
148 ; CHECK-SD-NEXT: .byte 255 // 0xff
149 ; CHECK-SD-NEXT: .byte 255 // 0xff
150 ; CHECK-SD-NEXT: .byte 255 // 0xff
151 ; CHECK-SD-NEXT: .byte 255 // 0xff
153 ; CHECK-GI-LABEL: .LCPI8_0:
154 ; CHECK-GI: .byte 0 // 0x0
155 ; CHECK-GI-NEXT: .byte 1 // 0x1
156 ; CHECK-GI-NEXT: .byte 2 // 0x2
157 ; CHECK-GI-NEXT: .byte 3 // 0x3
158 ; CHECK-GI-NEXT: .byte 12 // 0xc
159 ; CHECK-GI-NEXT: .byte 13 // 0xd
160 ; CHECK-GI-NEXT: .byte 14 // 0xe
161 ; CHECK-GI-NEXT: .byte 15 // 0xf
162 ; CHECK-GI-LABEL: .LCPI8_1:
163 ; CHECK-GI: .byte 0 // 0x0
164 ; CHECK-GI-NEXT: .byte 4 // 0x4
165 ; CHECK-GI-NEXT: .byte 8 // 0x8
166 ; CHECK-GI-NEXT: .byte 12 // 0xc
167 ; CHECK-GI-NEXT: .byte 255 // 0xff
168 ; CHECK-GI-NEXT: .byte 255 // 0xff
169 ; CHECK-GI-NEXT: .byte 255 // 0xff
170 ; CHECK-GI-NEXT: .byte 255 // 0xff
; 8-byte variant: two tbl2.v8i8 calls that use the same constant mask
; <0, 4, 8, 12, -1, -1, -1, -1>, combined by a shufflevector that takes
; elements 0-3 of %t1 and indices 12-15 (elements 4-7 of %t2).
; Expected output is NOT a single four-register tbl in either run:
;  * default run: two tbl.8b sharing the mask loaded from .LCPI8_0, then a
;    mov.s lane insert;
;  * -global-isel run: two tbl.8b sharing the mask from .LCPI8_1, a mov.d to
;    concatenate the halves, and a final one-register tbl.16b whose mask comes
;    from .LCPI8_0.
172 define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
173 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
174 ; CHECK-SD: // %bb.0:
175 ; CHECK-SD-NEXT: adrp x8, .LCPI8_0
176 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
177 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
178 ; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
179 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
180 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
181 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4
182 ; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4
183 ; CHECK-SD-NEXT: mov.s v0[1], v1[1]
184 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
187 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
188 ; CHECK-GI: // %bb.0:
189 ; CHECK-GI-NEXT: adrp x8, .LCPI8_1
190 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
191 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
192 ; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1]
193 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
194 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
195 ; CHECK-GI-NEXT: adrp x8, .LCPI8_0
196 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4
197 ; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4
198 ; CHECK-GI-NEXT: mov.d v0[1], v1[0]
199 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
200 ; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
201 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
203 %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
204 %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
205 %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
209 ; CHECK-SD-LABEL: .LCPI9_0:
210 ; CHECK-SD-NEXT: .byte 0 // 0x0
211 ; CHECK-SD-NEXT: .byte 4 // 0x4
212 ; CHECK-SD-NEXT: .byte 8 // 0x8
213 ; CHECK-SD-NEXT: .byte 12 // 0xc
214 ; CHECK-SD-NEXT: .byte 16 // 0x10
215 ; CHECK-SD-NEXT: .byte 20 // 0x14
216 ; CHECK-SD-NEXT: .byte 24 // 0x18
217 ; CHECK-SD-NEXT: .byte 28 // 0x1c
218 ; CHECK-SD-NEXT: .byte 32 // 0x20
219 ; CHECK-SD-NEXT: .byte 36 // 0x24
220 ; CHECK-SD-NEXT: .byte 40 // 0x28
221 ; CHECK-SD-NEXT: .byte 44 // 0x2c
222 ; CHECK-SD-NEXT: .byte 48 // 0x30
223 ; CHECK-SD-NEXT: .byte 52 // 0x34
224 ; CHECK-SD-NEXT: .byte 56 // 0x38
225 ; CHECK-SD-NEXT: .byte 60 // 0x3c
227 ;CHECK-GI-LABEL: .LCPI9_0:
228 ;CHECK-GI: .byte 0 // 0x0
229 ;CHECK-GI-NEXT: .byte 1 // 0x1
230 ;CHECK-GI-NEXT: .byte 2 // 0x2
231 ;CHECK-GI-NEXT: .byte 3 // 0x3
232 ;CHECK-GI-NEXT: .byte 4 // 0x4
233 ;CHECK-GI-NEXT: .byte 5 // 0x5
234 ;CHECK-GI-NEXT: .byte 6 // 0x6
235 ;CHECK-GI-NEXT: .byte 7 // 0x7
236 ;CHECK-GI-NEXT: .byte 16 // 0x10
237 ;CHECK-GI-NEXT: .byte 17 // 0x11
238 ;CHECK-GI-NEXT: .byte 18 // 0x12
239 ;CHECK-GI-NEXT: .byte 19 // 0x13
240 ;CHECK-GI-NEXT: .byte 20 // 0x14
241 ;CHECK-GI-NEXT: .byte 21 // 0x15
242 ;CHECK-GI-NEXT: .byte 22 // 0x16
243 ;CHECK-GI-NEXT: .byte 23 // 0x17
244 ;CHECK-GI-LABEL: .LCPI9_1:
245 ;CHECK-GI: .byte 0 // 0x0
246 ;CHECK-GI-NEXT: .byte 4 // 0x4
247 ;CHECK-GI-NEXT: .byte 8 // 0x8
248 ;CHECK-GI-NEXT: .byte 12 // 0xc
249 ;CHECK-GI-NEXT: .byte 16 // 0x10
250 ;CHECK-GI-NEXT: .byte 20 // 0x14
251 ;CHECK-GI-NEXT: .byte 24 // 0x18
252 ;CHECK-GI-NEXT: .byte 28 // 0x1c
253 ;CHECK-GI-NEXT: .byte 255 // 0xff
254 ;CHECK-GI-NEXT: .byte 255 // 0xff
255 ;CHECK-GI-NEXT: .byte 255 // 0xff
256 ;CHECK-GI-NEXT: .byte 255 // 0xff
257 ;CHECK-GI-NEXT: .byte 255 // 0xff
258 ;CHECK-GI-NEXT: .byte 255 // 0xff
259 ;CHECK-GI-NEXT: .byte 255 // 0xff
260 ;CHECK-GI-NEXT: .byte 255 // 0xff
; Two tbl2.v16i8 calls with the same constant mask
; <0, 4, ..., 28, -1 x 8>, combined by a shufflevector that takes elements 0-7
; of %t1 followed by elements 0-7 of %t2 (indices 16-23).
;  * default run: the whole sequence is expected to become ONE four-register
;    tbl.16b over { v0, v1, v2, v3 } with the mask loaded from .LCPI9_0;
;  * -global-isel run: the two tbl.16b over register pairs are kept (mask from
;    .LCPI9_1) and a third two-register tbl.16b, with its mask from .LCPI9_0,
;    performs the shuffle.
262 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
263 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
264 ; CHECK-SD: // %bb.0:
265 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
266 ; CHECK-SD-NEXT: adrp x8, .LCPI9_0
267 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
268 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
269 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
270 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
271 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
274 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
275 ; CHECK-GI: // %bb.0:
276 ; CHECK-GI-NEXT: adrp x8, .LCPI9_1
277 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
278 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
279 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1]
280 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
281 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
282 ; CHECK-GI-NEXT: adrp x8, .LCPI9_0
283 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
284 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
285 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
286 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
288 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
289 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
290 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
294 ; CHECK-GI-LABEL: .LCPI10_0:
295 ; CHECK-GI: .byte 0 // 0x0
296 ; CHECK-GI-NEXT: .byte 1 // 0x1
297 ; CHECK-GI-NEXT: .byte 2 // 0x2
298 ; CHECK-GI-NEXT: .byte 3 // 0x3
299 ; CHECK-GI-NEXT: .byte 4 // 0x4
300 ; CHECK-GI-NEXT: .byte 5 // 0x5
301 ; CHECK-GI-NEXT: .byte 6 // 0x6
302 ; CHECK-GI-NEXT: .byte 7 // 0x7
303 ; CHECK-GI-NEXT: .byte 16 // 0x10
304 ; CHECK-GI-NEXT: .byte 17 // 0x11
305 ; CHECK-GI-NEXT: .byte 18 // 0x12
306 ; CHECK-GI-NEXT: .byte 19 // 0x13
307 ; CHECK-GI-NEXT: .byte 20 // 0x14
308 ; CHECK-GI-NEXT: .byte 21 // 0x15
309 ; CHECK-GI-NEXT: .byte 22 // 0x16
310 ; CHECK-GI-NEXT: .byte 23 // 0x17
311 ; CHECK-GI-LABEL: .LCPI10_1:
312 ; CHECK-GI: .byte 0 // 0x0
313 ; CHECK-GI-NEXT: .byte 4 // 0x4
314 ; CHECK-GI-NEXT: .byte 8 // 0x8
315 ; CHECK-GI-NEXT: .byte 12 // 0xc
316 ; CHECK-GI-NEXT: .byte 16 // 0x10
317 ; CHECK-GI-NEXT: .byte 20 // 0x14
318 ; CHECK-GI-NEXT: .byte 24 // 0x18
319 ; CHECK-GI-NEXT: .byte 28 // 0x1c
320 ; CHECK-GI-NEXT: .byte 255 // 0xff
321 ; CHECK-GI-NEXT: .byte 255 // 0xff
322 ; CHECK-GI-NEXT: .byte 255 // 0xff
323 ; CHECK-GI-NEXT: .byte 255 // 0xff
324 ; CHECK-GI-NEXT: .byte 255 // 0xff
325 ; CHECK-GI-NEXT: .byte 255 // 0xff
326 ; CHECK-GI-NEXT: .byte 255 // 0xff
327 ; CHECK-GI-NEXT: .byte 255 // 0xff
; Same shape as shuffled_tbl2_to_tbl4, but the mask of the FIRST tbl2 is not a
; constant: it is built with insertelement from the i8 argument %v (lanes 0-7)
; and -1 (lanes 8-15). The second tbl2 keeps the constant mask.
;  * default run: a single four-register tbl.16b is still expected; its mask is
;    assembled lane by lane in v4, with lanes 0-7 taken from w0 and lanes 8-15
;    set to the immediates 32, 36, ..., 60;
;  * -global-isel run: the first mask is built in v5, both two-register
;    tbl.16b are kept, and a third tbl.16b with its mask from .LCPI10_0
;    performs the shuffle.
329 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
330 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
331 ; CHECK-SD: // %bb.0:
332 ; CHECK-SD-NEXT: fmov s4, w0
333 ; CHECK-SD-NEXT: mov w8, #32 // =0x20
334 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
335 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
336 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
337 ; CHECK-SD-NEXT: mov.b v4[1], w0
338 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
339 ; CHECK-SD-NEXT: mov.b v4[2], w0
340 ; CHECK-SD-NEXT: mov.b v4[3], w0
341 ; CHECK-SD-NEXT: mov.b v4[4], w0
342 ; CHECK-SD-NEXT: mov.b v4[5], w0
343 ; CHECK-SD-NEXT: mov.b v4[6], w0
344 ; CHECK-SD-NEXT: mov.b v4[7], w0
345 ; CHECK-SD-NEXT: mov.b v4[8], w8
346 ; CHECK-SD-NEXT: mov w8, #36 // =0x24
347 ; CHECK-SD-NEXT: mov.b v4[9], w8
348 ; CHECK-SD-NEXT: mov w8, #40 // =0x28
349 ; CHECK-SD-NEXT: mov.b v4[10], w8
350 ; CHECK-SD-NEXT: mov w8, #44 // =0x2c
351 ; CHECK-SD-NEXT: mov.b v4[11], w8
352 ; CHECK-SD-NEXT: mov w8, #48 // =0x30
353 ; CHECK-SD-NEXT: mov.b v4[12], w8
354 ; CHECK-SD-NEXT: mov w8, #52 // =0x34
355 ; CHECK-SD-NEXT: mov.b v4[13], w8
356 ; CHECK-SD-NEXT: mov w8, #56 // =0x38
357 ; CHECK-SD-NEXT: mov.b v4[14], w8
358 ; CHECK-SD-NEXT: mov w8, #60 // =0x3c
359 ; CHECK-SD-NEXT: mov.b v4[15], w8
360 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
363 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
364 ; CHECK-GI: // %bb.0:
365 ; CHECK-GI-NEXT: fmov s4, w0
366 ; CHECK-GI-NEXT: mov w8, #255 // =0xff
367 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
368 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
369 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
370 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
371 ; CHECK-GI-NEXT: mov.16b v5, v4
372 ; CHECK-GI-NEXT: mov.b v5[1], v4[0]
373 ; CHECK-GI-NEXT: mov.b v5[2], v4[0]
374 ; CHECK-GI-NEXT: mov.b v5[3], v4[0]
375 ; CHECK-GI-NEXT: mov.b v5[4], v4[0]
376 ; CHECK-GI-NEXT: mov.b v5[5], v4[0]
377 ; CHECK-GI-NEXT: mov.b v5[6], v4[0]
378 ; CHECK-GI-NEXT: mov.b v5[7], v4[0]
379 ; CHECK-GI-NEXT: fmov s4, w8
380 ; CHECK-GI-NEXT: adrp x8, .LCPI10_1
381 ; CHECK-GI-NEXT: mov.b v5[8], v4[0]
382 ; CHECK-GI-NEXT: mov.b v5[9], v4[0]
383 ; CHECK-GI-NEXT: mov.b v5[10], v4[0]
384 ; CHECK-GI-NEXT: mov.b v5[11], v4[0]
385 ; CHECK-GI-NEXT: mov.b v5[12], v4[0]
386 ; CHECK-GI-NEXT: mov.b v5[13], v4[0]
387 ; CHECK-GI-NEXT: mov.b v5[14], v4[0]
388 ; CHECK-GI-NEXT: mov.b v5[15], v4[0]
389 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI10_1]
390 ; CHECK-GI-NEXT: adrp x8, .LCPI10_0
391 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v5
392 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
393 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI10_0]
394 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
396 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
397 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
398 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
399 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
400 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
401 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
402 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
403 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
404 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
405 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
406 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
407 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
408 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
409 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
410 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
411 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
412 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
413 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
414 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
418 ; CHECK-GI-LABEL: .LCPI11_0:
419 ; CHECK-GI: .byte 0 // 0x0
420 ; CHECK-GI-NEXT: .byte 1 // 0x1
421 ; CHECK-GI-NEXT: .byte 2 // 0x2
422 ; CHECK-GI-NEXT: .byte 3 // 0x3
423 ; CHECK-GI-NEXT: .byte 4 // 0x4
424 ; CHECK-GI-NEXT: .byte 5 // 0x5
425 ; CHECK-GI-NEXT: .byte 6 // 0x6
426 ; CHECK-GI-NEXT: .byte 15 // 0xf
427 ; CHECK-GI-NEXT: .byte 16 // 0x10
428 ; CHECK-GI-NEXT: .byte 17 // 0x11
429 ; CHECK-GI-NEXT: .byte 18 // 0x12
430 ; CHECK-GI-NEXT: .byte 19 // 0x13
431 ; CHECK-GI-NEXT: .byte 20 // 0x14
432 ; CHECK-GI-NEXT: .byte 21 // 0x15
433 ; CHECK-GI-NEXT: .byte 22 // 0x16
434 ; CHECK-GI-NEXT: .byte 31 // 0x1f
435 ; CHECK-GI-LABEL: .LCPI11_1:
436 ; CHECK-GI: .byte 0 // 0x0
437 ; CHECK-GI-NEXT: .byte 4 // 0x4
438 ; CHECK-GI-NEXT: .byte 8 // 0x8
439 ; CHECK-GI-NEXT: .byte 12 // 0xc
440 ; CHECK-GI-NEXT: .byte 16 // 0x10
441 ; CHECK-GI-NEXT: .byte 20 // 0x14
442 ; CHECK-GI-NEXT: .byte 24 // 0x18
443 ; CHECK-GI-NEXT: .byte 28 // 0x1c
444 ; CHECK-GI-NEXT: .byte 255 // 0xff
445 ; CHECK-GI-NEXT: .byte 255 // 0xff
446 ; CHECK-GI-NEXT: .byte 255 // 0xff
447 ; CHECK-GI-NEXT: .byte 255 // 0xff
448 ; CHECK-GI-NEXT: .byte 255 // 0xff
449 ; CHECK-GI-NEXT: .byte 255 // 0xff
450 ; CHECK-GI-NEXT: .byte 255 // 0xff
451 ; CHECK-GI-NEXT: .byte 255 // 0xff
; Variant with a partly non-constant FIRST mask and a non-contiguous shuffle.
; The first tbl2 mask is built with insertelement: lanes 0-7 are the constant
; 1, lanes 8-11 and 14 are -1, and lanes 12, 13 and 15 are the i8 argument %v.
; The shufflevector takes elements 0-6 and 15 of %t1, then elements 0-6 and 15
; of %t2 (indices 16-22 and 31).
;  * default run: a single four-register tbl.16b is expected; its mask in v4
;    has lanes 0-6 = 1, lane 7 = w0, lanes 8-14 = 32, 36, ..., 56 and
;    lane 15 = 31;
;  * -global-isel run: the first mask is built in v5, both two-register
;    tbl.16b are kept, and a third tbl.16b with its mask from .LCPI11_0
;    performs the shuffle.
; NOTE(review): lane 15 of the merged mask is 31, which equals -1 + 32 for the
; second mask's lane 15 (-1) - confirm this index is the intended result.
453 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
454 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
455 ; CHECK-SD: // %bb.0:
456 ; CHECK-SD-NEXT: mov w8, #1 // =0x1
457 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
458 ; CHECK-SD-NEXT: fmov s4, w8
459 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
460 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
461 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
462 ; CHECK-SD-NEXT: mov.b v4[1], w8
463 ; CHECK-SD-NEXT: mov.b v4[2], w8
464 ; CHECK-SD-NEXT: mov.b v4[3], w8
465 ; CHECK-SD-NEXT: mov.b v4[4], w8
466 ; CHECK-SD-NEXT: mov.b v4[5], w8
467 ; CHECK-SD-NEXT: mov.b v4[6], w8
468 ; CHECK-SD-NEXT: mov w8, #32 // =0x20
469 ; CHECK-SD-NEXT: mov.b v4[7], w0
470 ; CHECK-SD-NEXT: mov.b v4[8], w8
471 ; CHECK-SD-NEXT: mov w8, #36 // =0x24
472 ; CHECK-SD-NEXT: mov.b v4[9], w8
473 ; CHECK-SD-NEXT: mov w8, #40 // =0x28
474 ; CHECK-SD-NEXT: mov.b v4[10], w8
475 ; CHECK-SD-NEXT: mov w8, #44 // =0x2c
476 ; CHECK-SD-NEXT: mov.b v4[11], w8
477 ; CHECK-SD-NEXT: mov w8, #48 // =0x30
478 ; CHECK-SD-NEXT: mov.b v4[12], w8
479 ; CHECK-SD-NEXT: mov w8, #52 // =0x34
480 ; CHECK-SD-NEXT: mov.b v4[13], w8
481 ; CHECK-SD-NEXT: mov w8, #56 // =0x38
482 ; CHECK-SD-NEXT: mov.b v4[14], w8
483 ; CHECK-SD-NEXT: mov w8, #31 // =0x1f
484 ; CHECK-SD-NEXT: mov.b v4[15], w8
485 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
488 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
489 ; CHECK-GI: // %bb.0:
490 ; CHECK-GI-NEXT: mov w8, #1 // =0x1
491 ; CHECK-GI-NEXT: fmov s6, w0
492 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
493 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
494 ; CHECK-GI-NEXT: fmov s4, w8
495 ; CHECK-GI-NEXT: mov w8, #255 // =0xff
496 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
497 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
498 ; CHECK-GI-NEXT: mov.16b v5, v4
499 ; CHECK-GI-NEXT: mov.b v5[1], v4[0]
500 ; CHECK-GI-NEXT: mov.b v5[2], v4[0]
501 ; CHECK-GI-NEXT: mov.b v5[3], v4[0]
502 ; CHECK-GI-NEXT: mov.b v5[4], v4[0]
503 ; CHECK-GI-NEXT: mov.b v5[5], v4[0]
504 ; CHECK-GI-NEXT: mov.b v5[6], v4[0]
505 ; CHECK-GI-NEXT: mov.b v5[7], v4[0]
506 ; CHECK-GI-NEXT: fmov s4, w8
507 ; CHECK-GI-NEXT: adrp x8, .LCPI11_1
508 ; CHECK-GI-NEXT: mov.b v5[8], v4[0]
509 ; CHECK-GI-NEXT: mov.b v5[9], v4[0]
510 ; CHECK-GI-NEXT: mov.b v5[10], v4[0]
511 ; CHECK-GI-NEXT: mov.b v5[11], v4[0]
512 ; CHECK-GI-NEXT: mov.b v5[12], v6[0]
513 ; CHECK-GI-NEXT: mov.b v5[13], v6[0]
514 ; CHECK-GI-NEXT: mov.b v5[14], v4[0]
515 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI11_1]
516 ; CHECK-GI-NEXT: adrp x8, .LCPI11_0
517 ; CHECK-GI-NEXT: mov.b v5[15], v6[0]
518 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v5
519 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
520 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
521 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
523 %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
524 %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
525 %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
526 %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
527 %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
528 %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
529 %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
530 %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
531 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
532 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
533 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
534 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
535 %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
536 %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
537 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
538 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
539 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
540 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
541 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
545 ; CHECK-SD-LABEL: .LCPI12_0:
546 ; CHECK-SD: .byte 0 // 0x0
547 ; CHECK-SD-NEXT: .byte 4 // 0x4
548 ; CHECK-SD-NEXT: .byte 8 // 0x8
549 ; CHECK-SD-NEXT: .byte 12 // 0xc
550 ; CHECK-SD-NEXT: .byte 16 // 0x10
551 ; CHECK-SD-NEXT: .byte 20 // 0x14
552 ; CHECK-SD-NEXT: .byte 24 // 0x18
553 ; CHECK-SD-NEXT: .byte 28 // 0x1c
554 ; CHECK-SD-NEXT: .byte 255 // 0xff
555 ; CHECK-SD-NEXT: .byte 255 // 0xff
556 ; CHECK-SD-NEXT: .byte 255 // 0xff
557 ; CHECK-SD-NEXT: .byte 255 // 0xff
558 ; CHECK-SD-NEXT: .byte 255 // 0xff
559 ; CHECK-SD-NEXT: .byte 255 // 0xff
560 ; CHECK-SD-NEXT: .byte 255 // 0xff
561 ; CHECK-SD-NEXT: .byte 255 // 0xff
563 ; CHECK-GI-LABEL: .LCPI12_0:
564 ; CHECK-GI: .byte 0 // 0x0
565 ; CHECK-GI-NEXT: .byte 1 // 0x1
566 ; CHECK-GI-NEXT: .byte 2 // 0x2
567 ; CHECK-GI-NEXT: .byte 3 // 0x3
568 ; CHECK-GI-NEXT: .byte 4 // 0x4
569 ; CHECK-GI-NEXT: .byte 5 // 0x5
570 ; CHECK-GI-NEXT: .byte 6 // 0x6
571 ; CHECK-GI-NEXT: .byte 7 // 0x7
572 ; CHECK-GI-NEXT: .byte 16 // 0x10
573 ; CHECK-GI-NEXT: .byte 17 // 0x11
574 ; CHECK-GI-NEXT: .byte 18 // 0x12
575 ; CHECK-GI-NEXT: .byte 19 // 0x13
576 ; CHECK-GI-NEXT: .byte 20 // 0x14
577 ; CHECK-GI-NEXT: .byte 21 // 0x15
578 ; CHECK-GI-NEXT: .byte 22 // 0x16
579 ; CHECK-GI-NEXT: .byte 23 // 0x17
580 ; CHECK-GI-LABEL: .LCPI12_1:
581 ; CHECK-GI: .byte 0 // 0x0
582 ; CHECK-GI-NEXT: .byte 4 // 0x4
583 ; CHECK-GI-NEXT: .byte 8 // 0x8
584 ; CHECK-GI-NEXT: .byte 12 // 0xc
585 ; CHECK-GI-NEXT: .byte 16 // 0x10
586 ; CHECK-GI-NEXT: .byte 20 // 0x14
587 ; CHECK-GI-NEXT: .byte 24 // 0x18
588 ; CHECK-GI-NEXT: .byte 28 // 0x1c
589 ; CHECK-GI-NEXT: .byte 255 // 0xff
590 ; CHECK-GI-NEXT: .byte 255 // 0xff
591 ; CHECK-GI-NEXT: .byte 255 // 0xff
592 ; CHECK-GI-NEXT: .byte 255 // 0xff
593 ; CHECK-GI-NEXT: .byte 255 // 0xff
594 ; CHECK-GI-NEXT: .byte 255 // 0xff
595 ; CHECK-GI-NEXT: .byte 255 // 0xff
596 ; CHECK-GI-NEXT: .byte 255 // 0xff
; Mirror of the nonconst_first_mask case: here the FIRST shuffle operand (%t1,
; over %c/%d) uses the constant mask and the SECOND operand (%t2, over %a/%b)
; uses a mask built with insertelement from the i8 argument %v (lanes 0-7) and
; -1 (lanes 8-15).
; Expected output is NOT a single four-register tbl in either run:
;  * default run: two two-register tbl.16b (constant mask from .LCPI12_0, and
;    a mask built in v4 from an all-ones movi with lanes 0-7 overwritten from
;    w0), combined with mov.d v2[1], v0[0];
;  * -global-isel run: the variable mask is built in v5, both two-register
;    tbl.16b are kept, and a third tbl.16b with its mask from .LCPI12_0
;    performs the shuffle.
598 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
599 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
600 ; CHECK-SD: // %bb.0:
601 ; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff
602 ; CHECK-SD-NEXT: adrp x8, .LCPI12_0
603 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
604 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
605 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0]
606 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
607 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
608 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5
609 ; CHECK-SD-NEXT: mov.b v4[0], w0
610 ; CHECK-SD-NEXT: mov.b v4[1], w0
611 ; CHECK-SD-NEXT: mov.b v4[2], w0
612 ; CHECK-SD-NEXT: mov.b v4[3], w0
613 ; CHECK-SD-NEXT: mov.b v4[4], w0
614 ; CHECK-SD-NEXT: mov.b v4[5], w0
615 ; CHECK-SD-NEXT: mov.b v4[6], w0
616 ; CHECK-SD-NEXT: mov.b v4[7], w0
617 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v4
618 ; CHECK-SD-NEXT: mov.d v2[1], v0[0]
619 ; CHECK-SD-NEXT: mov.16b v0, v2
622 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
623 ; CHECK-GI: // %bb.0:
624 ; CHECK-GI-NEXT: fmov s4, w0
625 ; CHECK-GI-NEXT: mov w8, #255 // =0xff
626 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
627 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
628 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
629 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
630 ; CHECK-GI-NEXT: mov.16b v5, v4
631 ; CHECK-GI-NEXT: mov.b v5[1], v4[0]
632 ; CHECK-GI-NEXT: mov.b v5[2], v4[0]
633 ; CHECK-GI-NEXT: mov.b v5[3], v4[0]
634 ; CHECK-GI-NEXT: mov.b v5[4], v4[0]
635 ; CHECK-GI-NEXT: mov.b v5[5], v4[0]
636 ; CHECK-GI-NEXT: mov.b v5[6], v4[0]
637 ; CHECK-GI-NEXT: mov.b v5[7], v4[0]
638 ; CHECK-GI-NEXT: fmov s4, w8
639 ; CHECK-GI-NEXT: adrp x8, .LCPI12_1
640 ; CHECK-GI-NEXT: mov.b v5[8], v4[0]
641 ; CHECK-GI-NEXT: mov.b v5[9], v4[0]
642 ; CHECK-GI-NEXT: mov.b v5[10], v4[0]
643 ; CHECK-GI-NEXT: mov.b v5[11], v4[0]
644 ; CHECK-GI-NEXT: mov.b v5[12], v4[0]
645 ; CHECK-GI-NEXT: mov.b v5[13], v4[0]
646 ; CHECK-GI-NEXT: mov.b v5[14], v4[0]
647 ; CHECK-GI-NEXT: mov.b v5[15], v4[0]
648 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI12_1]
649 ; CHECK-GI-NEXT: adrp x8, .LCPI12_0
650 ; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v4
651 ; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v5
652 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
653 ; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0
655 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
656 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
657 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
658 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
659 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
660 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
661 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
662 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
663 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
664 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
665 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
666 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
667 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
668 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
669 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
670 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
671 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
672 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
673 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
677 ; CHECK-SD-LABEL: .LCPI13_0:
678 ; CHECK-SD: .byte 0 // 0x0
679 ; CHECK-SD-NEXT: .byte 4 // 0x4
680 ; CHECK-SD-NEXT: .byte 8 // 0x8
681 ; CHECK-SD-NEXT: .byte 12 // 0xc
682 ; CHECK-SD-NEXT: .byte 16 // 0x10
683 ; CHECK-SD-NEXT: .byte 20 // 0x14
684 ; CHECK-SD-NEXT: .byte 24 // 0x18
685 ; CHECK-SD-NEXT: .byte 28 // 0x1c
686 ; CHECK-SD-NEXT: .byte 255 // 0xff
687 ; CHECK-SD-NEXT: .byte 255 // 0xff
688 ; CHECK-SD-NEXT: .byte 255 // 0xff
689 ; CHECK-SD-NEXT: .byte 255 // 0xff
690 ; CHECK-SD-NEXT: .byte 255 // 0xff
691 ; CHECK-SD-NEXT: .byte 255 // 0xff
692 ; CHECK-SD-NEXT: .byte 255 // 0xff
693 ; CHECK-SD-NEXT: .byte 255 // 0xff
694 ; CHECK-SD-LABEL: .LCPI13_1:
695 ; CHECK-SD: .byte 0 // 0x0
696 ; CHECK-SD-NEXT: .byte 1 // 0x1
697 ; CHECK-SD-NEXT: .byte 2 // 0x2
698 ; CHECK-SD-NEXT: .byte 3 // 0x3
699 ; CHECK-SD-NEXT: .byte 4 // 0x4
700 ; CHECK-SD-NEXT: .byte 5 // 0x5
701 ; CHECK-SD-NEXT: .byte 6 // 0x6
702 ; CHECK-SD-NEXT: .byte 7 // 0x7
703 ; CHECK-SD-NEXT: .byte 16 // 0x10
704 ; CHECK-SD-NEXT: .byte 17 // 0x11
705 ; CHECK-SD-NEXT: .byte 18 // 0x12
706 ; CHECK-SD-NEXT: .byte 19 // 0x13
707 ; CHECK-SD-NEXT: .byte 20 // 0x14
708 ; CHECK-SD-NEXT: .byte 21 // 0x15
709 ; CHECK-SD-NEXT: .byte 30 // 0x1e
710 ; CHECK-SD-NEXT: .byte 31 // 0x1f
712 ; CHECK-GI-LABEL: .LCPI13_0:
713 ; CHECK-GI: .byte 0 // 0x0
714 ; CHECK-GI-NEXT: .byte 1 // 0x1
715 ; CHECK-GI-NEXT: .byte 2 // 0x2
716 ; CHECK-GI-NEXT: .byte 3 // 0x3
717 ; CHECK-GI-NEXT: .byte 4 // 0x4
718 ; CHECK-GI-NEXT: .byte 5 // 0x5
719 ; CHECK-GI-NEXT: .byte 6 // 0x6
720 ; CHECK-GI-NEXT: .byte 7 // 0x7
721 ; CHECK-GI-NEXT: .byte 16 // 0x10
722 ; CHECK-GI-NEXT: .byte 17 // 0x11
723 ; CHECK-GI-NEXT: .byte 18 // 0x12
724 ; CHECK-GI-NEXT: .byte 19 // 0x13
725 ; CHECK-GI-NEXT: .byte 20 // 0x14
726 ; CHECK-GI-NEXT: .byte 21 // 0x15
727 ; CHECK-GI-NEXT: .byte 30 // 0x1e
728 ; CHECK-GI-NEXT: .byte 31 // 0x1f
729 ; CHECK-GI-LABEL: .LCPI13_1:
730 ; CHECK-GI: .byte 0 // 0x0
731 ; CHECK-GI-NEXT: .byte 4 // 0x4
732 ; CHECK-GI-NEXT: .byte 8 // 0x8
733 ; CHECK-GI-NEXT: .byte 12 // 0xc
734 ; CHECK-GI-NEXT: .byte 16 // 0x10
735 ; CHECK-GI-NEXT: .byte 20 // 0x14
736 ; CHECK-GI-NEXT: .byte 24 // 0x18
737 ; CHECK-GI-NEXT: .byte 28 // 0x1c
738 ; CHECK-GI-NEXT: .byte 255 // 0xff
739 ; CHECK-GI-NEXT: .byte 255 // 0xff
740 ; CHECK-GI-NEXT: .byte 255 // 0xff
741 ; CHECK-GI-NEXT: .byte 255 // 0xff
742 ; CHECK-GI-NEXT: .byte 255 // 0xff
743 ; CHECK-GI-NEXT: .byte 255 // 0xff
744 ; CHECK-GI-NEXT: .byte 255 // 0xff
745 ; CHECK-GI-NEXT: .byte 255 // 0xff
; Two tbl2 lookups whose results are merged by a shufflevector. The mask of the
; (%a, %b) lookup is not a compile-time constant: it is assembled from the i8
; argument %v, with the constant -1 placed in lanes 8-13. Both the SD-prefixed
; and the GI-prefixed expectations below build that mask with vector
; instructions and keep three separate tbl.16b instructions.
747 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
748 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
749 ; CHECK-SD: // %bb.0:
750 ; CHECK-SD-NEXT: dup.16b v4, w0
751 ; CHECK-SD-NEXT: mov w8, #255 // =0xff
752 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
753 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
754 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
755 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
756 ; CHECK-SD-NEXT: mov.b v4[8], w8
757 ; CHECK-SD-NEXT: mov.b v4[9], w8
758 ; CHECK-SD-NEXT: mov.b v4[10], w8
759 ; CHECK-SD-NEXT: mov.b v4[11], w8
760 ; CHECK-SD-NEXT: mov.b v4[12], w8
761 ; CHECK-SD-NEXT: mov.b v4[13], w8
762 ; CHECK-SD-NEXT: adrp x8, .LCPI13_0
763 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI13_0]
764 ; CHECK-SD-NEXT: adrp x8, .LCPI13_1
765 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5
766 ; CHECK-SD-NEXT: tbl.16b v3, { v0, v1 }, v4
767 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI13_1]
768 ; CHECK-SD-NEXT: tbl.16b v0, { v2, v3 }, v0
771 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
772 ; CHECK-GI: // %bb.0:
773 ; CHECK-GI-NEXT: fmov s4, w0
774 ; CHECK-GI-NEXT: mov w8, #255 // =0xff
775 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
776 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
777 ; CHECK-GI-NEXT: fmov s6, w8
778 ; CHECK-GI-NEXT: adrp x8, .LCPI13_1
779 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
780 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
781 ; CHECK-GI-NEXT: mov.16b v5, v4
782 ; CHECK-GI-NEXT: mov.b v5[1], v4[0]
783 ; CHECK-GI-NEXT: mov.b v5[2], v4[0]
784 ; CHECK-GI-NEXT: mov.b v5[3], v4[0]
785 ; CHECK-GI-NEXT: mov.b v5[4], v4[0]
786 ; CHECK-GI-NEXT: mov.b v5[5], v4[0]
787 ; CHECK-GI-NEXT: mov.b v5[6], v4[0]
788 ; CHECK-GI-NEXT: mov.b v5[7], v4[0]
789 ; CHECK-GI-NEXT: mov.b v5[8], v6[0]
790 ; CHECK-GI-NEXT: mov.b v5[9], v6[0]
791 ; CHECK-GI-NEXT: mov.b v5[10], v6[0]
792 ; CHECK-GI-NEXT: mov.b v5[11], v6[0]
793 ; CHECK-GI-NEXT: mov.b v5[12], v6[0]
794 ; CHECK-GI-NEXT: mov.b v5[13], v6[0]
795 ; CHECK-GI-NEXT: mov.b v5[14], v4[0]
796 ; CHECK-GI-NEXT: mov.b v5[15], v4[0]
797 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI13_1]
798 ; CHECK-GI-NEXT: adrp x8, .LCPI13_0
799 ; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v4
800 ; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v5
801 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
802 ; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0
; Build the variable mask: %v goes into lanes 0-7 and 14-15, the constant -1
; into lanes 8-13.
804 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
805 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
806 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
807 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
808 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
809 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
810 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
811 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
812 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
813 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
814 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
815 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
816 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
817 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
818 %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
819 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
; %t1 looks up (%c, %d) with a fully constant mask; %t2 looks up (%a, %b) with
; the variable mask built above.
820 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
821 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
; Result lanes 0-7 come from %t1 lanes 0-7, lanes 8-13 from %t2 lanes 0-5
; (indices 16-21), and lanes 14-15 from %t2 lanes 14-15 (indices 30-31).
822 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
826 ; CHECK-SD-LABEL: .LCPI14_0:
827 ; CHECK-SD: .byte 0 // 0x0
828 ; CHECK-SD-NEXT: .byte 4 // 0x4
829 ; CHECK-SD-NEXT: .byte 52 // 0x34
830 ; CHECK-SD-NEXT: .byte 12 // 0xc
831 ; CHECK-SD-NEXT: .byte 16 // 0x10
832 ; CHECK-SD-NEXT: .byte 20 // 0x14
833 ; CHECK-SD-NEXT: .byte 24 // 0x18
834 ; CHECK-SD-NEXT: .byte 28 // 0x1c
835 ; CHECK-SD-NEXT: .byte 32 // 0x20
836 ; CHECK-SD-NEXT: .byte 36 // 0x24
837 ; CHECK-SD-NEXT: .byte 40 // 0x28
838 ; CHECK-SD-NEXT: .byte 44 // 0x2c
839 ; CHECK-SD-NEXT: .byte 48 // 0x30
840 ; CHECK-SD-NEXT: .byte 52 // 0x34
841 ; CHECK-SD-NEXT: .byte 56 // 0x38
842 ; CHECK-SD-NEXT: .byte 60 // 0x3c
844 ; CHECK-GI-LABEL: .LCPI14_0:
845 ; CHECK-GI: .byte 0 // 0x0
846 ; CHECK-GI-NEXT: .byte 1 // 0x1
847 ; CHECK-GI-NEXT: .byte 21 // 0x15
848 ; CHECK-GI-NEXT: .byte 3 // 0x3
849 ; CHECK-GI-NEXT: .byte 4 // 0x4
850 ; CHECK-GI-NEXT: .byte 5 // 0x5
851 ; CHECK-GI-NEXT: .byte 6 // 0x6
852 ; CHECK-GI-NEXT: .byte 7 // 0x7
853 ; CHECK-GI-NEXT: .byte 16 // 0x10
854 ; CHECK-GI-NEXT: .byte 17 // 0x11
855 ; CHECK-GI-NEXT: .byte 18 // 0x12
856 ; CHECK-GI-NEXT: .byte 19 // 0x13
857 ; CHECK-GI-NEXT: .byte 20 // 0x14
858 ; CHECK-GI-NEXT: .byte 21 // 0x15
859 ; CHECK-GI-NEXT: .byte 22 // 0x16
860 ; CHECK-GI-NEXT: .byte 23 // 0x17
861 ; CHECK-GI-LABEL: .LCPI14_1:
862 ; CHECK-GI: .byte 0 // 0x0
863 ; CHECK-GI-NEXT: .byte 4 // 0x4
864 ; CHECK-GI-NEXT: .byte 8 // 0x8
865 ; CHECK-GI-NEXT: .byte 12 // 0xc
866 ; CHECK-GI-NEXT: .byte 16 // 0x10
867 ; CHECK-GI-NEXT: .byte 20 // 0x14
868 ; CHECK-GI-NEXT: .byte 24 // 0x18
869 ; CHECK-GI-NEXT: .byte 28 // 0x1c
870 ; CHECK-GI-NEXT: .byte 255 // 0xff
871 ; CHECK-GI-NEXT: .byte 255 // 0xff
872 ; CHECK-GI-NEXT: .byte 255 // 0xff
873 ; CHECK-GI-NEXT: .byte 255 // 0xff
874 ; CHECK-GI-NEXT: .byte 255 // 0xff
875 ; CHECK-GI-NEXT: .byte 255 // 0xff
876 ; CHECK-GI-NEXT: .byte 255 // 0xff
877 ; CHECK-GI-NEXT: .byte 255 // 0xff
; Both tbl2 lookups use the same constant mask, and the shufflevector takes its
; lane 2 from %t2 (index 21) in between lanes taken from %t1. The SD-prefixed
; expectations are a single four-register tbl.16b whose mask is loaded from
; .LCPI14_0; the GI-prefixed expectations are two two-register lookups followed
; by a third tbl.16b.
879 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
880 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
881 ; CHECK-SD: // %bb.0:
882 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
883 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0
884 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
885 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0]
886 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
887 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
888 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
891 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
892 ; CHECK-GI: // %bb.0:
893 ; CHECK-GI-NEXT: adrp x8, .LCPI14_1
894 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
895 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
896 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1]
897 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
898 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
899 ; CHECK-GI-NEXT: adrp x8, .LCPI14_0
900 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
901 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
902 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
903 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
905 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
906 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Lanes 0, 1 and 3-7 select the same lanes of %t1; lane 2 selects %t2 lane 5
; (index 21); lanes 8-15 select %t2 lanes 0-7 (indices 16-23).
907 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
911 ; CHECK-SD-LABEL: .LCPI15_0:
912 ; CHECK-SD: .byte 0 // 0x0
913 ; CHECK-SD-NEXT: .byte 4 // 0x4
914 ; CHECK-SD-NEXT: .byte 52 // 0x34
915 ; CHECK-SD-NEXT: .byte 12 // 0xc
916 ; CHECK-SD-NEXT: .byte 16 // 0x10
917 ; CHECK-SD-NEXT: .byte 20 // 0x14
918 ; CHECK-SD-NEXT: .byte 24 // 0x18
919 ; CHECK-SD-NEXT: .byte 28 // 0x1c
920 ; CHECK-SD-NEXT: .byte 32 // 0x20
921 ; CHECK-SD-NEXT: .byte 36 // 0x24
922 ; CHECK-SD-NEXT: .byte 40 // 0x28
923 ; CHECK-SD-NEXT: .byte 44 // 0x2c
924 ; CHECK-SD-NEXT: .byte 48 // 0x30
925 ; CHECK-SD-NEXT: .byte 52 // 0x34
926 ; CHECK-SD-NEXT: .byte 56 // 0x38
927 ; CHECK-SD-NEXT: .byte 60 // 0x3c
929 ; CHECK-GI-LABEL: .LCPI15_0:
930 ; CHECK-GI: .byte 0 // 0x0
931 ; CHECK-GI-NEXT: .byte 1 // 0x1
932 ; CHECK-GI-NEXT: .byte 21 // 0x15
933 ; CHECK-GI-NEXT: .byte 3 // 0x3
934 ; CHECK-GI-NEXT: .byte 4 // 0x4
935 ; CHECK-GI-NEXT: .byte 5 // 0x5
936 ; CHECK-GI-NEXT: .byte 6 // 0x6
937 ; CHECK-GI-NEXT: .byte 7 // 0x7
938 ; CHECK-GI-NEXT: .byte 16 // 0x10
939 ; CHECK-GI-NEXT: .byte 17 // 0x11
940 ; CHECK-GI-NEXT: .byte 18 // 0x12
941 ; CHECK-GI-NEXT: .byte 19 // 0x13
942 ; CHECK-GI-NEXT: .byte 20 // 0x14
943 ; CHECK-GI-NEXT: .byte 21 // 0x15
944 ; CHECK-GI-NEXT: .byte 22 // 0x16
945 ; CHECK-GI-NEXT: .byte 23 // 0x17
946 ; CHECK-GI-LABEL: .LCPI15_1:
947 ; CHECK-GI: .byte 0 // 0x0
948 ; CHECK-GI-NEXT: .byte 4 // 0x4
949 ; CHECK-GI-NEXT: .byte 8 // 0x8
950 ; CHECK-GI-NEXT: .byte 12 // 0xc
951 ; CHECK-GI-NEXT: .byte 16 // 0x10
952 ; CHECK-GI-NEXT: .byte 20 // 0x14
953 ; CHECK-GI-NEXT: .byte 24 // 0x18
954 ; CHECK-GI-NEXT: .byte 28 // 0x1c
955 ; CHECK-GI-NEXT: .byte 255 // 0xff
956 ; CHECK-GI-NEXT: .byte 255 // 0xff
957 ; CHECK-GI-NEXT: .byte 255 // 0xff
958 ; CHECK-GI-NEXT: .byte 255 // 0xff
959 ; CHECK-GI-NEXT: .byte 255 // 0xff
960 ; CHECK-GI-NEXT: .byte 255 // 0xff
961 ; CHECK-GI-NEXT: .byte 255 // 0xff
962 ; CHECK-GI-NEXT: .byte 255 // 0xff
963 ; CHECK-GI-LABEL: .LCPI15_2:
964 ; CHECK-GI: .byte 0 // 0x0
965 ; CHECK-GI-NEXT: .byte 4 // 0x4
966 ; CHECK-GI-NEXT: .byte 8 // 0x8
967 ; CHECK-GI-NEXT: .byte 12 // 0xc
968 ; CHECK-GI-NEXT: .byte 16 // 0x10
969 ; CHECK-GI-NEXT: .byte 20 // 0x14
970 ; CHECK-GI-NEXT: .byte 24 // 0x18
971 ; CHECK-GI-NEXT: .byte 28 // 0x1c
972 ; CHECK-GI-NEXT: .byte 0 // 0x0
973 ; CHECK-GI-NEXT: .byte 255 // 0xff
974 ; CHECK-GI-NEXT: .byte 255 // 0xff
975 ; CHECK-GI-NEXT: .byte 255 // 0xff
976 ; CHECK-GI-NEXT: .byte 255 // 0xff
977 ; CHECK-GI-NEXT: .byte 255 // 0xff
978 ; CHECK-GI-NEXT: .byte 255 // 0xff
979 ; CHECK-GI-NEXT: .byte 255 // 0xff
; Same shuffle as shuffled_tbl2_to_tbl4_mixed_shuffle, but the mask of the
; first lookup, on (%a, %b), has 0 instead of -1 in lane 8, so the two tbl2
; masks differ. The SD-prefixed expectations are still a single four-register
; tbl.16b; the GI-prefixed expectations load the two lookup masks separately
; (.LCPI15_2 and .LCPI15_1) and emit three tbl.16b instructions.
981 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
982 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
983 ; CHECK-SD: // %bb.0:
984 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
985 ; CHECK-SD-NEXT: adrp x8, .LCPI15_0
986 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
987 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0]
988 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
989 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
990 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
993 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
994 ; CHECK-GI: // %bb.0:
995 ; CHECK-GI-NEXT: adrp x8, .LCPI15_2
996 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
997 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
998 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2]
999 ; CHECK-GI-NEXT: adrp x8, .LCPI15_1
1000 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1001 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
1002 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1]
1003 ; CHECK-GI-NEXT: adrp x8, .LCPI15_0
1004 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
1005 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5
1006 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
1007 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
1008 ; CHECK-GI-NEXT: ret
; %t1's mask carries the distinguishing 0 in lane 8; %t2's mask is -1 in all of
; lanes 8-15.
1009 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1010 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Lanes 0, 1 and 3-7 select the same lanes of %t1; lane 2 selects %t2 lane 5
; (index 21); lanes 8-15 select %t2 lanes 0-7 (indices 16-23).
1011 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1015 ; CHECK-SD-LABEL: .LCPI16_0:
1016 ; CHECK-SD: .byte 0 // 0x0
1017 ; CHECK-SD-NEXT: .byte 4 // 0x4
1018 ; CHECK-SD-NEXT: .byte 52 // 0x34
1019 ; CHECK-SD-NEXT: .byte 12 // 0xc
1020 ; CHECK-SD-NEXT: .byte 16 // 0x10
1021 ; CHECK-SD-NEXT: .byte 20 // 0x14
1022 ; CHECK-SD-NEXT: .byte 24 // 0x18
1023 ; CHECK-SD-NEXT: .byte 28 // 0x1c
1024 ; CHECK-SD-NEXT: .byte 32 // 0x20
1025 ; CHECK-SD-NEXT: .byte 36 // 0x24
1026 ; CHECK-SD-NEXT: .byte 40 // 0x28
1027 ; CHECK-SD-NEXT: .byte 44 // 0x2c
1028 ; CHECK-SD-NEXT: .byte 48 // 0x30
1029 ; CHECK-SD-NEXT: .byte 52 // 0x34
1030 ; CHECK-SD-NEXT: .byte 56 // 0x38
1031 ; CHECK-SD-NEXT: .byte 60 // 0x3c
1033 ; CHECK-GI-LABEL: .LCPI16_0:
1034 ; CHECK-GI: .byte 0 // 0x0
1035 ; CHECK-GI-NEXT: .byte 1 // 0x1
1036 ; CHECK-GI-NEXT: .byte 21 // 0x15
1037 ; CHECK-GI-NEXT: .byte 3 // 0x3
1038 ; CHECK-GI-NEXT: .byte 4 // 0x4
1039 ; CHECK-GI-NEXT: .byte 5 // 0x5
1040 ; CHECK-GI-NEXT: .byte 6 // 0x6
1041 ; CHECK-GI-NEXT: .byte 7 // 0x7
1042 ; CHECK-GI-NEXT: .byte 16 // 0x10
1043 ; CHECK-GI-NEXT: .byte 17 // 0x11
1044 ; CHECK-GI-NEXT: .byte 18 // 0x12
1045 ; CHECK-GI-NEXT: .byte 19 // 0x13
1046 ; CHECK-GI-NEXT: .byte 20 // 0x14
1047 ; CHECK-GI-NEXT: .byte 21 // 0x15
1048 ; CHECK-GI-NEXT: .byte 22 // 0x16
1049 ; CHECK-GI-NEXT: .byte 23 // 0x17
1050 ; CHECK-GI-LABEL: .LCPI16_1:
1051 ; CHECK-GI: .byte 0 // 0x0
1052 ; CHECK-GI-NEXT: .byte 4 // 0x4
1053 ; CHECK-GI-NEXT: .byte 8 // 0x8
1054 ; CHECK-GI-NEXT: .byte 12 // 0xc
1055 ; CHECK-GI-NEXT: .byte 16 // 0x10
1056 ; CHECK-GI-NEXT: .byte 20 // 0x14
1057 ; CHECK-GI-NEXT: .byte 24 // 0x18
1058 ; CHECK-GI-NEXT: .byte 28 // 0x1c
1059 ; CHECK-GI-NEXT: .byte 0 // 0x0
1060 ; CHECK-GI-NEXT: .byte 255 // 0xff
1061 ; CHECK-GI-NEXT: .byte 255 // 0xff
1062 ; CHECK-GI-NEXT: .byte 255 // 0xff
1063 ; CHECK-GI-NEXT: .byte 255 // 0xff
1064 ; CHECK-GI-NEXT: .byte 255 // 0xff
1065 ; CHECK-GI-NEXT: .byte 255 // 0xff
1066 ; CHECK-GI-NEXT: .byte 255 // 0xff
1067 ; CHECK-GI-LABEL: .LCPI16_2:
1068 ; CHECK-GI: .byte 0 // 0x0
1069 ; CHECK-GI-NEXT: .byte 4 // 0x4
1070 ; CHECK-GI-NEXT: .byte 8 // 0x8
1071 ; CHECK-GI-NEXT: .byte 12 // 0xc
1072 ; CHECK-GI-NEXT: .byte 16 // 0x10
1073 ; CHECK-GI-NEXT: .byte 20 // 0x14
1074 ; CHECK-GI-NEXT: .byte 24 // 0x18
1075 ; CHECK-GI-NEXT: .byte 28 // 0x1c
1076 ; CHECK-GI-NEXT: .byte 255 // 0xff
1077 ; CHECK-GI-NEXT: .byte 255 // 0xff
1078 ; CHECK-GI-NEXT: .byte 255 // 0xff
1079 ; CHECK-GI-NEXT: .byte 255 // 0xff
1080 ; CHECK-GI-NEXT: .byte 255 // 0xff
1081 ; CHECK-GI-NEXT: .byte 255 // 0xff
1082 ; CHECK-GI-NEXT: .byte 255 // 0xff
1083 ; CHECK-GI-NEXT: .byte 255 // 0xff
; Same shuffle as shuffled_tbl2_to_tbl4_mixed_shuffle, but the mask of the
; second lookup, on (%c, %d), has 0 instead of -1 in lane 8, so the two tbl2
; masks differ. The SD-prefixed expectations are still a single four-register
; tbl.16b; the GI-prefixed expectations load the two lookup masks separately
; (.LCPI16_2 and .LCPI16_1) and emit three tbl.16b instructions.
1085 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
1086 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
1087 ; CHECK-SD: // %bb.0:
1088 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1089 ; CHECK-SD-NEXT: adrp x8, .LCPI16_0
1090 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1091 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
1092 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1093 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1094 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
1095 ; CHECK-SD-NEXT: ret
1097 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
1098 ; CHECK-GI: // %bb.0:
1099 ; CHECK-GI-NEXT: adrp x8, .LCPI16_2
1100 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
1101 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
1102 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
1103 ; CHECK-GI-NEXT: adrp x8, .LCPI16_1
1104 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1105 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
1106 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1]
1107 ; CHECK-GI-NEXT: adrp x8, .LCPI16_0
1108 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
1109 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5
1110 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
1111 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
1112 ; CHECK-GI-NEXT: ret
; %t1's mask is -1 in all of lanes 8-15; %t2's mask carries the distinguishing
; 0 in lane 8.
1113 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1114 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Lanes 0, 1 and 3-7 select the same lanes of %t1; lane 2 selects %t2 lane 5
; (index 21); lanes 8-15 select %t2 lanes 0-7 (indices 16-23).
1115 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Declarations of the llvm.aarch64.neon.tbl1 through tbl4 intrinsics, each in a
; <8 x i8> and a <16 x i8> result variant. The trailing operand is the index
; vector; the preceding <16 x i8> operands are the table registers.
1119 declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
1120 declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1121 declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1122 declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1123 declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1124 declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1125 declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1126 declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; tbx1 with an <8 x i8> result: %A is passed as the first operand, %B is the
; single table register and %C the index vector. Both runs share one set of
; expectations: a tbx.8b with a one-register table list.
1128 define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
1129 ; CHECK-LABEL: tbx1_8b:
1131 ; CHECK-NEXT: tbx.8b v0, { v1 }, v2
1133 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
; tbx1 with a <16 x i8> result: %A is passed as the first operand, %B is the
; single table register and %C the index vector. Both runs share one set of
; expectations: a tbx.16b with a one-register table list.
1137 define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
1138 ; CHECK-LABEL: tbx1_16b:
1140 ; CHECK-NEXT: tbx.16b v0, { v1 }, v2
1142 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
; tbx2 with an <8 x i8> result and a two-register table (%B, %C). The SD- and
; GI-prefixed expectations contain the same tbx.8b instruction and differ only
; in the order of the register-kill comments preceding it.
1146 define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
1147 ; CHECK-SD-LABEL: tbx2_8b:
1148 ; CHECK-SD: // %bb.0:
1149 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
1150 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
1151 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3
1152 ; CHECK-SD-NEXT: ret
1154 ; CHECK-GI-LABEL: tbx2_8b:
1155 ; CHECK-GI: // %bb.0:
1156 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
1157 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
1158 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3
1159 ; CHECK-GI-NEXT: ret
1160 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
; tbx2 with a <16 x i8> result and a two-register table (%B, %C). The SD- and
; GI-prefixed expectations contain the same tbx.16b instruction and differ only
; in the order of the register-kill comments preceding it.
1164 define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
1165 ; CHECK-SD-LABEL: tbx2_16b:
1166 ; CHECK-SD: // %bb.0:
1167 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
1168 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
1169 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3
1170 ; CHECK-SD-NEXT: ret
1172 ; CHECK-GI-LABEL: tbx2_16b:
1173 ; CHECK-GI: // %bb.0:
1174 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
1175 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
1176 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3
1177 ; CHECK-GI-NEXT: ret
1178 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
; tbx3 with an <8 x i8> result and a three-register table (%B, %C, %D). The SD-
; and GI-prefixed expectations contain the same tbx.8b instruction and differ
; only in the order of the register-kill comments preceding it.
1182 define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
1183 ; CHECK-SD-LABEL: tbx3_8b:
1184 ; CHECK-SD: // %bb.0:
1185 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
1186 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
1187 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
1188 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
1189 ; CHECK-SD-NEXT: ret
1191 ; CHECK-GI-LABEL: tbx3_8b:
1192 ; CHECK-GI: // %bb.0:
1193 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
1194 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
1195 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
1196 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
1197 ; CHECK-GI-NEXT: ret
1198 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
; tbx3 with a <16 x i8> result and a three-register table (%B, %C, %D). The SD-
; and GI-prefixed expectations contain the same tbx.16b instruction and differ
; only in the order of the register-kill comments preceding it.
1202 define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
1203 ; CHECK-SD-LABEL: tbx3_16b:
1204 ; CHECK-SD: // %bb.0:
1205 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
1206 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
1207 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
1208 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
1209 ; CHECK-SD-NEXT: ret
1211 ; CHECK-GI-LABEL: tbx3_16b:
1212 ; CHECK-GI: // %bb.0:
1213 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
1214 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
1215 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
1216 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
1217 ; CHECK-GI-NEXT: ret
1218 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
; tbx4 with an <8 x i8> result and a four-register table (%B, %C, %D, %E). The
; SD- and GI-prefixed expectations contain the same tbx.8b instruction and
; differ only in the order of the register-kill comments preceding it.
1222 define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
1223 ; CHECK-SD-LABEL: tbx4_8b:
1224 ; CHECK-SD: // %bb.0:
1225 ; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1226 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1227 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1228 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1229 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
1230 ; CHECK-SD-NEXT: ret
1232 ; CHECK-GI-LABEL: tbx4_8b:
1233 ; CHECK-GI: // %bb.0:
1234 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1235 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1236 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1237 ; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1238 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
1239 ; CHECK-GI-NEXT: ret
1240 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
; tbx4 with a <16 x i8> result and a four-register table (%B, %C, %D, %E). The
; SD- and GI-prefixed expectations contain the same tbx.16b instruction and
; differ only in the order of the register-kill comments preceding it.
1244 define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
1245 ; CHECK-SD-LABEL: tbx4_16b:
1246 ; CHECK-SD: // %bb.0:
1247 ; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1248 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1249 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1250 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1251 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
1252 ; CHECK-SD-NEXT: ret
1254 ; CHECK-GI-LABEL: tbx4_16b:
1255 ; CHECK-GI: // %bb.0:
1256 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1257 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1258 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1259 ; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
1260 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
1261 ; CHECK-GI-NEXT: ret
1262 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
; Declarations of the llvm.aarch64.neon.tbx1 through tbx4 intrinsics, each in a
; <8 x i8> and a <16 x i8> result variant. The first operand has the result
; type, the trailing operand is the index vector, and the <16 x i8> operands in
; between are the table registers.
1266 declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1267 declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1268 declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1269 declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1270 declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1271 declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1272 declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
1273 declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone