1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; Passes %A (<16 x i8>) and %B (<8 x i8>) to llvm.aarch64.neon.tbl1.v8i8.
; The assertions expect a single `tbl.8b v0, { v0 }, v1`.
4 define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
5 ; CHECK-LABEL: tbl1_8b:
7 ; CHECK-NEXT: tbl.8b v0, { v0 }, v1
9 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
; Passes %A and %B (both <16 x i8>) to llvm.aarch64.neon.tbl1.v16i8.
; The assertions expect a single `tbl.16b v0, { v0 }, v1`.
13 define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
14 ; CHECK-LABEL: tbl1_16b:
16 ; CHECK-NEXT: tbl.16b v0, { v0 }, v1
18 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
; Passes %A, %B (<16 x i8>) and %C (<8 x i8>) to llvm.aarch64.neon.tbl2.v8i8.
; The assertions expect `// kill:` annotations for $q1 and $q0 against the
; $q0_q1 tuple, followed by `tbl.8b v0, { v0, v1 }, v2`.
22 define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
23 ; CHECK-LABEL: tbl2_8b:
25 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
26 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
27 ; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v2
29 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
; Passes %A, %B and %C (all <16 x i8>) to llvm.aarch64.neon.tbl2.v16i8.
; The assertions expect `// kill:` annotations for $q1 and $q0 against the
; $q0_q1 tuple, followed by `tbl.16b v0, { v0, v1 }, v2`.
33 define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
34 ; CHECK-LABEL: tbl2_16b:
36 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
37 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
38 ; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
40 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
; Passes %A, %B, %C (<16 x i8>) and %D (<8 x i8>) to
; llvm.aarch64.neon.tbl3.v8i8. The assertions expect `// kill:` annotations
; for $q2, $q1 and $q0 against the $q0_q1_q2 tuple, followed by
; `tbl.8b v0, { v0, v1, v2 }, v3`.
44 define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
45 ; CHECK-LABEL: tbl3_8b:
47 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
48 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
49 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
50 ; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
52 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
; Passes %A, %B, %C and %D (all <16 x i8>) to llvm.aarch64.neon.tbl3.v16i8.
; The assertions expect `// kill:` annotations for $q2, $q1 and $q0 against
; the $q0_q1_q2 tuple, followed by `tbl.16b v0, { v0, v1, v2 }, v3`.
56 define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
57 ; CHECK-LABEL: tbl3_16b:
59 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
60 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
61 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
62 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
64 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
; Passes %A, %B, %C, %D (<16 x i8>) and %E (<8 x i8>) to
; llvm.aarch64.neon.tbl4.v8i8. The assertions expect `// kill:` annotations
; for $q3 down to $q0 against the $q0_q1_q2_q3 tuple, followed by
; `tbl.8b v0, { v0, v1, v2, v3 }, v4`.
68 define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
69 ; CHECK-LABEL: tbl4_8b:
71 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
72 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
73 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
74 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
75 ; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
77 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
; Passes %A through %E (all <16 x i8>) to llvm.aarch64.neon.tbl4.v16i8.
; The assertions expect `// kill:` annotations for $q3 down to $q0 against
; the $q0_q1_q2_q3 tuple, followed by `tbl.16b v0, { v0, v1, v2, v3 }, v4`.
81 define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
82 ; CHECK-LABEL: tbl4_16b:
84 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
85 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
86 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
87 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
88 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
90 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
; 8-byte variant: two tbl2.v8i8 calls that share the constant mask
; <0, 4, 8, 12, -1, -1, -1, -1>, combined by a shufflevector.
; The assertions expect the constant-pool entry .LCPI8_0 (0, 4, 8, 12, then
; four 255 bytes), two separate two-register `tbl.8b` instructions using
; that mask, and a `mov.s v0[1], v1[1]` to merge the results.
94 ; CHECK-LABEL: .LCPI8_0:
95 ; CHECK-NEXT: .byte 0 // 0x0
96 ; CHECK-NEXT: .byte 4 // 0x4
97 ; CHECK-NEXT: .byte 8 // 0x8
98 ; CHECK-NEXT: .byte 12 // 0xc
99 ; CHECK-NEXT: .byte 255 // 0xff
100 ; CHECK-NEXT: .byte 255 // 0xff
101 ; CHECK-NEXT: .byte 255 // 0xff
102 ; CHECK-NEXT: .byte 255 // 0xff
104 define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
105 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_v8i8:
107 ; CHECK-NEXT: adrp x8, .LCPI8_0
108 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
109 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
110 ; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
111 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
112 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
113 ; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v4
114 ; CHECK-NEXT: tbl.8b v1, { v2, v3 }, v4
115 ; CHECK-NEXT: mov.s v0[1], v1[1]
116 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
118 %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
119 %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
; Result lanes 0-3 come from %t1 lanes 0-3; lanes 4-7 come from %t2 lanes 4-7
; (shuffle indices 12-15).
120 %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; Two tbl2.v16i8 calls with the same constant mask
; <0, 4, ..., 28, -1 x 8>, combined by a shufflevector that takes lanes 0-7 of
; each result. The assertions expect a single four-register `tbl.16b` whose
; mask is loaded from .LCPI9_0 (bytes 0, 4, 8, ..., 60).
124 ; CHECK-LABEL: .LCPI9_0:
125 ; CHECK-NEXT: .byte 0 // 0x0
126 ; CHECK-NEXT: .byte 4 // 0x4
127 ; CHECK-NEXT: .byte 8 // 0x8
128 ; CHECK-NEXT: .byte 12 // 0xc
129 ; CHECK-NEXT: .byte 16 // 0x10
130 ; CHECK-NEXT: .byte 20 // 0x14
131 ; CHECK-NEXT: .byte 24 // 0x18
132 ; CHECK-NEXT: .byte 28 // 0x1c
133 ; CHECK-NEXT: .byte 32 // 0x20
134 ; CHECK-NEXT: .byte 36 // 0x24
135 ; CHECK-NEXT: .byte 40 // 0x28
136 ; CHECK-NEXT: .byte 44 // 0x2c
137 ; CHECK-NEXT: .byte 48 // 0x30
138 ; CHECK-NEXT: .byte 52 // 0x34
139 ; CHECK-NEXT: .byte 56 // 0x38
140 ; CHECK-NEXT: .byte 60 // 0x3c
142 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
143 ; CHECK-LABEL: shuffled_tbl2_to_tbl4:
145 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
146 ; CHECK-NEXT: adrp x8, .LCPI9_0
147 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
148 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
149 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
150 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
151 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
153 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
154 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Result lanes 0-7 come from %t1 lanes 0-7; lanes 8-15 come from %t2 lanes 0-7
; (shuffle indices 16-23).
155 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; The first tbl2 call uses a mask built at run time from %v (lanes 0-7 hold
; %v, lanes 8-15 hold -1); the second tbl2 call uses a constant mask.
; The assertions expect the combined mask to be assembled in v4 with lane
; inserts (w0 for lanes 0-7, the constants 32, 36, ..., 60 for lanes 8-15),
; followed by a single four-register `tbl.16b`.
159 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
160 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
162 ; CHECK-NEXT: fmov s4, w0
163 ; CHECK-NEXT: mov w8, #32 // =0x20
164 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
165 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
166 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
167 ; CHECK-NEXT: mov.b v4[1], w0
168 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
169 ; CHECK-NEXT: mov.b v4[2], w0
170 ; CHECK-NEXT: mov.b v4[3], w0
171 ; CHECK-NEXT: mov.b v4[4], w0
172 ; CHECK-NEXT: mov.b v4[5], w0
173 ; CHECK-NEXT: mov.b v4[6], w0
174 ; CHECK-NEXT: mov.b v4[7], w0
175 ; CHECK-NEXT: mov.b v4[8], w8
176 ; CHECK-NEXT: mov w8, #36 // =0x24
177 ; CHECK-NEXT: mov.b v4[9], w8
178 ; CHECK-NEXT: mov w8, #40 // =0x28
179 ; CHECK-NEXT: mov.b v4[10], w8
180 ; CHECK-NEXT: mov w8, #44 // =0x2c
181 ; CHECK-NEXT: mov.b v4[11], w8
182 ; CHECK-NEXT: mov w8, #48 // =0x30
183 ; CHECK-NEXT: mov.b v4[12], w8
184 ; CHECK-NEXT: mov w8, #52 // =0x34
185 ; CHECK-NEXT: mov.b v4[13], w8
186 ; CHECK-NEXT: mov w8, #56 // =0x38
187 ; CHECK-NEXT: mov.b v4[14], w8
188 ; CHECK-NEXT: mov w8, #60 // =0x3c
189 ; CHECK-NEXT: mov.b v4[15], w8
190 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; Build the run-time mask: lanes 0-7 = %v, lanes 8-15 = -1.
192 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
193 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
194 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
195 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
196 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
197 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
198 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
199 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
200 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
201 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
202 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
203 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
204 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
205 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
206 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
207 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
208 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
209 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Result lanes 0-7 come from %t1 lanes 0-7; lanes 8-15 come from %t2 lanes 0-7.
210 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Variant where only some lanes of the first tbl2 mask depend on %v, and the
; shufflevector picks one of those lanes (lane 15 of %t1) for result lane 7.
; The assertions expect the combined mask to be assembled in v4 with lane
; inserts (1 for lanes 0-6, w0 for lane 7, 32, 36, ..., 56 for lanes 8-14 and
; 31 for lane 15), followed by a single four-register `tbl.16b`.
214 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
215 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
217 ; CHECK-NEXT: mov w8, #1 // =0x1
218 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
219 ; CHECK-NEXT: fmov s4, w8
220 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
221 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
222 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
223 ; CHECK-NEXT: mov.b v4[1], w8
224 ; CHECK-NEXT: mov.b v4[2], w8
225 ; CHECK-NEXT: mov.b v4[3], w8
226 ; CHECK-NEXT: mov.b v4[4], w8
227 ; CHECK-NEXT: mov.b v4[5], w8
228 ; CHECK-NEXT: mov.b v4[6], w8
229 ; CHECK-NEXT: mov w8, #32 // =0x20
230 ; CHECK-NEXT: mov.b v4[7], w0
231 ; CHECK-NEXT: mov.b v4[8], w8
232 ; CHECK-NEXT: mov w8, #36 // =0x24
233 ; CHECK-NEXT: mov.b v4[9], w8
234 ; CHECK-NEXT: mov w8, #40 // =0x28
235 ; CHECK-NEXT: mov.b v4[10], w8
236 ; CHECK-NEXT: mov w8, #44 // =0x2c
237 ; CHECK-NEXT: mov.b v4[11], w8
238 ; CHECK-NEXT: mov w8, #48 // =0x30
239 ; CHECK-NEXT: mov.b v4[12], w8
240 ; CHECK-NEXT: mov w8, #52 // =0x34
241 ; CHECK-NEXT: mov.b v4[13], w8
242 ; CHECK-NEXT: mov w8, #56 // =0x38
243 ; CHECK-NEXT: mov.b v4[14], w8
244 ; CHECK-NEXT: mov w8, #31 // =0x1f
245 ; CHECK-NEXT: mov.b v4[15], w8
246 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; Build the run-time mask: lanes 0-7 = 1, lanes 8-11 and 14 = -1,
; lanes 12, 13 and 15 = %v.
248 %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
249 %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
250 %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
251 %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
252 %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
253 %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
254 %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
255 %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
256 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
257 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
258 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
259 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
260 %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
261 %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
262 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
263 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
264 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
265 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Result lanes 0-6 come from %t1 lanes 0-6 and lane 7 from %t1 lane 15;
; lanes 8-14 come from %t2 lanes 0-6 and lane 15 from %t2 lane 15 (index 31).
266 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
; Here the run-time mask (lanes 0-7 = %v, lanes 8-15 = -1) feeds the tbl2
; call that is the SECOND shufflevector operand (%t2, over %a/%b); the first
; operand (%t1, over %c/%d) uses a constant mask.
; The assertions expect two separate two-register `tbl.16b` instructions,
; merged with `mov.d v2[1], v0[0]` and `mov.16b v0, v2`.
270 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
271 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
273 ; CHECK-NEXT: movi.2d v4, #0xffffffffffffffff
274 ; CHECK-NEXT: adrp x8, .LCPI12_0
275 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
276 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
277 ; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI12_0]
278 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
279 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
280 ; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5
281 ; CHECK-NEXT: mov.b v4[0], w0
282 ; CHECK-NEXT: mov.b v4[1], w0
283 ; CHECK-NEXT: mov.b v4[2], w0
284 ; CHECK-NEXT: mov.b v4[3], w0
285 ; CHECK-NEXT: mov.b v4[4], w0
286 ; CHECK-NEXT: mov.b v4[5], w0
287 ; CHECK-NEXT: mov.b v4[6], w0
288 ; CHECK-NEXT: mov.b v4[7], w0
289 ; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
290 ; CHECK-NEXT: mov.d v2[1], v0[0]
291 ; CHECK-NEXT: mov.16b v0, v2
; Build the run-time mask: lanes 0-7 = %v, lanes 8-15 = -1.
293 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
294 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
295 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
296 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
297 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
298 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
299 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
300 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
301 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
302 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
303 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
304 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
305 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
306 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
307 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
308 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
309 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
310 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
; Result lanes 0-7 come from %t1 lanes 0-7; lanes 8-15 come from %t2 lanes 0-7.
311 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Like the nonconst_second_mask case, but the run-time mask also holds %v in
; lanes 14 and 15, and the shufflevector selects those two lanes of %t2.
; The assertions expect the mask to be built with `dup.16b v4, w0` plus
; inserts of 255 into lanes 8-13, two separate two-register `tbl.16b`
; instructions, and a third two-register `tbl.16b` over { v2, v3 } whose mask
; is loaded from .LCPI13_1.
315 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
316 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
318 ; CHECK-NEXT: dup.16b v4, w0
319 ; CHECK-NEXT: mov w8, #255 // =0xff
320 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
321 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
322 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
323 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
324 ; CHECK-NEXT: mov.b v4[8], w8
325 ; CHECK-NEXT: mov.b v4[9], w8
326 ; CHECK-NEXT: mov.b v4[10], w8
327 ; CHECK-NEXT: mov.b v4[11], w8
328 ; CHECK-NEXT: mov.b v4[12], w8
329 ; CHECK-NEXT: mov.b v4[13], w8
330 ; CHECK-NEXT: adrp x8, .LCPI13_0
331 ; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI13_0]
332 ; CHECK-NEXT: adrp x8, .LCPI13_1
333 ; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5
334 ; CHECK-NEXT: tbl.16b v3, { v0, v1 }, v4
335 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_1]
336 ; CHECK-NEXT: tbl.16b v0, { v2, v3 }, v0
; Build the run-time mask: lanes 0-7 and 14-15 = %v, lanes 8-13 = -1.
338 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
339 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
340 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
341 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
342 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
343 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
344 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
345 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
346 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
347 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
348 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
349 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
350 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
351 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
352 %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
353 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
354 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
355 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
; Result lanes 0-7 come from %t1 lanes 0-7; lanes 8-13 come from %t2 lanes 0-5
; and lanes 14-15 from %t2 lanes 14-15 (indices 30 and 31).
356 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
; Both tbl2 calls use the same constant mask, but the shufflevector takes
; result lane 2 from %t2 (index 21) instead of %t1.
; The assertions expect a single four-register `tbl.16b` whose mask
; (.LCPI14_0) holds 52 in byte 2 and otherwise 0, 4, ..., 60.
361 ; CHECK-LABEL: .LCPI14_0:
362 ; CHECK-NEXT: .byte 0 // 0x0
363 ; CHECK-NEXT: .byte 4 // 0x4
364 ; CHECK-NEXT: .byte 52 // 0x34
365 ; CHECK-NEXT: .byte 12 // 0xc
366 ; CHECK-NEXT: .byte 16 // 0x10
367 ; CHECK-NEXT: .byte 20 // 0x14
368 ; CHECK-NEXT: .byte 24 // 0x18
369 ; CHECK-NEXT: .byte 28 // 0x1c
370 ; CHECK-NEXT: .byte 32 // 0x20
371 ; CHECK-NEXT: .byte 36 // 0x24
372 ; CHECK-NEXT: .byte 40 // 0x28
373 ; CHECK-NEXT: .byte 44 // 0x2c
374 ; CHECK-NEXT: .byte 48 // 0x30
375 ; CHECK-NEXT: .byte 52 // 0x34
376 ; CHECK-NEXT: .byte 56 // 0x38
377 ; CHECK-NEXT: .byte 60 // 0x3c
379 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
380 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
382 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
383 ; CHECK-NEXT: adrp x8, .LCPI14_0
384 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
385 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_0]
386 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
387 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
388 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
390 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
391 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Index 21 in position 2 selects lane 5 of %t2; every other position in 0-7
; selects the matching lane of %t1, and positions 8-15 select %t2 lanes 0-7.
392 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Same shuffle as the mixed_shuffle case, but the FIRST tbl2 mask has 0 in
; lane 8 instead of -1 (a lane the shufflevector never selects from %t1).
; The assertions expect a single four-register `tbl.16b` with the mask in
; .LCPI15_0 (52 in byte 2, otherwise 0, 4, ..., 60).
396 ; CHECK-LABEL: .LCPI15_0:
397 ; CHECK-NEXT: .byte 0 // 0x0
398 ; CHECK-NEXT: .byte 4 // 0x4
399 ; CHECK-NEXT: .byte 52 // 0x34
400 ; CHECK-NEXT: .byte 12 // 0xc
401 ; CHECK-NEXT: .byte 16 // 0x10
402 ; CHECK-NEXT: .byte 20 // 0x14
403 ; CHECK-NEXT: .byte 24 // 0x18
404 ; CHECK-NEXT: .byte 28 // 0x1c
405 ; CHECK-NEXT: .byte 32 // 0x20
406 ; CHECK-NEXT: .byte 36 // 0x24
407 ; CHECK-NEXT: .byte 40 // 0x28
408 ; CHECK-NEXT: .byte 44 // 0x2c
409 ; CHECK-NEXT: .byte 48 // 0x30
410 ; CHECK-NEXT: .byte 52 // 0x34
411 ; CHECK-NEXT: .byte 56 // 0x38
412 ; CHECK-NEXT: .byte 60 // 0x3c
414 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
415 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
417 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
418 ; CHECK-NEXT: adrp x8, .LCPI15_0
419 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
420 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_0]
421 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
422 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
423 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
425 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
426 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Index 21 in position 2 selects lane 5 of %t2; positions 8-15 select
; %t2 lanes 0-7.
427 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Same shuffle as the mixed_shuffle case, but the SECOND tbl2 mask has 0 in
; lane 8 instead of -1 (a lane the shufflevector never selects from %t2).
; The assertions expect a single four-register `tbl.16b` with the mask in
; .LCPI16_0 (52 in byte 2, otherwise 0, 4, ..., 60).
431 ; CHECK-LABEL: .LCPI16_0:
432 ; CHECK-NEXT: .byte 0 // 0x0
433 ; CHECK-NEXT: .byte 4 // 0x4
434 ; CHECK-NEXT: .byte 52 // 0x34
435 ; CHECK-NEXT: .byte 12 // 0xc
436 ; CHECK-NEXT: .byte 16 // 0x10
437 ; CHECK-NEXT: .byte 20 // 0x14
438 ; CHECK-NEXT: .byte 24 // 0x18
439 ; CHECK-NEXT: .byte 28 // 0x1c
440 ; CHECK-NEXT: .byte 32 // 0x20
441 ; CHECK-NEXT: .byte 36 // 0x24
442 ; CHECK-NEXT: .byte 40 // 0x28
443 ; CHECK-NEXT: .byte 44 // 0x2c
444 ; CHECK-NEXT: .byte 48 // 0x30
445 ; CHECK-NEXT: .byte 52 // 0x34
446 ; CHECK-NEXT: .byte 56 // 0x38
447 ; CHECK-NEXT: .byte 60 // 0x3c
449 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
450 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
452 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
453 ; CHECK-NEXT: adrp x8, .LCPI16_0
454 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
455 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
456 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
457 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
458 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
460 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
461 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Index 21 in position 2 selects lane 5 of %t2; positions 8-15 select
; %t2 lanes 0-7.
462 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Declarations of the tbl1-tbl4 intrinsics called by the functions above,
; each in a <8 x i8> and a <16 x i8> result variant. The last parameter has
; the result type; all preceding parameters are <16 x i8>.
466 declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
467 declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
468 declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
469 declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
470 declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
471 declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
472 declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
473 declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; Passes %A (<8 x i8>), %B (<16 x i8>) and %C (<8 x i8>) to
; llvm.aarch64.neon.tbx1.v8i8. The assertions expect a single
; `tbx.8b v0, { v1 }, v2`.
475 define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
476 ; CHECK-LABEL: tbx1_8b:
478 ; CHECK-NEXT: tbx.8b v0, { v1 }, v2
480 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
; Passes %A, %B and %C (all <16 x i8>) to llvm.aarch64.neon.tbx1.v16i8.
; The assertions expect a single `tbx.16b v0, { v1 }, v2`.
484 define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
485 ; CHECK-LABEL: tbx1_16b:
487 ; CHECK-NEXT: tbx.16b v0, { v1 }, v2
489 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
; Passes %A (<8 x i8>), %B, %C (<16 x i8>) and %D (<8 x i8>) to
; llvm.aarch64.neon.tbx2.v8i8. The assertions expect `// kill:` annotations
; for $q2 and $q1 against the $q1_q2 tuple, followed by
; `tbx.8b v0, { v1, v2 }, v3`.
493 define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
494 ; CHECK-LABEL: tbx2_8b:
496 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
497 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
498 ; CHECK-NEXT: tbx.8b v0, { v1, v2 }, v3
500 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
; Passes %A, %B, %C and %D (all <16 x i8>) to llvm.aarch64.neon.tbx2.v16i8.
; The assertions expect `// kill:` annotations for $q2 and $q1 against the
; $q1_q2 tuple, followed by `tbx.16b v0, { v1, v2 }, v3`.
504 define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
505 ; CHECK-LABEL: tbx2_16b:
507 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
508 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
509 ; CHECK-NEXT: tbx.16b v0, { v1, v2 }, v3
511 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
; Passes %A (<8 x i8>), %B, %C, %D (<16 x i8>) and %E (<8 x i8>) to
; llvm.aarch64.neon.tbx3.v8i8. The assertions expect `// kill:` annotations
; for $q3, $q2 and $q1 against the $q1_q2_q3 tuple, followed by
; `tbx.8b v0, { v1, v2, v3 }, v4`.
515 define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
516 ; CHECK-LABEL: tbx3_8b:
518 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
519 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
520 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
521 ; CHECK-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
523 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
; Passes %A through %E (all <16 x i8>) to llvm.aarch64.neon.tbx3.v16i8.
; The assertions expect `// kill:` annotations for $q3, $q2 and $q1 against
; the $q1_q2_q3 tuple, followed by `tbx.16b v0, { v1, v2, v3 }, v4`.
527 define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
528 ; CHECK-LABEL: tbx3_16b:
530 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
531 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
532 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
533 ; CHECK-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
535 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
; Passes %A (<8 x i8>), %B, %C, %D, %E (<16 x i8>) and %F (<8 x i8>) to
; llvm.aarch64.neon.tbx4.v8i8. The assertions expect `// kill:` annotations
; for $q4 down to $q1 against the $q1_q2_q3_q4 tuple, followed by
; `tbx.8b v0, { v1, v2, v3, v4 }, v5`.
539 define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
540 ; CHECK-LABEL: tbx4_8b:
542 ; CHECK-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
543 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
544 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
545 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
546 ; CHECK-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
548 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
; Passes %A through %F (all <16 x i8>) to llvm.aarch64.neon.tbx4.v16i8.
; The assertions expect `// kill:` annotations for $q4 down to $q1 against
; the $q1_q2_q3_q4 tuple, followed by `tbx.16b v0, { v1, v2, v3, v4 }, v5`.
552 define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
553 ; CHECK-LABEL: tbx4_16b:
555 ; CHECK-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
556 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
557 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
558 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
559 ; CHECK-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
561 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
; Declarations of the tbx1-tbx4 intrinsics called by the functions above,
; each in a <8 x i8> and a <16 x i8> result variant. The first and last
; parameters have the result type; the parameters in between are <16 x i8>.
565 declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
566 declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
567 declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
568 declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
569 declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
570 declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
571 declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
572 declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone