1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; concat1: joins two <2 x i8> operands into a <4 x i8> result with the identity
; shuffle mask <0, 1, 2, 3>. The default llc run expects a single uzp1 on .4h;
; the -global-isel run expects per-lane moves into v0.h[1], v0.h[2] and v0.h[3].
5 define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
6 ; CHECK-SD-LABEL: concat1:
8 ; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
11 ; CHECK-GI-LABEL: concat1:
13 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
14 ; CHECK-GI-NEXT: mov w8, v0.s[1]
15 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
16 ; CHECK-GI-NEXT: mov w9, v1.s[1]
17 ; CHECK-GI-NEXT: mov v0.h[1], w8
18 ; CHECK-GI-NEXT: fmov w8, s1
19 ; CHECK-GI-NEXT: mov v0.h[2], w8
20 ; CHECK-GI-NEXT: mov v0.h[3], w9
21 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
23 %v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; concat2: joins two <4 x i8> operands into an <8 x i8> result with the identity
; shuffle mask <0..7>. The default llc run expects a single uzp1 on .8b; the
; -global-isel run expects one uzp1 per operand followed by two 32-bit lane inserts.
27 define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
28 ; CHECK-SD-LABEL: concat2:
30 ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v1.8b
33 ; CHECK-GI-LABEL: concat2:
35 ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
36 ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
37 ; CHECK-GI-NEXT: fmov w8, s0
38 ; CHECK-GI-NEXT: mov v0.s[0], w8
39 ; CHECK-GI-NEXT: fmov w8, s1
40 ; CHECK-GI-NEXT: mov v0.s[1], w8
41 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
43 %v8i8 = shufflevector <4 x i8> %A, <4 x i8> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; concat3: joins two <8 x i8> operands into a <16 x i8> result with the identity
; shuffle mask <0..15>. Both RUN lines share the same expectation: the second
; operand's low 64 bits are inserted into the upper half of v0.
47 define <16 x i8> @concat3(<8 x i8> %A, <8 x i8> %B) {
48 ; CHECK-LABEL: concat3:
50 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
51 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
52 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
54 %v16i8 = shufflevector <8 x i8> %A, <8 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; concat4: joins two <2 x i16> operands into a <4 x i16> result with the identity
; shuffle mask <0, 1, 2, 3>. The default llc run expects a single uzp1 on .4h;
; the -global-isel run expects one uzp1 per operand followed by two 32-bit lane inserts.
58 define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
59 ; CHECK-SD-LABEL: concat4:
61 ; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
64 ; CHECK-GI-LABEL: concat4:
66 ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
67 ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
68 ; CHECK-GI-NEXT: fmov w8, s0
69 ; CHECK-GI-NEXT: mov v0.s[0], w8
70 ; CHECK-GI-NEXT: fmov w8, s1
71 ; CHECK-GI-NEXT: mov v0.s[1], w8
72 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
74 %v4i16 = shufflevector <2 x i16> %A, <2 x i16> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; concat5: joins two <4 x i16> operands into an <8 x i16> result with the identity
; shuffle mask <0..7>. Both RUN lines expect a single 64-bit lane insert of the
; second operand into the upper half of v0.
78 define <8 x i16> @concat5(<4 x i16> %A, <4 x i16> %B) {
79 ; CHECK-LABEL: concat5:
81 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
82 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
83 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
85 %v8i16 = shufflevector <4 x i16> %A, <4 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; concat6: loads an <8 x i16> from each of %A and %B and joins them into a
; <16 x i16> result with the identity shuffle mask <0..15>. Both RUN lines expect
; the two halves to be loaded straight into q0 (from x0) and q1 (from x1).
89 define <16 x i16> @concat6(ptr %A, ptr %B) {
90 ; CHECK-LABEL: concat6:
92 ; CHECK-NEXT: ldr q0, [x0]
93 ; CHECK-NEXT: ldr q1, [x1]
95 %tmp1 = load <8 x i16>, ptr %A
96 %tmp2 = load <8 x i16>, ptr %B
97 %v16i16 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
98 ret <16 x i16> %v16i16
; concat7: joins two <2 x i32> operands into a <4 x i32> result with the identity
; shuffle mask <0, 1, 2, 3>. Both RUN lines expect a single 64-bit lane insert of
; the second operand into the upper half of v0.
101 define <4 x i32> @concat7(<2 x i32> %A, <2 x i32> %B) {
102 ; CHECK-LABEL: concat7:
104 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
105 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
106 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
108 %v4i32 = shufflevector <2 x i32> %A, <2 x i32> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; concat8: loads a <4 x i32> from each of %A and %B and joins them into an
; <8 x i32> result with the identity shuffle mask <0..7>. Both RUN lines expect
; the two halves to be loaded straight into q0 (from x0) and q1 (from x1).
112 define <8 x i32> @concat8(ptr %A, ptr %B) {
113 ; CHECK-LABEL: concat8:
115 ; CHECK-NEXT: ldr q0, [x0]
116 ; CHECK-NEXT: ldr q1, [x1]
118 %tmp1 = load <4 x i32>, ptr %A
119 %tmp2 = load <4 x i32>, ptr %B
120 %v8i32 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; concat9: joins two <2 x half> operands into a <4 x half> result with the
; identity shuffle mask <0, 1, 2, 3>. The default llc run expects a single zip1
; on .2s; the -global-isel run expects each operand's low 32 bits to be moved
; through w8 into v0.s[0] and v0.s[1].
124 define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
125 ; CHECK-SD-LABEL: concat9:
126 ; CHECK-SD: // %bb.0:
127 ; CHECK-SD-NEXT: zip1 v0.2s, v0.2s, v1.2s
130 ; CHECK-GI-LABEL: concat9:
131 ; CHECK-GI: // %bb.0:
132 ; CHECK-GI-NEXT: fmov w8, s0
133 ; CHECK-GI-NEXT: mov v0.s[0], w8
134 ; CHECK-GI-NEXT: fmov w8, s1
135 ; CHECK-GI-NEXT: mov v0.s[1], w8
136 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
138 %v4half= shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
139 ret <4 x half> %v4half
; concat10: joins two <4 x half> operands into an <8 x half> result with the
; identity shuffle mask <0..7>. Both RUN lines expect a single 64-bit lane insert
; of the second operand into the upper half of v0.
142 define <8 x half> @concat10(<4 x half> %A, <4 x half> %B) {
143 ; CHECK-LABEL: concat10:
145 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
146 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
147 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
149 %v8half= shufflevector <4 x half> %A, <4 x half> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
150 ret <8 x half> %v8half
; concat11: joins two <8 x half> operands into a <16 x half> result with the
; identity shuffle mask <0..15>. Only the label check is attached here, so no
; particular instruction sequence is pinned for the body.
153 define <16 x half> @concat11(<8 x half> %A, <8 x half> %B) {
154 ; CHECK-LABEL: concat11:
157 %v16half= shufflevector <8 x half> %A, <8 x half> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
158 ret <16 x half> %v16half
; concat_v8s16_v2s16: loads a <2 x i16> from %ptr and widens it to <8 x i16>;
; lanes 0-1 come from the loaded value and lanes 2-7 are left unspecified.
; The default llc run expects one 32-bit load into s0; the -global-isel run
; expects two halfword loads assembled in v1 and copied into v0.s[0].
; The unspecified mask lanes are spelled `poison`, matching the other tests in
; this file; as shuffle-mask elements `undef` and `poison` are equivalent.
161 define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
162 ; CHECK-SD-LABEL: concat_v8s16_v2s16:
163 ; CHECK-SD: // %bb.0:
164 ; CHECK-SD-NEXT: ldr s0, [x0]
167 ; CHECK-GI-LABEL: concat_v8s16_v2s16:
168 ; CHECK-GI: // %bb.0:
169 ; CHECK-GI-NEXT: ldrh w8, [x0]
170 ; CHECK-GI-NEXT: ldrh w9, [x0, #2]
171 ; CHECK-GI-NEXT: fmov s1, w8
172 ; CHECK-GI-NEXT: mov v1.h[1], w9
173 ; CHECK-GI-NEXT: mov v0.s[0], v1.s[0]
175 %a = load <2 x i16>, ptr %ptr
176 %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; concat_v16s8_v4s8: loads a <4 x i8> from %ptr and widens it to <16 x i8>;
; lanes 0-3 come from the loaded value and lanes 4-15 are left unspecified.
; Both RUN lines expect a single 32-bit load into s0.
; The unspecified mask lanes are spelled `poison`, matching the other tests in
; this file; as shuffle-mask elements `undef` and `poison` are equivalent.
180 define <16 x i8> @concat_v16s8_v4s8(ptr %ptr) {
181 ; CHECK-LABEL: concat_v16s8_v4s8:
183 ; CHECK-NEXT: ldr s0, [x0]
185 %a = load <4 x i8>, ptr %ptr
186 %b = shufflevector <4 x i8> %a, <4 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; concat_v16s8_v4s8_load: loads four <4 x i8> values (from %ptrA..%ptrD) and
; assembles them into one <16 x i8>: %b holds A|B in lanes 0-7, %c holds C|D in
; lanes 0-7, and %d takes lanes 0-7 of %b followed by lanes 0-7 of %c.
; Both RUN lines expect one scalar load plus three single-lane ld1 loads into
; v0.s[1], v0.s[2] and v0.s[3].
; The unspecified mask lanes are spelled `poison`, matching the other tests in
; this file; as shuffle-mask elements `undef` and `poison` are equivalent.
190 define <16 x i8> @concat_v16s8_v4s8_load(ptr %ptrA, ptr %ptrB, ptr %ptrC, ptr %ptrD) {
191 ; CHECK-LABEL: concat_v16s8_v4s8_load:
193 ; CHECK-NEXT: ldr s0, [x0]
194 ; CHECK-NEXT: ld1 { v0.s }[1], [x1]
195 ; CHECK-NEXT: ld1 { v0.s }[2], [x2]
196 ; CHECK-NEXT: ld1 { v0.s }[3], [x3]
198 %A = load <4 x i8>, ptr %ptrA
199 %B = load <4 x i8>, ptr %ptrB
200 %C = load <4 x i8>, ptr %ptrC
201 %D = load <4 x i8>, ptr %ptrD
202 %b = shufflevector <4 x i8> %A, <4 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
203 %c = shufflevector <4 x i8> %C, <4 x i8> %D, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
204 %d = shufflevector <16 x i8> %b, <16 x i8> %c, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; concat_v16s8_v4s8_reg: register-argument variant of the four-way <4 x i8>
; concatenation. %b holds A|B in lanes 0-7, %c holds C|D in lanes 0-7, and %d
; takes lanes 0-7 of %b followed by lanes 0-7 of %c.
; The default llc run expects two 64-bit lane inserts followed by one uzp1 on
; .16b; the -global-isel run expects one uzp1 per operand, each followed by a
; 32-bit lane insert into v0.s[0..3].
; The unspecified mask lanes are spelled `poison`, matching the other tests in
; this file; as shuffle-mask elements `undef` and `poison` are equivalent.
209 define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <4 x i8> %D) {
210 ; CHECK-SD-LABEL: concat_v16s8_v4s8_reg:
211 ; CHECK-SD: // %bb.0:
212 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
213 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
214 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
215 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
216 ; CHECK-SD-NEXT: mov v2.d[1], v3.d[0]
217 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
218 ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v2.16b
221 ; CHECK-GI-LABEL: concat_v16s8_v4s8_reg:
222 ; CHECK-GI: // %bb.0:
223 ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
224 ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
225 ; CHECK-GI-NEXT: fmov w8, s0
226 ; CHECK-GI-NEXT: mov v0.s[0], w8
227 ; CHECK-GI-NEXT: fmov w8, s1
228 ; CHECK-GI-NEXT: uzp1 v2.8b, v2.8b, v0.8b
229 ; CHECK-GI-NEXT: mov v0.s[1], w8
230 ; CHECK-GI-NEXT: uzp1 v1.8b, v3.8b, v0.8b
231 ; CHECK-GI-NEXT: fmov w8, s2
232 ; CHECK-GI-NEXT: mov v0.s[2], w8
233 ; CHECK-GI-NEXT: fmov w8, s1
234 ; CHECK-GI-NEXT: mov v0.s[3], w8
236 %b = shufflevector <4 x i8> %A, <4 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
237 %c = shufflevector <4 x i8> %C, <4 x i8> %D, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
238 %d = shufflevector <16 x i8> %b, <16 x i8> %c, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; concat_v8s16_v2s16_reg: register-argument four-way <2 x i16> concatenation.
; %b holds A|B in lanes 0-3, %c holds C|D in lanes 0-3, and %d takes lanes 0-3
; of %b followed by lanes 0-3 of %c.
; The default llc run expects a constant-pool mask load and a four-register tbl;
; the -global-isel run expects one uzp1 per operand, each followed by a 32-bit
; lane insert into v0.s[0..3].
; The unspecified mask lanes are spelled `poison`, matching the other tests in
; this file; as shuffle-mask elements `undef` and `poison` are equivalent.
242 define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> %C, <2 x i16> %D) {
243 ; CHECK-SD-LABEL: concat_v8s16_v2s16_reg:
244 ; CHECK-SD: // %bb.0:
245 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
246 ; CHECK-SD-NEXT: adrp x8, .LCPI15_0
247 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
248 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0]
249 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
250 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
251 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
254 ; CHECK-GI-LABEL: concat_v8s16_v2s16_reg:
255 ; CHECK-GI: // %bb.0:
256 ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
257 ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
258 ; CHECK-GI-NEXT: fmov w8, s0
259 ; CHECK-GI-NEXT: mov v0.s[0], w8
260 ; CHECK-GI-NEXT: fmov w8, s1
261 ; CHECK-GI-NEXT: uzp1 v2.4h, v2.4h, v0.4h
262 ; CHECK-GI-NEXT: mov v0.s[1], w8
263 ; CHECK-GI-NEXT: uzp1 v1.4h, v3.4h, v0.4h
264 ; CHECK-GI-NEXT: fmov w8, s2
265 ; CHECK-GI-NEXT: mov v0.s[2], w8
266 ; CHECK-GI-NEXT: fmov w8, s1
267 ; CHECK-GI-NEXT: mov v0.s[3], w8
269 %b = shufflevector <2 x i16> %A, <2 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
270 %c = shufflevector <2 x i16> %C, <2 x i16> %D, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
271 %d = shufflevector <8 x i16> %b, <8 x i16> %c, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11>
; concat_undef_first_use_first: result lanes 0-1 are poison and lanes 2-3 take
; mask indices 0 and 1, i.e. the first two lanes of %ext1 (the widened %l1).
; The default llc run expects a replicating 32-bit load from x0; the
; -global-isel run expects two halfword loads from x0 inserted into v0.s[1].
; NOTE(review): %l2 is loaded from %p2 but never used, and %ext2 is built from
; %l1 rather than %l2 - confirm this is intentional before changing it, since
; the expected output only reads x0.
275 define <4 x i16> @concat_undef_first_use_first(ptr %p1, ptr %p2) {
276 ; CHECK-SD-LABEL: concat_undef_first_use_first:
277 ; CHECK-SD: // %bb.0:
278 ; CHECK-SD-NEXT: ld1r { v0.2s }, [x0]
281 ; CHECK-GI-LABEL: concat_undef_first_use_first:
282 ; CHECK-GI: // %bb.0:
283 ; CHECK-GI-NEXT: ldrh w8, [x0]
284 ; CHECK-GI-NEXT: ldrh w9, [x0, #2]
285 ; CHECK-GI-NEXT: fmov s1, w8
286 ; CHECK-GI-NEXT: mov v1.h[1], w9
287 ; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
288 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
290 %l1 = load <2 x i16>, ptr %p1
291 %l2 = load <2 x i16>, ptr %p2
292 %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
293 %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
294 %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
; concat_undef_first_use_second: result lanes 0-1 are poison and lanes 2-3 take
; mask indices 8 and 9, i.e. the first two lanes of the second shuffle operand
; %ext2. The default llc run expects a replicating 32-bit load from x0; the
; -global-isel run expects two halfword loads from x0 inserted into v0.s[1].
; NOTE(review): %l2 is loaded from %p2 but never used, and %ext2 is built from
; %l1 rather than %l2 - confirm this is intentional before changing it, since
; the expected output only reads x0.
298 define <4 x i16> @concat_undef_first_use_second(ptr %p1, ptr %p2) {
299 ; CHECK-SD-LABEL: concat_undef_first_use_second:
300 ; CHECK-SD: // %bb.0:
301 ; CHECK-SD-NEXT: ld1r { v0.2s }, [x0]
304 ; CHECK-GI-LABEL: concat_undef_first_use_second:
305 ; CHECK-GI: // %bb.0:
306 ; CHECK-GI-NEXT: ldrh w8, [x0]
307 ; CHECK-GI-NEXT: ldrh w9, [x0, #2]
308 ; CHECK-GI-NEXT: fmov s1, w8
309 ; CHECK-GI-NEXT: mov v1.h[1], w9
310 ; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
311 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
313 %l1 = load <2 x i16>, ptr %p1
314 %l2 = load <2 x i16>, ptr %p2
315 %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
316 %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
317 %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 8, i32 9>
; concat_undef_first_use_undef: result lanes 0-1 are poison and lanes 2-3 take
; mask indices 2 and 3, which are lanes of %ext1 that its own mask leaves as
; poison. The default llc run expects a 32-bit load from x0 into s0; no
; instruction expectations are visible here for the -global-isel run beyond the
; block header.
; NOTE(review): %l2 is loaded from %p2 but never used, and %ext2 is built from
; %l1 rather than %l2 - confirm this is intentional before changing it.
321 define <4 x i16> @concat_undef_first_use_undef(ptr %p1, ptr %p2) {
322 ; CHECK-SD-LABEL: concat_undef_first_use_undef:
323 ; CHECK-SD: // %bb.0:
324 ; CHECK-SD-NEXT: ldr s0, [x0]
327 ; CHECK-GI-LABEL: concat_undef_first_use_undef:
328 ; CHECK-GI: // %bb.0:
330 %l1 = load <2 x i16>, ptr %p1
331 %l2 = load <2 x i16>, ptr %p2
332 %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
333 %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
334 %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3>
; concat_low_low_v8i16: extracts lanes 0-3 of %a_vec and lanes 0-3 of %b_vec and
; joins them into one <8 x i16>. Both RUN lines expect a single 64-bit lane
; insert of v1's low half into v0's high half.
338 define <8 x i16> @concat_low_low_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
339 ; CHECK-LABEL: concat_low_low_v8i16:
340 ; CHECK: // %bb.0: // %entry
341 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
344 %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
345 %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
346 %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
347 ret <8 x i16> %shuffle.i4
; concat_high_low_v8i16: extracts lanes 4-7 of %a_vec and lanes 0-3 of %b_vec and
; joins them into one <8 x i16>. The default llc run expects a single ext with
; byte offset 8; the -global-isel run expects a move of v0's high half to d0
; followed by a 64-bit lane insert from v1.
350 define <8 x i16> @concat_high_low_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
351 ; CHECK-SD-LABEL: concat_high_low_v8i16:
352 ; CHECK-SD: // %bb.0: // %entry
353 ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v1.16b, #8
356 ; CHECK-GI-LABEL: concat_high_low_v8i16:
357 ; CHECK-GI: // %bb.0: // %entry
358 ; CHECK-GI-NEXT: mov d0, v0.d[1]
359 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
362 %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
363 %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
364 %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
365 ret <8 x i16> %shuffle.i4
; concat_low_high_v8i16: extracts lanes 0-3 of %a_vec and lanes 4-7 of %b_vec and
; joins them into one <8 x i16>. The default llc run expects an ext that rotates
; v1 by 8 bytes followed by a 64-bit lane insert; the -global-isel run expects a
; move of v1's high half to d1 followed by the same lane insert.
368 define <8 x i16> @concat_low_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
369 ; CHECK-SD-LABEL: concat_low_high_v8i16:
370 ; CHECK-SD: // %bb.0: // %entry
371 ; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
372 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
375 ; CHECK-GI-LABEL: concat_low_high_v8i16:
376 ; CHECK-GI: // %bb.0: // %entry
377 ; CHECK-GI-NEXT: mov d1, v1.d[1]
378 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
381 %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
382 %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
383 %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
384 ret <8 x i16> %shuffle.i4
; concat_high_high_v8i16: extracts lanes 4-7 of %a_vec and lanes 4-7 of %b_vec
; and joins them into one <8 x i16>. Both RUN lines expect v0's high half to be
; inserted into v1's low half, then v1 copied to v0.
387 define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
388 ; CHECK-LABEL: concat_high_high_v8i16:
389 ; CHECK: // %bb.0: // %entry
390 ; CHECK-NEXT: mov v1.d[0], v0.d[1]
391 ; CHECK-NEXT: mov v0.16b, v1.16b
394 %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
395 %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
396 %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
397 ret <8 x i16> %shuffle.i4
; concat_high_high_v8f16: half-precision variant; extracts lanes 4-7 of %a_vec
; and lanes 4-7 of %b_vec and joins them into one <8 x half>. Both RUN lines
; expect v0's high half to be inserted into v1's low half, then v1 copied to v0.
400 define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) {
401 ; CHECK-LABEL: concat_high_high_v8f16:
402 ; CHECK: // %bb.0: // %entry
403 ; CHECK-NEXT: mov v1.d[0], v0.d[1]
404 ; CHECK-NEXT: mov v0.16b, v1.16b
407 %shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
408 %shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
409 %shuffle.i4 = shufflevector <4 x half> %shuffle.i3, <4 x half> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
410 ret <8 x half> %shuffle.i4
; concat_high_high_v8bf16: bfloat variant; extracts lanes 4-7 of %a_vec and
; lanes 4-7 of %b_vec and joins them into one <8 x bfloat>. Both RUN lines
; expect v0's high half to be inserted into v1's low half, then v1 copied to v0.
413 define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) {
414 ; CHECK-LABEL: concat_high_high_v8bf16:
415 ; CHECK: // %bb.0: // %entry
416 ; CHECK-NEXT: mov v1.d[0], v0.d[1]
417 ; CHECK-NEXT: mov v0.16b, v1.16b
420 %shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
421 %shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
422 %shuffle.i4 = shufflevector <4 x bfloat> %shuffle.i3, <4 x bfloat> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
423 ret <8 x bfloat> %shuffle.i4
; concat_high_high_v4i32: extracts lanes 2-3 of %a_vec and lanes 2-3 of %b_vec
; and joins them into one <4 x i32>. The default llc run expects a single zip2
; on .2d; the -global-isel run expects a 64-bit lane insert into v1 followed by
; a full-register copy to v0.
426 define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) {
427 ; CHECK-SD-LABEL: concat_high_high_v4i32:
428 ; CHECK-SD: // %bb.0: // %entry
429 ; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
432 ; CHECK-GI-LABEL: concat_high_high_v4i32:
433 ; CHECK-GI: // %bb.0: // %entry
434 ; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
435 ; CHECK-GI-NEXT: mov v0.16b, v1.16b
438 %shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
439 %shuffle.i = shufflevector <4 x i32> %b_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
440 %shuffle.i4 = shufflevector <2 x i32> %shuffle.i3, <2 x i32> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
441 ret <4 x i32> %shuffle.i4
; concat_high_high_v4f32: float variant; extracts lanes 2-3 of %a_vec and lanes
; 2-3 of %b_vec and joins them into one <4 x float>. The default llc run expects
; a single zip2 on .2d; the -global-isel run expects a 64-bit lane insert into
; v1 followed by a full-register copy to v0.
444 define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_vec) {
445 ; CHECK-SD-LABEL: concat_high_high_v4f32:
446 ; CHECK-SD: // %bb.0: // %entry
447 ; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
450 ; CHECK-GI-LABEL: concat_high_high_v4f32:
451 ; CHECK-GI: // %bb.0: // %entry
452 ; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
453 ; CHECK-GI-NEXT: mov v0.16b, v1.16b
456 %shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
457 %shuffle.i = shufflevector <4 x float> %b_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
458 %shuffle.i4 = shufflevector <2 x float> %shuffle.i3, <2 x float> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
459 ret <4 x float> %shuffle.i4
; concat_high_high_v16i8: extracts lanes 8-15 of %a_vec and lanes 8-15 of %b_vec
; and joins them into one <16 x i8>. Both RUN lines expect v0's high half to be
; inserted into v1's low half, then v1 copied to v0.
462 define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) {
463 ; CHECK-LABEL: concat_high_high_v16i8:
464 ; CHECK: // %bb.0: // %entry
465 ; CHECK-NEXT: mov v1.d[0], v0.d[1]
466 ; CHECK-NEXT: mov v0.16b, v1.16b
469 %shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
470 %shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
471 %shuffle.i4 = shufflevector <8 x i8> %shuffle.i3, <8 x i8> %shuffle.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
472 ret <16 x i8> %shuffle.i4
; concat_high_high_v2i64: extracts lane 1 of %a_vec and lane 1 of %b_vec as
; <1 x i64> values and joins them into one <2 x i64>. The default llc run
; expects a single zip2 on .2d; the -global-isel run expects two 64-bit lane
; moves that build the result in place in v0.
475 define <2 x i64> @concat_high_high_v2i64(<2 x i64> %a_vec, <2 x i64> %b_vec) {
476 ; CHECK-SD-LABEL: concat_high_high_v2i64:
477 ; CHECK-SD: // %bb.0: // %entry
478 ; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
481 ; CHECK-GI-LABEL: concat_high_high_v2i64:
482 ; CHECK-GI: // %bb.0: // %entry
483 ; CHECK-GI-NEXT: mov v0.d[0], v0.d[1]
484 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[1]
487 %shuffle.i3 = shufflevector <2 x i64> %a_vec, <2 x i64> poison, <1 x i32> <i32 1>
488 %shuffle.i = shufflevector <2 x i64> %b_vec, <2 x i64> poison, <1 x i32> <i32 1>
489 %shuffle.i4 = shufflevector <1 x i64> %shuffle.i3, <1 x i64> %shuffle.i, <2 x i32> <i32 0, i32 1>
490 ret <2 x i64> %shuffle.i4
; concat_high_high_v2f64: double variant; extracts lane 1 of %a_vec and lane 1
; of %b_vec as <1 x double> values and joins them into one <2 x double>. The
; default llc run expects a single zip2 on .2d; the -global-isel run expects two
; 64-bit lane moves that build the result in place in v0.
493 define <2 x double> @concat_high_high_v2f64(<2 x double> %a_vec, <2 x double> %b_vec) {
494 ; CHECK-SD-LABEL: concat_high_high_v2f64:
495 ; CHECK-SD: // %bb.0: // %entry
496 ; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
499 ; CHECK-GI-LABEL: concat_high_high_v2f64:
500 ; CHECK-GI: // %bb.0: // %entry
501 ; CHECK-GI-NEXT: mov v0.d[0], v0.d[1]
502 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[1]
505 %shuffle.i3 = shufflevector <2 x double> %a_vec, <2 x double> poison, <1 x i32> <i32 1>
506 %shuffle.i = shufflevector <2 x double> %b_vec, <2 x double> poison, <1 x i32> <i32 1>
507 %shuffle.i4 = shufflevector <1 x double> %shuffle.i3, <1 x double> %shuffle.i, <2 x i32> <i32 0, i32 1>
508 ret <2 x double> %shuffle.i4