1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5 target triple = "aarch64-unknown-linux-gnu"
7 ; REVB pattern for shuffle v32i8 -> v16i16
; The mask swaps the two bytes inside every 16-bit lane. Several mask entries
; are undef; the expected output is still a single REVB on .h elements.
8 define void @test_revbv16i16(ptr %a) #0 {
9 ; CHECK-LABEL: test_revbv16i16:
11 ; CHECK-NEXT: ptrue p0.b, vl32
12 ; CHECK-NEXT: ptrue p1.h
13 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
14 ; CHECK-NEXT: revb z0.h, p1/m, z0.h
15 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
17 %tmp1 = load <32 x i8>, ptr %a
18 %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 undef, i32 24, i32 27, i32 undef, i32 29, i32 28, i32 undef, i32 undef>
19 store <32 x i8> %tmp2, ptr %a
23 ; REVB pattern for shuffle v32i8 -> v8i32
; The mask reverses the four bytes inside every 32-bit lane (some entries are
; undef); the expected output is a single REVB on .s elements.
24 define void @test_revbv8i32(ptr %a) #0 {
25 ; CHECK-LABEL: test_revbv8i32:
27 ; CHECK-NEXT: ptrue p0.b, vl32
28 ; CHECK-NEXT: ptrue p1.s
29 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
30 ; CHECK-NEXT: revb z0.s, p1/m, z0.s
31 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
33 %tmp1 = load <32 x i8>, ptr %a
34 %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
35 store <32 x i8> %tmp2, ptr %a
39 ; REVB pattern for shuffle v32i8 -> v4i64
; The mask reverses the eight bytes inside every 64-bit lane (some entries are
; undef); the expected output is a single REVB on .d elements.
40 define void @test_revbv4i64(ptr %a) #0 {
41 ; CHECK-LABEL: test_revbv4i64:
43 ; CHECK-NEXT: ptrue p0.b, vl32
44 ; CHECK-NEXT: ptrue p1.d
45 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
46 ; CHECK-NEXT: revb z0.d, p1/m, z0.d
47 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
49 %tmp1 = load <32 x i8>, ptr %a
50 %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 undef, i32 27, i32 undef, i32 undef, i32 undef>
51 store <32 x i8> %tmp2, ptr %a
55 ; REVH pattern for shuffle v16i16 -> v8i32
; The mask swaps the two halfwords inside every 32-bit lane; the expected
; output is a single REVH on .s elements.
56 define void @test_revhv8i32(ptr %a) #0 {
57 ; CHECK-LABEL: test_revhv8i32:
59 ; CHECK-NEXT: ptrue p0.h, vl16
60 ; CHECK-NEXT: ptrue p1.s
61 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
62 ; CHECK-NEXT: revh z0.s, p1/m, z0.s
63 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
65 %tmp1 = load <16 x i16>, ptr %a
66 %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
67 store <16 x i16> %tmp2, ptr %a
71 ; REVH pattern for shuffle v16f16 -> v8f32
; Floating-point counterpart of test_revhv8i32: the same pairwise halfword swap
; on <16 x half> is expected to produce the same REVH on .s elements.
72 define void @test_revhv8f32(ptr %a) #0 {
73 ; CHECK-LABEL: test_revhv8f32:
75 ; CHECK-NEXT: ptrue p0.h, vl16
76 ; CHECK-NEXT: ptrue p1.s
77 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
78 ; CHECK-NEXT: revh z0.s, p1/m, z0.s
79 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
81 %tmp1 = load <16 x half>, ptr %a
82 %tmp2 = shufflevector <16 x half> %tmp1, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
83 store <16 x half> %tmp2, ptr %a
87 ; REVH pattern for shuffle v16i16 -> v4i64
; The mask reverses the four halfwords inside every 64-bit lane; the expected
; output is a single REVH on .d elements.
88 define void @test_revhv4i64(ptr %a) #0 {
89 ; CHECK-LABEL: test_revhv4i64:
91 ; CHECK-NEXT: ptrue p0.h, vl16
92 ; CHECK-NEXT: ptrue p1.d
93 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
94 ; CHECK-NEXT: revh z0.d, p1/m, z0.d
95 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
97 %tmp1 = load <16 x i16>, ptr %a
98 %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
99 store <16 x i16> %tmp2, ptr %a
103 ; REVW pattern for shuffle v8i32 -> v4i64
; The mask swaps the two words inside every 64-bit lane; the expected output is
; a single REVW on .d elements.
104 define void @test_revwv4i64(ptr %a) #0 {
105 ; CHECK-LABEL: test_revwv4i64:
107 ; CHECK-NEXT: ptrue p0.s, vl8
108 ; CHECK-NEXT: ptrue p1.d
109 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
110 ; CHECK-NEXT: revw z0.d, p1/m, z0.d
111 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
113 %tmp1 = load <8 x i32>, ptr %a
114 %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
115 store <8 x i32> %tmp2, ptr %a
119 ; REVW pattern for shuffle v8f32 -> v4f64
; Floating-point counterpart of test_revwv4i64: the same pairwise word swap on
; <8 x float> is expected to produce the same REVW on .d elements.
120 define void @test_revwv4f64(ptr %a) #0 {
121 ; CHECK-LABEL: test_revwv4f64:
123 ; CHECK-NEXT: ptrue p0.s, vl8
124 ; CHECK-NEXT: ptrue p1.d
125 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
126 ; CHECK-NEXT: revw z0.d, p1/m, z0.d
127 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
129 %tmp1 = load <8 x float>, ptr %a
130 %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
131 store <8 x float> %tmp2, ptr %a
135 ; Don't use SVE for 128-bit vectors
; The mask reverses the eight bytes inside each 64-bit half of a <16 x i8>
; value; the expected output is a NEON REV64 on v0.16b, with no SVE instructions.
136 define <16 x i8> @test_revv16i8(ptr %a) #0 {
137 ; CHECK-LABEL: test_revv16i8:
139 ; CHECK-NEXT: ldr q0, [x0]
140 ; CHECK-NEXT: rev64 v0.16b, v0.16b
142 %tmp1 = load <16 x i8>, ptr %a
143 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
147 ; REVW pattern for shuffle two v8i32 inputs with the second input available.
; Every mask index is in the range 8..15, so only the second operand (loaded
; from %b) contributes. The expected output loads from x1 only, applies REVW,
; and stores the result to x0.
148 define void @test_revwv8i32v8i32(ptr %a, ptr %b) #0 {
149 ; CHECK-LABEL: test_revwv8i32v8i32:
151 ; CHECK-NEXT: ptrue p0.s, vl8
152 ; CHECK-NEXT: ptrue p1.d
153 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
154 ; CHECK-NEXT: revw z0.d, p1/m, z0.d
155 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
157 %tmp1 = load <8 x i32>, ptr %a
158 %tmp2 = load <8 x i32>, ptr %b
159 %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
160 store <8 x i32> %tmp3, ptr %a
164 ; REVH pattern for shuffle v32i16 with 256 bits and 512 bits SVE.
; The mask reverses the four halfwords inside every 64-bit lane (some entries
; are undef). With the 256-bit minimum the expected output handles the
; <32 x i16> value as two 16-element halves, each with its own REVH; with the
; 512-bit minimum a single load / REVH / store is expected.
165 define void @test_revhv32i16(ptr %a) #0 {
166 ; VBITS_GE_256-LABEL: test_revhv32i16:
167 ; VBITS_GE_256: // %bb.0:
168 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
169 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
170 ; VBITS_GE_256-NEXT: ptrue p1.d
171 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
172 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
173 ; VBITS_GE_256-NEXT: revh z0.d, p1/m, z0.d
174 ; VBITS_GE_256-NEXT: revh z1.d, p1/m, z1.d
175 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
176 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
177 ; VBITS_GE_256-NEXT: ret
179 ; VBITS_GE_512-LABEL: test_revhv32i16:
180 ; VBITS_GE_512: // %bb.0:
181 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
182 ; VBITS_GE_512-NEXT: ptrue p1.d
183 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
184 ; VBITS_GE_512-NEXT: revh z0.d, p1/m, z0.d
185 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
186 ; VBITS_GE_512-NEXT: ret
187 %tmp1 = load <32 x i16>, ptr %a
188 %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
189 store <32 x i16> %tmp2, ptr %a
193 ; Only reversing bytes / halfwords / words within elements is supported.
; This mask swaps adjacent 64-bit elements instead, so with plain +sve the
; expected output falls back to a TBL whose index vector is loaded from the
; constant pool.
194 define void @test_rev_elts_fail(ptr %a) #1 {
195 ; CHECK-LABEL: test_rev_elts_fail:
197 ; CHECK-NEXT: ptrue p0.d
198 ; CHECK-NEXT: adrp x8, .LCPI11_0
199 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI11_0
200 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
201 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8]
202 ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d
203 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
205 %tmp1 = load <4 x i64>, ptr %a
206 %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
207 store <4 x i64> %tmp2, ptr %a
211 ; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse
212 ; the double-words within quad-words.
; NOTE(review): the assertions here govern REVD with the vl4 predicate (p0),
; whereas the v4f64 variant that follows uses a separate all-lanes p1.d;
; rerun utils/update_llc_test_checks.py to confirm this matches current output.
213 define void @test_revdv4i64_sve2p1(ptr %a) #2 {
214 ; CHECK-LABEL: test_revdv4i64_sve2p1:
216 ; CHECK-NEXT: ptrue p0.d, vl4
217 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
218 ; CHECK-NEXT: revd z0.q, p0/m, z0.q
219 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
221 %tmp1 = load <4 x i64>, ptr %a
222 %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
223 store <4 x i64> %tmp2, ptr %a
; REVD pattern for a v4f64 shuffle with sve2p1: the mask swaps the two
; double-words inside every 128-bit quad-word.
227 define void @test_revdv4f64_sve2p1(ptr %a) #2 {
228 ; CHECK-LABEL: test_revdv4f64_sve2p1:
230 ; CHECK-NEXT: ptrue p0.d, vl4
231 ; CHECK-NEXT: ptrue p1.d
232 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
233 ; CHECK-NEXT: revd z0.q, p1/m, z0.q
234 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
236 %tmp1 = load <4 x double>, ptr %a
237 %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
238 store <4 x double> %tmp2, ptr %a
242 ; The REV instruction reverses the order of all elements in the vector.
243 ; When the fixed vector length and the target register size do not match,
244 ; a REV generated for the shuffle pattern cannot be guaranteed to be correct.
246 ; sve-vector-bits-min=256, sve-vector-bits-max is not set, so REV can't be generated.
; Both run configurations are expected to use TBL instead: with the 256-bit
; minimum the index vector is built with INDEX (#7, step #-1); with the 512-bit
; minimum it is loaded from the constant pool.
247 define void @test_revv8i32(ptr %a) #0 {
248 ; VBITS_GE_256-LABEL: test_revv8i32:
249 ; VBITS_GE_256: // %bb.0:
250 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
251 ; VBITS_GE_256-NEXT: index z0.s, #7, #-1
252 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
253 ; VBITS_GE_256-NEXT: tbl z0.s, { z1.s }, z0.s
254 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
255 ; VBITS_GE_256-NEXT: ret
257 ; VBITS_GE_512-LABEL: test_revv8i32:
258 ; VBITS_GE_512: // %bb.0:
259 ; VBITS_GE_512-NEXT: ptrue p0.s, vl8
260 ; VBITS_GE_512-NEXT: adrp x8, .LCPI14_0
261 ; VBITS_GE_512-NEXT: add x8, x8, :lo12:.LCPI14_0
262 ; VBITS_GE_512-NEXT: ptrue p1.s, vl16
263 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
264 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p1/z, [x8]
265 ; VBITS_GE_512-NEXT: tbl z0.s, { z0.s }, z1.s
266 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
267 ; VBITS_GE_512-NEXT: ret
268 %tmp1 = load <8 x i32>, ptr %a
269 %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
270 store <8 x i32> %tmp2, ptr %a
274 ; REV pattern for v32i8 shuffle with vscale_range(2,2)
; The mask reverses all 32 bytes; with the vector length pinned by attribute #1
; the expected output is a single unpredicated REV on .b elements.
275 define void @test_revv32i8_vl256(ptr %a) #1 {
276 ; CHECK-LABEL: test_revv32i8_vl256:
278 ; CHECK-NEXT: ptrue p0.b
279 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
280 ; CHECK-NEXT: rev z0.b, z0.b
281 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
283 %tmp1 = load <32 x i8>, ptr %a
284 %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
285 store <32 x i8> %tmp2, ptr %a
289 ; REV pattern for v16i16 shuffle with vscale_range(2,2)
; Full reversal of 16 halfwords. The first mask entry is undef and the expected
; output is still a single REV on .h elements.
290 define void @test_revv16i16_vl256(ptr %a) #1 {
291 ; CHECK-LABEL: test_revv16i16_vl256:
293 ; CHECK-NEXT: ptrue p0.h
294 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
295 ; CHECK-NEXT: rev z0.h, z0.h
296 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
298 %tmp1 = load <16 x i16>, ptr %a
299 %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
300 store <16 x i16> %tmp2, ptr %a
304 ; REV pattern for v8f32 shuffle with vscale_range(2,2)
; Full reversal of 8 floats; the expected output is a single REV on .s elements.
305 define void @test_revv8f32_vl256(ptr %a) #1 {
306 ; CHECK-LABEL: test_revv8f32_vl256:
308 ; CHECK-NEXT: ptrue p0.s
309 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
310 ; CHECK-NEXT: rev z0.s, z0.s
311 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
313 %tmp1 = load <8 x float>, ptr %a
314 %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
315 store <8 x float> %tmp2, ptr %a
319 ; REV pattern for v4f64 shuffle with vscale_range(2,2)
; Full reversal of 4 doubles; the expected output is a single REV on .d elements.
320 define void @test_revv4f64_vl256(ptr %a) #1 {
321 ; CHECK-LABEL: test_revv4f64_vl256:
323 ; CHECK-NEXT: ptrue p0.d
324 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
325 ; CHECK-NEXT: rev z0.d, z0.d
326 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
328 %tmp1 = load <4 x double>, ptr %a
329 %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
330 store <4 x double> %tmp2, ptr %a
334 ; REV pattern for shuffle two v8i32 inputs with the second input available, vscale_range(2,2).
; Every mask index is in the range 8..15, so the result is the second operand
; (loaded from %b) fully reversed. The expected output loads from x1 only,
; applies REV, and stores to x0.
335 define void @test_revv8i32v8i32(ptr %a, ptr %b) #1 {
336 ; CHECK-LABEL: test_revv8i32v8i32:
338 ; CHECK-NEXT: ptrue p0.s
339 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
340 ; CHECK-NEXT: rev z0.s, z0.s
341 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
343 %tmp1 = load <8 x i32>, ptr %a
344 %tmp2 = load <8 x i32>, ptr %b
345 %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
346 store <8 x i32> %tmp3, ptr %a
350 ; Illegal REV pattern.
; The mask reverses each 8-element half of the <16 x i16> value separately
; rather than the whole vector, so the expected output is a TBL with an index
; vector loaded from the constant pool.
351 define void @test_rev_fail(ptr %a) #1 {
352 ; CHECK-LABEL: test_rev_fail:
354 ; CHECK-NEXT: ptrue p0.h
355 ; CHECK-NEXT: adrp x8, .LCPI20_0
356 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0
357 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
358 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x8]
359 ; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h
360 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
362 %tmp1 = load <16 x i16>, ptr %a
363 %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
364 store <16 x i16> %tmp2, ptr %a
368 ; Don't use SVE for 128-bit shuffle with two inputs
; Two <8 x i16> inputs are shuffled into one <16 x i16> result, with each group
; of four halfwords reversed. The expected output loads both inputs as NEON
; q-registers, assembles the result on a realigned stack slot with per-lane
; stores, and uses SVE only for the final 256-bit load and the store to %c.
369 define void @test_revv8i16v8i16(ptr %a, ptr %b, ptr %c) #1 {
370 ; CHECK-LABEL: test_revv8i16v8i16:
372 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
373 ; CHECK-NEXT: sub x9, sp, #48
374 ; CHECK-NEXT: mov x29, sp
375 ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
376 ; CHECK-NEXT: .cfi_def_cfa w29, 16
377 ; CHECK-NEXT: .cfi_offset w30, -8
378 ; CHECK-NEXT: .cfi_offset w29, -16
379 ; CHECK-NEXT: mov x8, sp
380 ; CHECK-NEXT: ldr q0, [x1]
381 ; CHECK-NEXT: ldr q1, [x0]
382 ; CHECK-NEXT: orr x9, x8, #0x1e
383 ; CHECK-NEXT: orr x10, x8, #0x1c
384 ; CHECK-NEXT: ptrue p0.h
385 ; CHECK-NEXT: st1 { v0.h }[4], [x9]
386 ; CHECK-NEXT: orr x9, x8, #0x18
387 ; CHECK-NEXT: st1 { v0.h }[7], [x9]
388 ; CHECK-NEXT: orr x9, x8, #0xe
389 ; CHECK-NEXT: st1 { v1.h }[4], [x9]
390 ; CHECK-NEXT: orr x9, x8, #0xc
391 ; CHECK-NEXT: st1 { v1.h }[5], [x9]
392 ; CHECK-NEXT: orr x9, x8, #0x8
393 ; CHECK-NEXT: st1 { v0.h }[5], [x10]
394 ; CHECK-NEXT: orr x10, x8, #0x10
395 ; CHECK-NEXT: st1 { v1.h }[7], [x9]
396 ; CHECK-NEXT: orr x9, x8, #0x4
397 ; CHECK-NEXT: st1 { v0.h }[3], [x10]
398 ; CHECK-NEXT: mov w10, #26 // =0x1a
399 ; CHECK-NEXT: st1 { v1.h }[1], [x9]
400 ; CHECK-NEXT: orr x9, x8, #0x2
401 ; CHECK-NEXT: st1 { v1.h }[2], [x9]
402 ; CHECK-NEXT: orr x9, x8, x10
403 ; CHECK-NEXT: mov w10, #20 // =0x14
404 ; CHECK-NEXT: st1 { v0.h }[6], [x9]
405 ; CHECK-NEXT: orr x9, x8, x10
406 ; CHECK-NEXT: mov w10, #18 // =0x12
407 ; CHECK-NEXT: st1 { v0.h }[1], [x9]
408 ; CHECK-NEXT: orr x9, x8, x10
409 ; CHECK-NEXT: st1 { v0.h }[2], [x9]
410 ; CHECK-NEXT: mov w9, #10 // =0xa
411 ; CHECK-NEXT: orr x9, x8, x9
412 ; CHECK-NEXT: st1 { v1.h }[3], [x8]
413 ; CHECK-NEXT: st1 { v1.h }[6], [x9]
414 ; CHECK-NEXT: str h0, [sp, #22]
415 ; CHECK-NEXT: str h1, [sp, #6]
416 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
417 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
418 ; CHECK-NEXT: mov sp, x29
419 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
421 %tmp1 = load <8 x i16>, ptr %a
422 %tmp2 = load <8 x i16>, ptr %b
423 %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
424 store <16 x i16> %tmp3, ptr %c
; Attribute groups referenced by the tests in this file:
;   #0 - SVE only; the vector width comes from the -aarch64-sve-vector-bits-min
;        option on each run line.
;   #1 - SVE with vscale_range(2,2), i.e. vscale is fixed at 2.
;   #2 - SVE2.1 (+sve2p1), used by the REVD tests.
428 attributes #0 = { "target-features"="+sve" }
429 attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
430 attributes #2 = { "target-features"="+sve2p1" }