1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s
6 ; Test we can code generate patterns of the form:
7 ; fixed_length_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
8 ; scalable_vector = ISD::INSERT_SUBVECTOR scalable_vector, fixed_length_vector, 0
10 ; NOTE: Currently shufflevector does not support scalable vectors so it cannot
11 ; be used to model the above operations. Instead these tests rely on knowing
12 ; how fixed length operations are lowered to scalable ones, with multiple blocks
13 ; ensuring insert/extract sequences are not folded away.
; Module-level target configuration. The visible RUN lines pass no -mtriple,
; so the AArch64 Linux triple below is what selects the backend for llc.
15 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
16 target triple = "aarch64-unknown-linux-gnu"
; Copies a <4 x i8> vector from %in to %out. The expected code uses a vl4
; predicate on .h elements (ptrue p0.h, vl4) with ld1b/st1b through z0.h.
19 define void @subvector_v4i8(ptr %in, ptr %out) {
20 ; CHECK-LABEL: subvector_v4i8:
21 ; CHECK: // %bb.0: // %bb1
22 ; CHECK-NEXT: ptrue p0.h, vl4
23 ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
24 ; CHECK-NEXT: st1b { z0.h }, p0, [x1]
26 %a = load <4 x i8>, ptr %in
30 store <4 x i8> %a, ptr %out
; Copies a <8 x i8> vector from %in to %out. The expected code is a plain
; ldr/str pair through d0, with no predicate set up.
34 define void @subvector_v8i8(ptr %in, ptr %out) {
35 ; CHECK-LABEL: subvector_v8i8:
36 ; CHECK: // %bb.0: // %bb1
37 ; CHECK-NEXT: ldr d0, [x0]
38 ; CHECK-NEXT: str d0, [x1]
40 %a = load <8 x i8>, ptr %in
44 store <8 x i8> %a, ptr %out
; Copies a <16 x i8> vector from %in to %out. The expected code is a plain
; ldr/str pair through q0.
48 define void @subvector_v16i8(ptr %in, ptr %out) {
49 ; CHECK-LABEL: subvector_v16i8:
50 ; CHECK: // %bb.0: // %bb1
51 ; CHECK-NEXT: ldr q0, [x0]
52 ; CHECK-NEXT: str q0, [x1]
54 %a = load <16 x i8>, ptr %in
58 store <16 x i8> %a, ptr %out
; Copies a <32 x i8> vector from %in to %out. The expected code moves the
; value as a register pair: ldp/stp of q0 and q1.
62 define void @subvector_v32i8(ptr %in, ptr %out) {
63 ; CHECK-LABEL: subvector_v32i8:
64 ; CHECK: // %bb.0: // %bb1
65 ; CHECK-NEXT: ldp q0, q1, [x0]
66 ; CHECK-NEXT: stp q0, q1, [x1]
68 %a = load <32 x i8>, ptr %in
72 store <32 x i8> %a, ptr %out
; Copies a <2 x i16> vector from %in to %out. The expected code reserves 16
; bytes of stack, loads each halfword with a scalar ldrh, writes both to the
; stack as 32-bit words at [sp, #8] and [sp, #12], reloads them together as
; d0, and stores with st1h from z0.s under a vl2 .s predicate.
77 define void @subvector_v2i16(ptr %in, ptr %out) {
78 ; CHECK-LABEL: subvector_v2i16:
79 ; CHECK: // %bb.0: // %bb1
80 ; CHECK-NEXT: sub sp, sp, #16
81 ; CHECK-NEXT: .cfi_def_cfa_offset 16
82 ; CHECK-NEXT: ldrh w8, [x0, #2]
83 ; CHECK-NEXT: ptrue p0.s, vl2
84 ; CHECK-NEXT: str w8, [sp, #12]
85 ; CHECK-NEXT: ldrh w8, [x0]
86 ; CHECK-NEXT: str w8, [sp, #8]
87 ; CHECK-NEXT: ldr d0, [sp, #8]
88 ; CHECK-NEXT: st1h { z0.s }, p0, [x1]
89 ; CHECK-NEXT: add sp, sp, #16
91 %a = load <2 x i16>, ptr %in
95 store <2 x i16> %a, ptr %out
; Copies a <4 x i16> vector from %in to %out. The expected code is a plain
; ldr/str pair through d0.
99 define void @subvector_v4i16(ptr %in, ptr %out) {
100 ; CHECK-LABEL: subvector_v4i16:
101 ; CHECK: // %bb.0: // %bb1
102 ; CHECK-NEXT: ldr d0, [x0]
103 ; CHECK-NEXT: str d0, [x1]
105 %a = load <4 x i16>, ptr %in
109 store <4 x i16> %a, ptr %out
; Copies a <8 x i16> vector from %in to %out. The expected code is a plain
; ldr/str pair through q0.
113 define void @subvector_v8i16(ptr %in, ptr %out) {
114 ; CHECK-LABEL: subvector_v8i16:
115 ; CHECK: // %bb.0: // %bb1
116 ; CHECK-NEXT: ldr q0, [x0]
117 ; CHECK-NEXT: str q0, [x1]
119 %a = load <8 x i16>, ptr %in
123 store <8 x i16> %a, ptr %out
; Copies a <16 x i16> vector from %in to %out. The expected code moves the
; value as a register pair: ldp/stp of q0 and q1.
127 define void @subvector_v16i16(ptr %in, ptr %out) {
128 ; CHECK-LABEL: subvector_v16i16:
129 ; CHECK: // %bb.0: // %bb1
130 ; CHECK-NEXT: ldp q0, q1, [x0]
131 ; CHECK-NEXT: stp q0, q1, [x1]
133 %a = load <16 x i16>, ptr %in
137 store <16 x i16> %a, ptr %out
; Copies a <2 x i32> vector from %in to %out. The expected code is a plain
; ldr/str pair through d0.
142 define void @subvector_v2i32(ptr %in, ptr %out) {
143 ; CHECK-LABEL: subvector_v2i32:
144 ; CHECK: // %bb.0: // %bb1
145 ; CHECK-NEXT: ldr d0, [x0]
146 ; CHECK-NEXT: str d0, [x1]
148 %a = load <2 x i32>, ptr %in
152 store <2 x i32> %a, ptr %out
; Copies a <4 x i32> vector from %in to %out. The expected code is a plain
; ldr/str pair through q0.
156 define void @subvector_v4i32(ptr %in, ptr %out) {
157 ; CHECK-LABEL: subvector_v4i32:
158 ; CHECK: // %bb.0: // %bb1
159 ; CHECK-NEXT: ldr q0, [x0]
160 ; CHECK-NEXT: str q0, [x1]
162 %a = load <4 x i32>, ptr %in
166 store <4 x i32> %a, ptr %out
; Copies a <8 x i32> vector from %in to %out. The expected code moves the
; value as a register pair: ldp/stp of q0 and q1.
170 define void @subvector_v8i32(ptr %in, ptr %out) {
171 ; CHECK-LABEL: subvector_v8i32:
172 ; CHECK: // %bb.0: // %bb1
173 ; CHECK-NEXT: ldp q0, q1, [x0]
174 ; CHECK-NEXT: stp q0, q1, [x1]
176 %a = load <8 x i32>, ptr %in
180 store <8 x i32> %a, ptr %out
; Copies a <2 x i64> vector from %in to %out. The expected code is a plain
; ldr/str pair through q0.
185 define void @subvector_v2i64(ptr %in, ptr %out) {
186 ; CHECK-LABEL: subvector_v2i64:
187 ; CHECK: // %bb.0: // %bb1
188 ; CHECK-NEXT: ldr q0, [x0]
189 ; CHECK-NEXT: str q0, [x1]
191 %a = load <2 x i64>, ptr %in
195 store <2 x i64> %a, ptr %out
; Copies a <4 x i64> vector from %in to %out. The expected code moves the
; value as a register pair: ldp/stp of q0 and q1.
199 define void @subvector_v4i64(ptr %in, ptr %out) {
200 ; CHECK-LABEL: subvector_v4i64:
201 ; CHECK: // %bb.0: // %bb1
202 ; CHECK-NEXT: ldp q0, q1, [x0]
203 ; CHECK-NEXT: stp q0, q1, [x1]
205 %a = load <4 x i64>, ptr %in
209 store <4 x i64> %a, ptr %out
; Copies a <2 x half> vector from %in to %out. The expected code moves the
; value through the general-purpose register w8 (ldr/str), not a vector
; register.
214 define void @subvector_v2f16(ptr %in, ptr %out) {
215 ; CHECK-LABEL: subvector_v2f16:
216 ; CHECK: // %bb.0: // %bb1
217 ; CHECK-NEXT: ldr w8, [x0]
218 ; CHECK-NEXT: str w8, [x1]
220 %a = load <2 x half>, ptr %in
224 store <2 x half> %a, ptr %out
; Copies a <4 x half> vector from %in to %out. The expected code is a plain
; ldr/str pair through d0.
228 define void @subvector_v4f16(ptr %in, ptr %out) {
229 ; CHECK-LABEL: subvector_v4f16:
230 ; CHECK: // %bb.0: // %bb1
231 ; CHECK-NEXT: ldr d0, [x0]
232 ; CHECK-NEXT: str d0, [x1]
234 %a = load <4 x half>, ptr %in
238 store <4 x half> %a, ptr %out
; Copies a <8 x half> vector from %in to %out. The expected code is a plain
; ldr/str pair through q0.
242 define void @subvector_v8f16(ptr %in, ptr %out) {
243 ; CHECK-LABEL: subvector_v8f16:
244 ; CHECK: // %bb.0: // %bb1
245 ; CHECK-NEXT: ldr q0, [x0]
246 ; CHECK-NEXT: str q0, [x1]
248 %a = load <8 x half>, ptr %in
252 store <8 x half> %a, ptr %out
; Copies a <16 x half> vector from %in to %out. The expected code moves the
; value as a register pair: ldp/stp of q0 and q1.
256 define void @subvector_v16f16(ptr %in, ptr %out) {
257 ; CHECK-LABEL: subvector_v16f16:
258 ; CHECK: // %bb.0: // %bb1
259 ; CHECK-NEXT: ldp q0, q1, [x0]
260 ; CHECK-NEXT: stp q0, q1, [x1]
262 %a = load <16 x half>, ptr %in
266 store <16 x half> %a, ptr %out
; Copies a <2 x float> vector from %in to %out. The expected code is a plain
; ldr/str pair through d0.
271 define void @subvector_v2f32(ptr %in, ptr %out) {
272 ; CHECK-LABEL: subvector_v2f32:
273 ; CHECK: // %bb.0: // %bb1
274 ; CHECK-NEXT: ldr d0, [x0]
275 ; CHECK-NEXT: str d0, [x1]
277 %a = load <2 x float>, ptr %in
281 store <2 x float> %a, ptr %out
; Copies a <4 x float> vector from %in to %out. The expected code is a plain
; ldr/str pair through q0.
285 define void @subvector_v4f32(ptr %in, ptr %out) {
286 ; CHECK-LABEL: subvector_v4f32:
287 ; CHECK: // %bb.0: // %bb1
288 ; CHECK-NEXT: ldr q0, [x0]
289 ; CHECK-NEXT: str q0, [x1]
291 %a = load <4 x float>, ptr %in
295 store <4 x float> %a, ptr %out
; Copies a <8 x float> vector from %in to %out. The expected code moves the
; value as a register pair: ldp/stp of q0 and q1.
299 define void @subvector_v8f32(ptr %in, ptr %out) {
300 ; CHECK-LABEL: subvector_v8f32:
301 ; CHECK: // %bb.0: // %bb1
302 ; CHECK-NEXT: ldp q0, q1, [x0]
303 ; CHECK-NEXT: stp q0, q1, [x1]
305 %a = load <8 x float>, ptr %in
309 store <8 x float> %a, ptr %out
; Copies a <2 x double> vector from %in to %out. The expected code is a plain
; ldr/str pair through q0.
314 define void @subvector_v2f64(ptr %in, ptr %out) {
315 ; CHECK-LABEL: subvector_v2f64:
316 ; CHECK: // %bb.0: // %bb1
317 ; CHECK-NEXT: ldr q0, [x0]
318 ; CHECK-NEXT: str q0, [x1]
320 %a = load <2 x double>, ptr %in
324 store <2 x double> %a, ptr %out
; Copies a <4 x double> vector from %in to %out. The expected code moves the
; value as a register pair: ldp/stp of q0 and q1.
328 define void @subvector_v4f64(ptr %in, ptr %out) {
329 ; CHECK-LABEL: subvector_v4f64:
330 ; CHECK: // %bb.0: // %bb1
331 ; CHECK-NEXT: ldp q0, q1, [x0]
332 ; CHECK-NEXT: stp q0, q1, [x1]
334 %a = load <4 x double>, ptr %in
338 store <4 x double> %a, ptr %out