1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
6 target triple = "aarch64-unknown-linux-gnu"
; Load of <4 x i8> from %a.
; With +sve or +sme the expected code is a predicated ld1b into .h lanes under
; "ptrue p0.h, vl4". With no -mattr features the expected code reads each byte
; with ldrb, stores it as a halfword at [sp, #8] through [sp, #14], and reloads
; the result with "ldr d0, [sp, #8]".
8 define <4 x i8> @load_v4i8(ptr %a) {
9 ; CHECK-LABEL: load_v4i8:
11 ; CHECK-NEXT: ptrue p0.h, vl4
12 ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
13 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
16 ; NONEON-NOSVE-LABEL: load_v4i8:
17 ; NONEON-NOSVE: // %bb.0:
18 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
19 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
20 ; NONEON-NOSVE-NEXT: ldrb w8, [x0, #3]
21 ; NONEON-NOSVE-NEXT: strh w8, [sp, #14]
22 ; NONEON-NOSVE-NEXT: ldrb w8, [x0, #2]
23 ; NONEON-NOSVE-NEXT: strh w8, [sp, #12]
24 ; NONEON-NOSVE-NEXT: ldrb w8, [x0, #1]
25 ; NONEON-NOSVE-NEXT: strh w8, [sp, #10]
26 ; NONEON-NOSVE-NEXT: ldrb w8, [x0]
27 ; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
28 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
29 ; NONEON-NOSVE-NEXT: add sp, sp, #16
30 ; NONEON-NOSVE-NEXT: ret
31 %load = load <4 x i8>, ptr %a
; Load of <8 x i8> from %a. Both sets of expectations use "ldr d0, [x0]".
35 define <8 x i8> @load_v8i8(ptr %a) {
36 ; CHECK-LABEL: load_v8i8:
38 ; CHECK-NEXT: ldr d0, [x0]
41 ; NONEON-NOSVE-LABEL: load_v8i8:
42 ; NONEON-NOSVE: // %bb.0:
43 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
44 ; NONEON-NOSVE-NEXT: ret
45 %load = load <8 x i8>, ptr %a
; Load of <16 x i8> from %a. Both sets of expectations use "ldr q0, [x0]".
49 define <16 x i8> @load_v16i8(ptr %a) {
50 ; CHECK-LABEL: load_v16i8:
52 ; CHECK-NEXT: ldr q0, [x0]
55 ; NONEON-NOSVE-LABEL: load_v16i8:
56 ; NONEON-NOSVE: // %bb.0:
57 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
58 ; NONEON-NOSVE-NEXT: ret
59 %load = load <16 x i8>, ptr %a
; Load of <32 x i8> from %a. Both sets of expectations load the two halves as a
; register pair with "ldp q0, q1, [x0]".
63 define <32 x i8> @load_v32i8(ptr %a) {
64 ; CHECK-LABEL: load_v32i8:
66 ; CHECK-NEXT: ldp q0, q1, [x0]
69 ; NONEON-NOSVE-LABEL: load_v32i8:
70 ; NONEON-NOSVE: // %bb.0:
71 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
72 ; NONEON-NOSVE-NEXT: ret
73 %load = load <32 x i8>, ptr %a
; Load of <2 x i16> from %a.
; With +sve or +sme the expected code is a predicated ld1h into .s lanes under
; "ptrue p0.s, vl2". With no -mattr features the expected code reads each
; halfword with ldrh, stores it as a 32-bit word at [sp, #8] and [sp, #12], and
; reloads the result with "ldr d0, [sp, #8]".
77 define <2 x i16> @load_v2i16(ptr %a) {
78 ; CHECK-LABEL: load_v2i16:
80 ; CHECK-NEXT: ptrue p0.s, vl2
81 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
82 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
85 ; NONEON-NOSVE-LABEL: load_v2i16:
86 ; NONEON-NOSVE: // %bb.0:
87 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
88 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
89 ; NONEON-NOSVE-NEXT: ldrh w8, [x0, #2]
90 ; NONEON-NOSVE-NEXT: str w8, [sp, #12]
91 ; NONEON-NOSVE-NEXT: ldrh w8, [x0]
92 ; NONEON-NOSVE-NEXT: str w8, [sp, #8]
93 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
94 ; NONEON-NOSVE-NEXT: add sp, sp, #16
95 ; NONEON-NOSVE-NEXT: ret
96 %load = load <2 x i16>, ptr %a
; Load of <2 x half> from %a.
; With +sve or +sme the expected code is "ldr s0, [x0]". With no -mattr
; features the expected code copies the 32-bit value through w8 to [sp, #8] and
; then reloads it with "ldr d0, [sp, #8]".
100 define <2 x half> @load_v2f16(ptr %a) {
101 ; CHECK-LABEL: load_v2f16:
103 ; CHECK-NEXT: ldr s0, [x0]
106 ; NONEON-NOSVE-LABEL: load_v2f16:
107 ; NONEON-NOSVE: // %bb.0:
108 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
109 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
110 ; NONEON-NOSVE-NEXT: ldr w8, [x0]
111 ; NONEON-NOSVE-NEXT: str w8, [sp, #8]
112 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
113 ; NONEON-NOSVE-NEXT: add sp, sp, #16
114 ; NONEON-NOSVE-NEXT: ret
115 %load = load <2 x half>, ptr %a
; Load of <4 x i16> from %a. Both sets of expectations use "ldr d0, [x0]".
119 define <4 x i16> @load_v4i16(ptr %a) {
120 ; CHECK-LABEL: load_v4i16:
122 ; CHECK-NEXT: ldr d0, [x0]
125 ; NONEON-NOSVE-LABEL: load_v4i16:
126 ; NONEON-NOSVE: // %bb.0:
127 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
128 ; NONEON-NOSVE-NEXT: ret
129 %load = load <4 x i16>, ptr %a
; Load of <4 x half> from %a. Both sets of expectations use "ldr d0, [x0]".
133 define <4 x half> @load_v4f16(ptr %a) {
134 ; CHECK-LABEL: load_v4f16:
136 ; CHECK-NEXT: ldr d0, [x0]
139 ; NONEON-NOSVE-LABEL: load_v4f16:
140 ; NONEON-NOSVE: // %bb.0:
141 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
142 ; NONEON-NOSVE-NEXT: ret
143 %load = load <4 x half>, ptr %a
; Load of <8 x i16> from %a. Both sets of expectations use "ldr q0, [x0]".
147 define <8 x i16> @load_v8i16(ptr %a) {
148 ; CHECK-LABEL: load_v8i16:
150 ; CHECK-NEXT: ldr q0, [x0]
153 ; NONEON-NOSVE-LABEL: load_v8i16:
154 ; NONEON-NOSVE: // %bb.0:
155 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
156 ; NONEON-NOSVE-NEXT: ret
157 %load = load <8 x i16>, ptr %a
; Load of <8 x half> from %a. Both sets of expectations use "ldr q0, [x0]".
161 define <8 x half> @load_v8f16(ptr %a) {
162 ; CHECK-LABEL: load_v8f16:
164 ; CHECK-NEXT: ldr q0, [x0]
167 ; NONEON-NOSVE-LABEL: load_v8f16:
168 ; NONEON-NOSVE: // %bb.0:
169 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
170 ; NONEON-NOSVE-NEXT: ret
171 %load = load <8 x half>, ptr %a
; Load of <16 x i16> from %a. Both sets of expectations load the two halves as
; a register pair with "ldp q0, q1, [x0]".
175 define <16 x i16> @load_v16i16(ptr %a) {
176 ; CHECK-LABEL: load_v16i16:
178 ; CHECK-NEXT: ldp q0, q1, [x0]
181 ; NONEON-NOSVE-LABEL: load_v16i16:
182 ; NONEON-NOSVE: // %bb.0:
183 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
184 ; NONEON-NOSVE-NEXT: ret
185 %load = load <16 x i16>, ptr %a
; Load of <16 x half> from %a, returned unchanged. Both sets of expectations
; load the two halves as a register pair with "ldp q0, q1, [x0]".
189 define <16 x half> @load_v16f16(ptr %a) {
190 ; CHECK-LABEL: load_v16f16:
192 ; CHECK-NEXT: ldp q0, q1, [x0]
195 ; NONEON-NOSVE-LABEL: load_v16f16:
196 ; NONEON-NOSVE: // %bb.0:
197 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
198 ; NONEON-NOSVE-NEXT: ret
199 %load = load <16 x half>, ptr %a
200 ret <16 x half> %load
; Load of <2 x i32> from %a. Both sets of expectations use "ldr d0, [x0]".
203 define <2 x i32> @load_v2i32(ptr %a) {
204 ; CHECK-LABEL: load_v2i32:
206 ; CHECK-NEXT: ldr d0, [x0]
209 ; NONEON-NOSVE-LABEL: load_v2i32:
210 ; NONEON-NOSVE: // %bb.0:
211 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
212 ; NONEON-NOSVE-NEXT: ret
213 %load = load <2 x i32>, ptr %a
; Load of <2 x float> from %a, returned unchanged. Both sets of expectations
; use "ldr d0, [x0]".
217 define <2 x float> @load_v2f32(ptr %a) {
218 ; CHECK-LABEL: load_v2f32:
220 ; CHECK-NEXT: ldr d0, [x0]
223 ; NONEON-NOSVE-LABEL: load_v2f32:
224 ; NONEON-NOSVE: // %bb.0:
225 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
226 ; NONEON-NOSVE-NEXT: ret
227 %load = load <2 x float>, ptr %a
228 ret <2 x float> %load
; Load of <4 x i32> from %a. Both sets of expectations use "ldr q0, [x0]".
231 define <4 x i32> @load_v4i32(ptr %a) {
232 ; CHECK-LABEL: load_v4i32:
234 ; CHECK-NEXT: ldr q0, [x0]
237 ; NONEON-NOSVE-LABEL: load_v4i32:
238 ; NONEON-NOSVE: // %bb.0:
239 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
240 ; NONEON-NOSVE-NEXT: ret
241 %load = load <4 x i32>, ptr %a
; Load of <4 x float> from %a, returned unchanged. Both sets of expectations
; use "ldr q0, [x0]".
245 define <4 x float> @load_v4f32(ptr %a) {
246 ; CHECK-LABEL: load_v4f32:
248 ; CHECK-NEXT: ldr q0, [x0]
251 ; NONEON-NOSVE-LABEL: load_v4f32:
252 ; NONEON-NOSVE: // %bb.0:
253 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
254 ; NONEON-NOSVE-NEXT: ret
255 %load = load <4 x float>, ptr %a
256 ret <4 x float> %load
; Load of <8 x i32> from %a. Both sets of expectations load the two halves as a
; register pair with "ldp q0, q1, [x0]".
259 define <8 x i32> @load_v8i32(ptr %a) {
260 ; CHECK-LABEL: load_v8i32:
262 ; CHECK-NEXT: ldp q0, q1, [x0]
265 ; NONEON-NOSVE-LABEL: load_v8i32:
266 ; NONEON-NOSVE: // %bb.0:
267 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
268 ; NONEON-NOSVE-NEXT: ret
269 %load = load <8 x i32>, ptr %a
; Load of <8 x float> from %a, returned unchanged. Both sets of expectations
; load the two halves as a register pair with "ldp q0, q1, [x0]".
273 define <8 x float> @load_v8f32(ptr %a) {
274 ; CHECK-LABEL: load_v8f32:
276 ; CHECK-NEXT: ldp q0, q1, [x0]
279 ; NONEON-NOSVE-LABEL: load_v8f32:
280 ; NONEON-NOSVE: // %bb.0:
281 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
282 ; NONEON-NOSVE-NEXT: ret
283 %load = load <8 x float>, ptr %a
284 ret <8 x float> %load
; Load of <1 x i64> from %a. Both sets of expectations use "ldr d0, [x0]".
287 define <1 x i64> @load_v1i64(ptr %a) {
288 ; CHECK-LABEL: load_v1i64:
290 ; CHECK-NEXT: ldr d0, [x0]
293 ; NONEON-NOSVE-LABEL: load_v1i64:
294 ; NONEON-NOSVE: // %bb.0:
295 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
296 ; NONEON-NOSVE-NEXT: ret
297 %load = load <1 x i64>, ptr %a
; Load of <1 x double> from %a, returned unchanged. Both sets of expectations
; use "ldr d0, [x0]".
301 define <1 x double> @load_v1f64(ptr %a) {
302 ; CHECK-LABEL: load_v1f64:
304 ; CHECK-NEXT: ldr d0, [x0]
307 ; NONEON-NOSVE-LABEL: load_v1f64:
308 ; NONEON-NOSVE: // %bb.0:
309 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
310 ; NONEON-NOSVE-NEXT: ret
311 %load = load <1 x double>, ptr %a
312 ret <1 x double> %load
; Load of <2 x i64> from %a. Both sets of expectations use "ldr q0, [x0]".
315 define <2 x i64> @load_v2i64(ptr %a) {
316 ; CHECK-LABEL: load_v2i64:
318 ; CHECK-NEXT: ldr q0, [x0]
321 ; NONEON-NOSVE-LABEL: load_v2i64:
322 ; NONEON-NOSVE: // %bb.0:
323 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
324 ; NONEON-NOSVE-NEXT: ret
325 %load = load <2 x i64>, ptr %a
; Load of <2 x double> from %a, returned unchanged. Both sets of expectations
; use "ldr q0, [x0]".
329 define <2 x double> @load_v2f64(ptr %a) {
330 ; CHECK-LABEL: load_v2f64:
332 ; CHECK-NEXT: ldr q0, [x0]
335 ; NONEON-NOSVE-LABEL: load_v2f64:
336 ; NONEON-NOSVE: // %bb.0:
337 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
338 ; NONEON-NOSVE-NEXT: ret
339 %load = load <2 x double>, ptr %a
340 ret <2 x double> %load
; Load of <4 x i64> from %a. Both sets of expectations load the two halves as a
; register pair with "ldp q0, q1, [x0]".
343 define <4 x i64> @load_v4i64(ptr %a) {
344 ; CHECK-LABEL: load_v4i64:
346 ; CHECK-NEXT: ldp q0, q1, [x0]
349 ; NONEON-NOSVE-LABEL: load_v4i64:
350 ; NONEON-NOSVE: // %bb.0:
351 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
352 ; NONEON-NOSVE-NEXT: ret
353 %load = load <4 x i64>, ptr %a
; Load of <4 x double> from %a, returned unchanged. Both sets of expectations
; load the two halves as a register pair with "ldp q0, q1, [x0]".
357 define <4 x double> @load_v4f64(ptr %a) {
358 ; CHECK-LABEL: load_v4f64:
360 ; CHECK-NEXT: ldp q0, q1, [x0]
363 ; NONEON-NOSVE-LABEL: load_v4f64:
364 ; NONEON-NOSVE: // %bb.0:
365 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
366 ; NONEON-NOSVE-NEXT: ret
367 %load = load <4 x double>, ptr %a
368 ret <4 x double> %load