1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 -asm-verbose=0 < %s | FileCheck %s
;
; LD1RQB: @llvm.aarch64.sve.ld1rq.nxv16i8 should select ld1rqb. The tests
; below probe the immediate addressing form: offsets -128 and 112 still fold
; into the instruction, while -129 and 113 force a separate sub/add to
; materialize the address (see the CHECK lines of each function).
;

; Base-register form: no offset, plain [x0].
7 define <vscale x 16 x i8> @ld1rqb_i8(<vscale x 16 x i1> %pred, i8* %addr) {
8 ; CHECK-LABEL: ld1rqb_i8:
9 ; CHECK: ld1rqb { z0.b }, p0/z, [x0]
11 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %addr)
12 ret <vscale x 16 x i8> %res

; Offset 16 folds into the immediate form [x0, #16].
15 define <vscale x 16 x i8> @ld1rqb_i8_imm(<vscale x 16 x i1> %pred, i8* %addr) {
16 ; CHECK-LABEL: ld1rqb_i8_imm:
17 ; CHECK: ld1rqb { z0.b }, p0/z, [x0, #16]
19 %ptr = getelementptr inbounds i8, i8* %addr, i8 16
20 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
21 ret <vscale x 16 x i8> %res

; -128 is the smallest offset that still folds into the immediate form.
24 define <vscale x 16 x i8> @ld1rqb_i8_imm_lower_bound(<vscale x 16 x i1> %pred, i8* %addr) {
25 ; CHECK-LABEL: ld1rqb_i8_imm_lower_bound:
26 ; CHECK: ld1rqb { z0.b }, p0/z, [x0, #-128]
28 %ptr = getelementptr inbounds i8, i8* %addr, i8 -128
29 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
30 ret <vscale x 16 x i8> %res

; 112 is the largest offset that still folds into the immediate form.
33 define <vscale x 16 x i8> @ld1rqb_i8_imm_upper_bound(<vscale x 16 x i1> %pred, i8* %addr) {
34 ; CHECK-LABEL: ld1rqb_i8_imm_upper_bound:
35 ; CHECK: ld1rqb { z0.b }, p0/z, [x0, #112]
37 %ptr = getelementptr inbounds i8, i8* %addr, i8 112
38 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
39 ret <vscale x 16 x i8> %res

; -129 is below the immediate range: expect a separate sub into a scratch
; register, then a no-offset load from x8.
42 define <vscale x 16 x i8> @ld1rqb_i8_imm_out_of_lower_bound(<vscale x 16 x i1> %pred, i8* %addr) {
43 ; CHECK-LABEL: ld1rqb_i8_imm_out_of_lower_bound:
44 ; CHECK: sub x8, x0, #129
45 ; CHECK-NEXT: ld1rqb { z0.b }, p0/z, [x8]
47 %ptr = getelementptr inbounds i8, i8* %addr, i64 -129
48 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
49 ret <vscale x 16 x i8> %res

; 113 is above the immediate range: expect add + no-offset load.
52 define <vscale x 16 x i8> @ld1rqb_i8_imm_out_of_upper_bound(<vscale x 16 x i1> %pred, i8* %addr) {
53 ; CHECK-LABEL: ld1rqb_i8_imm_out_of_upper_bound:
54 ; CHECK: add x8, x0, #113
55 ; CHECK-NEXT: ld1rqb { z0.b }, p0/z, [x8]
57 %ptr = getelementptr inbounds i8, i8* %addr, i64 113
58 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
59 ret <vscale x 16 x i8> %res
;
; LD1RQH: half-word variants of ld1rq for i16, f16 and bf16 element types
; (bf16 relies on +bf16 from the RUN line). Offsets in the immediate-form
; tests are element counts in the IR but byte offsets in the expected asm.
;

; i16, base-register form.
66 define <vscale x 8 x i16> @ld1rqh_i16(<vscale x 8 x i1> %pred, i16* %addr) {
67 ; CHECK-LABEL: ld1rqh_i16:
68 ; CHECK: ld1rqh { z0.h }, p0/z, [x0]
70 %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> %pred, i16* %addr)
71 ret <vscale x 8 x i16> %res

; f16, base-register form.
74 define <vscale x 8 x half> @ld1rqh_f16(<vscale x 8 x i1> %pred, half* %addr) {
75 ; CHECK-LABEL: ld1rqh_f16:
76 ; CHECK: ld1rqh { z0.h }, p0/z, [x0]
78 %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1> %pred, half* %addr)
79 ret <vscale x 8 x half> %res

; -32 i16 elements = -64 bytes, folded into the immediate form.
82 define <vscale x 8 x i16> @ld1rqh_i16_imm(<vscale x 8 x i1> %pred, i16* %addr) {
83 ; CHECK-LABEL: ld1rqh_i16_imm:
84 ; CHECK: ld1rqh { z0.h }, p0/z, [x0, #-64]
86 %ptr = getelementptr inbounds i16, i16* %addr, i16 -32
87 %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> %pred, i16* %ptr)
88 ret <vscale x 8 x i16> %res

; -8 half elements = -16 bytes, folded into the immediate form.
91 define <vscale x 8 x half> @ld1rqh_f16_imm(<vscale x 8 x i1> %pred, half* %addr) {
92 ; CHECK-LABEL: ld1rqh_f16_imm:
93 ; CHECK: ld1rqh { z0.h }, p0/z, [x0, #-16]
95 %ptr = getelementptr inbounds half, half* %addr, i16 -8
96 %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1> %pred, half* %ptr)
97 ret <vscale x 8 x half> %res

; bf16, base-register form.
100 define <vscale x 8 x bfloat> @ld1rqh_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
101 ; CHECK-LABEL: ld1rqh_bf16:
102 ; CHECK: ld1rqh { z0.h }, p0/z, [x0]
104 %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
105 ret <vscale x 8 x bfloat> %res

; -8 bfloat elements = -16 bytes, folded into the immediate form.
108 define <vscale x 8 x bfloat> @ld1rqh_bf16_imm(<vscale x 8 x i1> %pred, bfloat* %addr) {
109 ; CHECK-LABEL: ld1rqh_bf16_imm:
110 ; CHECK: ld1rqh { z0.h }, p0/z, [x0, #-16]
112 %ptr = getelementptr inbounds bfloat, bfloat* %addr, i16 -8
113 %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %ptr)
114 ret <vscale x 8 x bfloat> %res
;
; LD1RQW: word variants of ld1rq for i32 and f32 element types.
;

; i32, base-register form.
121 define <vscale x 4 x i32> @ld1rqw_i32(<vscale x 4 x i1> %pred, i32* %addr) {
122 ; CHECK-LABEL: ld1rqw_i32:
123 ; CHECK: ld1rqw { z0.s }, p0/z, [x0]
125 %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> %pred, i32* %addr)
126 ret <vscale x 4 x i32> %res

; f32, base-register form.
129 define <vscale x 4 x float> @ld1rqw_f32(<vscale x 4 x i1> %pred, float* %addr) {
130 ; CHECK-LABEL: ld1rqw_f32:
131 ; CHECK: ld1rqw { z0.s }, p0/z, [x0]
133 %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1> %pred, float* %addr)
134 ret <vscale x 4 x float> %res

; 28 i32 elements = 112 bytes (the upper bound of the immediate form).
137 define <vscale x 4 x i32> @ld1rqw_i32_imm(<vscale x 4 x i1> %pred, i32* %addr) {
138 ; CHECK-LABEL: ld1rqw_i32_imm:
139 ; CHECK: ld1rqw { z0.s }, p0/z, [x0, #112]
141 %ptr = getelementptr inbounds i32, i32* %addr, i32 28
142 %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> %pred, i32* %ptr)
143 ret <vscale x 4 x i32> %res

; 8 float elements = 32 bytes, folded into the immediate form.
146 define <vscale x 4 x float> @ld1rqw_f32_imm(<vscale x 4 x i1> %pred, float* %addr) {
147 ; CHECK-LABEL: ld1rqw_f32_imm:
148 ; CHECK: ld1rqw { z0.s }, p0/z, [x0, #32]
150 %ptr = getelementptr inbounds float, float* %addr, i32 8
151 %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1> %pred, float* %ptr)
152 ret <vscale x 4 x float> %res
;
; LD1RQD: double-word variants of ld1rq for i64 and f64 element types.
;

; i64, base-register form.
159 define <vscale x 2 x i64> @ld1rqd_i64(<vscale x 2 x i1> %pred, i64* %addr) {
160 ; CHECK-LABEL: ld1rqd_i64:
161 ; CHECK: ld1rqd { z0.d }, p0/z, [x0]
163 %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> %pred, i64* %addr)
164 ret <vscale x 2 x i64> %res

; f64, base-register form.
167 define <vscale x 2 x double> @ld1rqd_f64(<vscale x 2 x i1> %pred, double* %addr) {
168 ; CHECK-LABEL: ld1rqd_f64:
169 ; CHECK: ld1rqd { z0.d }, p0/z, [x0]
171 %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1> %pred, double* %addr)
172 ret <vscale x 2 x double> %res

; 8 i64 elements = 64 bytes, folded into the immediate form.
175 define <vscale x 2 x i64> @ld1rqd_i64_imm(<vscale x 2 x i1> %pred, i64* %addr) {
176 ; CHECK-LABEL: ld1rqd_i64_imm:
177 ; CHECK: ld1rqd { z0.d }, p0/z, [x0, #64]
179 %ptr = getelementptr inbounds i64, i64* %addr, i64 8
180 %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> %pred, i64* %ptr)
181 ret <vscale x 2 x i64> %res

; -16 double elements = -128 bytes (the lower bound of the immediate form).
184 define <vscale x 2 x double> @ld1rqd_f64_imm(<vscale x 2 x i1> %pred, double* %addr) {
185 ; CHECK-LABEL: ld1rqd_f64_imm:
186 ; CHECK: ld1rqd { z0.d }, p0/z, [x0, #-128]
188 %ptr = getelementptr inbounds double, double* %addr, i64 -16
189 %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1> %pred, double* %ptr)
190 ret <vscale x 2 x double> %res
;
; LDNT1: @llvm.aarch64.sve.ldnt1.* should select the matching
; ldnt1b/ldnt1h/ldnt1w/ldnt1d instruction for each element type.
; NOTE(review): the call continuation lines (the pointer argument) appear
; elided in this excerpt — verify against the full file.
;

197 define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
198 ; CHECK-LABEL: ldnt1b_i8:
199 ; CHECK: ldnt1b { z0.b }, p0/z, [x0]
201 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred,
203 ret <vscale x 16 x i8> %res

210 define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
211 ; CHECK-LABEL: ldnt1h_i16:
212 ; CHECK: ldnt1h { z0.h }, p0/z, [x0]
214 %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %pred,
216 ret <vscale x 8 x i16> %res

219 define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, half* %addr) {
220 ; CHECK-LABEL: ldnt1h_f16:
221 ; CHECK: ldnt1h { z0.h }, p0/z, [x0]
223 %res = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %pred,
225 ret <vscale x 8 x half> %res

; bf16 variant requires +bf16 from the RUN line.
228 define <vscale x 8 x bfloat> @ldnt1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
229 ; CHECK-LABEL: ldnt1h_bf16:
230 ; CHECK: ldnt1h { z0.h }, p0/z, [x0]
232 %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> %pred,
234 ret <vscale x 8 x bfloat> %res

241 define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
242 ; CHECK-LABEL: ldnt1w_i32:
243 ; CHECK: ldnt1w { z0.s }, p0/z, [x0]
245 %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pred,
247 ret <vscale x 4 x i32> %res

250 define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, float* %addr) {
251 ; CHECK-LABEL: ldnt1w_f32:
252 ; CHECK: ldnt1w { z0.s }, p0/z, [x0]
254 %res = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %pred,
256 ret <vscale x 4 x float> %res

263 define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
264 ; CHECK-LABEL: ldnt1d_i64:
265 ; CHECK: ldnt1d { z0.d }, p0/z, [x0]
267 %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %pred,
269 ret <vscale x 2 x i64> %res

272 define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, double* %addr) {
273 ; CHECK-LABEL: ldnt1d_f64:
274 ; CHECK: ldnt1d { z0.d }, p0/z, [x0]
276 %res = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %pred,
278 ret <vscale x 2 x double> %res
;
; LD2: @llvm.aarch64.sve.ld2.* returns a doubled-length vector and should
; select the two-register structured load ld2b/ld2h/ld2w/ld2d, writing the
; consecutive pair { z0, z1 }.
;

285 define <vscale x 32 x i8> @ld2b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
286 ; CHECK-LABEL: ld2b_i8:
287 ; CHECK: ld2b { z0.b, z1.b }, p0/z, [x0]
289 %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
290 ret <vscale x 32 x i8> %res

297 define <vscale x 16 x i16> @ld2h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
298 ; CHECK-LABEL: ld2h_i16:
299 ; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0]
301 %res = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
302 ret <vscale x 16 x i16> %res

305 define <vscale x 16 x half> @ld2h_f16(<vscale x 8 x i1> %pred, half* %addr) {
306 ; CHECK-LABEL: ld2h_f16:
307 ; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0]
309 %res = call <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
310 ret <vscale x 16 x half> %res

; bf16 variant requires +bf16 from the RUN line.
313 define <vscale x 16 x bfloat> @ld2h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
314 ; CHECK-LABEL: ld2h_bf16:
315 ; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0]
317 %res = call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
318 ret <vscale x 16 x bfloat> %res

325 define <vscale x 8 x i32> @ld2w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
326 ; CHECK-LABEL: ld2w_i32:
327 ; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0]
329 %res = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
330 ret <vscale x 8 x i32> %res

333 define <vscale x 8 x float> @ld2w_f32(<vscale x 4 x i1> %pred, float* %addr) {
334 ; CHECK-LABEL: ld2w_f32:
335 ; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0]
337 %res = call <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
338 ret <vscale x 8 x float> %res

345 define <vscale x 4 x i64> @ld2d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
346 ; CHECK-LABEL: ld2d_i64:
347 ; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0]
349 %res = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
350 ret <vscale x 4 x i64> %res

353 define <vscale x 4 x double> @ld2d_f64(<vscale x 2 x i1> %pred, double* %addr) {
354 ; CHECK-LABEL: ld2d_f64:
355 ; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0]
357 %res = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
358 ret <vscale x 4 x double> %res
;
; LD3: @llvm.aarch64.sve.ld3.* returns a tripled-length vector and should
; select the three-register structured load ld3b/ld3h/ld3w/ld3d, writing the
; consecutive triple { z0, z1, z2 }.
;

365 define <vscale x 48 x i8> @ld3b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
366 ; CHECK-LABEL: ld3b_i8:
367 ; CHECK: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
369 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
370 ret <vscale x 48 x i8> %res

377 define <vscale x 24 x i16> @ld3h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
378 ; CHECK-LABEL: ld3h_i16:
379 ; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
381 %res = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
382 ret <vscale x 24 x i16> %res

385 define <vscale x 24 x half> @ld3h_f16(<vscale x 8 x i1> %pred, half* %addr) {
386 ; CHECK-LABEL: ld3h_f16:
387 ; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
389 %res = call <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
390 ret <vscale x 24 x half> %res

; bf16 variant requires +bf16 from the RUN line.
393 define <vscale x 24 x bfloat> @ld3h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
394 ; CHECK-LABEL: ld3h_bf16:
395 ; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
397 %res = call <vscale x 24 x bfloat> @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
398 ret <vscale x 24 x bfloat> %res

405 define <vscale x 12 x i32> @ld3w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
406 ; CHECK-LABEL: ld3w_i32:
407 ; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
409 %res = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
410 ret <vscale x 12 x i32> %res

413 define <vscale x 12 x float> @ld3w_f32(<vscale x 4 x i1> %pred, float* %addr) {
414 ; CHECK-LABEL: ld3w_f32:
415 ; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
417 %res = call <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
418 ret <vscale x 12 x float> %res

425 define <vscale x 6 x i64> @ld3d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
426 ; CHECK-LABEL: ld3d_i64:
427 ; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
429 %res = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
430 ret <vscale x 6 x i64> %res

433 define <vscale x 6 x double> @ld3d_f64(<vscale x 2 x i1> %pred, double* %addr) {
434 ; CHECK-LABEL: ld3d_f64:
435 ; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
437 %res = call <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
438 ret <vscale x 6 x double> %res
;
; LD4: @llvm.aarch64.sve.ld4.* returns a quadrupled-length vector and should
; select the four-register structured load ld4b/ld4h/ld4w/ld4d, writing the
; consecutive quad { z0, z1, z2, z3 }.
;

445 define <vscale x 64 x i8> @ld4b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
446 ; CHECK-LABEL: ld4b_i8:
447 ; CHECK: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
449 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
450 ret <vscale x 64 x i8> %res

457 define <vscale x 32 x i16> @ld4h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
458 ; CHECK-LABEL: ld4h_i16:
459 ; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
461 %res = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
462 ret <vscale x 32 x i16> %res

465 define <vscale x 32 x half> @ld4h_f16(<vscale x 8 x i1> %pred, half* %addr) {
466 ; CHECK-LABEL: ld4h_f16:
467 ; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
469 %res = call <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
470 ret <vscale x 32 x half> %res

; bf16 variant requires +bf16 from the RUN line.
473 define <vscale x 32 x bfloat> @ld4h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
474 ; CHECK-LABEL: ld4h_bf16:
475 ; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
477 %res = call <vscale x 32 x bfloat> @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
478 ret <vscale x 32 x bfloat> %res

485 define <vscale x 16 x i32> @ld4w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
486 ; CHECK-LABEL: ld4w_i32:
487 ; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
489 %res = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
490 ret <vscale x 16 x i32> %res

493 define <vscale x 16 x float> @ld4w_f32(<vscale x 4 x i1> %pred, float* %addr) {
494 ; CHECK-LABEL: ld4w_f32:
495 ; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
497 %res = call <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
498 ret <vscale x 16 x float> %res

505 define <vscale x 8 x i64> @ld4d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
506 ; CHECK-LABEL: ld4d_i64:
507 ; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
509 %res = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
510 ret <vscale x 8 x i64> %res

513 define <vscale x 8 x double> @ld4d_f64(<vscale x 2 x i1> %pred, double* %addr) {
514 ; CHECK-LABEL: ld4d_f64:
515 ; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
517 %res = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
518 ret <vscale x 8 x double> %res
522 declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1>, i8*)
523 declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1>, i16*)
524 declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1>, i32*)
525 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1>, i64*)
526 declare <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1>, half*)
527 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1>, bfloat*)
528 declare <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1>, float*)
529 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1>, double*)
531 declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, i8*)
532 declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, i16*)
533 declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, i32*)
534 declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, i64*)
535 declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, half*)
536 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1>, bfloat*)
537 declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, float*)
538 declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, double*)
540 declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
541 declare <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(<vscale x 8 x i1>, i16*)
542 declare <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(<vscale x 4 x i1>, i32*)
543 declare <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(<vscale x 2 x i1>, i64*)
544 declare <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
545 declare <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)
546 declare <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
547 declare <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
549 declare <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
550 declare <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(<vscale x 8 x i1>, i16*)
551 declare <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(<vscale x 4 x i1>, i32*)
552 declare <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(<vscale x 2 x i1>, i64*)
553 declare <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
554 declare <vscale x 24 x bfloat> @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)
555 declare <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
556 declare <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
558 declare <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
559 declare <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1>, i16*)
560 declare <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1>, i32*)
561 declare <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1>, i64*)
562 declare <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
563 declare <vscale x 32 x bfloat> @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)
564 declare <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
565 declare <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)