; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64
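
; The first two RUN lines enable +optimized-zero-stride-load, so zero-strided
; loads may be kept as a vlse with a zero stride operand (CHECK-OPT); the last
; two leave the feature off, and those loads are instead lowered to a scalar
; load plus a splat (CHECK-NO-OPT).
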
declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @strided_vpload_nxv1i8_i8(ptr %ptr, i8 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i8_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i16(ptr, i16, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @strided_vpload_nxv1i8_i16(ptr %ptr, i16 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i8_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i16(ptr %ptr, i16 %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i64(ptr, i64, <vscale x 1 x i1>, i32)
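
; On RV32 the i64 stride below occupies the a1/a2 register pair, so the EVL
; arrives in a3 and the lowered vlse8.v takes its stride from a1 (the low
; word), as the RV32 checks show.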
define <vscale x 1 x i8> @strided_vpload_nxv1i8_i64(ptr %ptr, i64 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_vpload_nxv1i8_i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-RV32-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_vpload_nxv1i8_i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-RV64-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT: ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i64(ptr %ptr, i64 %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

define <vscale x 1 x i8> @strided_vpload_nxv1i8_i64_allones_mask(ptr %ptr, i64 signext %stride, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_vpload_nxv1i8_i64_allones_mask:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-RV32-NEXT: vlse8.v v8, (a0), a1
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_vpload_nxv1i8_i64_allones_mask:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-RV64-NEXT: vlse8.v v8, (a0), a1
; CHECK-RV64-NEXT: ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i64(ptr %ptr, i64 %stride, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %load
}

declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @strided_vpload_nxv1i8(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

define <vscale x 1 x i8> @strided_vpload_nxv1i8_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i8_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %load
}

declare <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @strided_vpload_nxv2i8(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %load
}

declare <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @strided_vpload_nxv4i8(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %load
}

declare <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @strided_vpload_nxv8i8(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %load
}
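
; A constant stride equal to the element size in bytes is treated as a
; unit-stride access, so the unit_stride tests below expect a plain vle
; rather than a vlse.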
define <vscale x 8 x i8> @strided_vpload_nxv8i8_unit_stride(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i8_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr %ptr, i32 1, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %load
}

define <vscale x 8 x i8> @strided_vpload_nxv8i8_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i8_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i8> %load
}

declare <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @strided_vpload_nxv1i16(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %load
}

declare <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @strided_vpload_nxv2i16(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %load
}

define <vscale x 2 x i16> @strided_vpload_nxv2i16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i16_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i16> %load
}

declare <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @strided_vpload_nxv4i16(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %load
}

define <vscale x 4 x i16> @strided_vpload_nxv4i16_unit_stride(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i16_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.p0.i32(ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %load
}

declare <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @strided_vpload_nxv8i16(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %load
}

declare <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @strided_vpload_nxv1i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %load
}

declare <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @strided_vpload_nxv2i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %load
}

define <vscale x 2 x i32> @strided_vpload_nxv2i32_unit_stride(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i32_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr %ptr, i32 4, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %load
}

declare <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @strided_vpload_nxv4i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @strided_vpload_nxv4i32_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i32_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i32> %load
}

declare <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @strided_vpload_nxv8i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %load
}

declare <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @strided_vpload_nxv1i64(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %load
}

define <vscale x 1 x i64> @strided_vpload_nxv1i64_unit_stride(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i64_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 8, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %load
}

define <vscale x 1 x i64> @strided_vpload_nxv1i64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i64_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i64> %load
}

declare <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @strided_vpload_nxv2i64(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %load
}

declare <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @strided_vpload_nxv4i64(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i64> %load
}

declare <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x i64> @strided_vpload_nxv8i64(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %load
}

declare <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @strided_vpload_nxv1f16(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %load
}

declare <vscale x 2 x half> @llvm.experimental.vp.strided.load.nxv2f16.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @strided_vpload_nxv2f16(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x half> @llvm.experimental.vp.strided.load.nxv2f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %load
}

define <vscale x 2 x half> @strided_vpload_nxv2f16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f16_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <vscale x 2 x half> @llvm.experimental.vp.strided.load.nxv2f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %load
}

declare <vscale x 4 x half> @llvm.experimental.vp.strided.load.nxv4f16.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @strided_vpload_nxv4f16(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x half> @llvm.experimental.vp.strided.load.nxv4f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %load
}

define <vscale x 4 x half> @strided_vpload_nxv4f16_unit_stride(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f16_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x half> @llvm.experimental.vp.strided.load.nxv4f16.p0.i32(ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %load
}

declare <vscale x 8 x half> @llvm.experimental.vp.strided.load.nxv8f16.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @strided_vpload_nxv8f16(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 8 x half> @llvm.experimental.vp.strided.load.nxv8f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %load
}

declare <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @strided_vpload_nxv1f32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %load
}

declare <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @strided_vpload_nxv2f32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %load
}

define <vscale x 2 x float> @strided_vpload_nxv2f32_unit_stride(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f32_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.p0.i32(ptr %ptr, i32 4, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %load
}

declare <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @strided_vpload_nxv4f32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %load
}

declare <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @strided_vpload_nxv8f32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %load
}

define <vscale x 8 x float> @strided_vpload_nxv8f32_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8f32_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x float> %load
}

declare <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @strided_vpload_nxv1f64(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %load
}

define <vscale x 1 x double> @strided_vpload_nxv1f64_unit_stride(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1f64_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.p0.i32(ptr %ptr, i32 8, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %load
}

declare <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @strided_vpload_nxv2f64(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %load
}

declare <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @strided_vpload_nxv4f64(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %load
}

define <vscale x 4 x double> @strided_vpload_nxv4f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f64_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %load
}

declare <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @strided_vpload_nxv8f64(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %load
}

define <vscale x 3 x double> @strided_vpload_nxv3f64(ptr %ptr, i32 signext %stride, <vscale x 3 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv3f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(ptr %ptr, i32 %stride, <vscale x 3 x i1> %mask, i32 %evl)
  ret <vscale x 3 x double> %v
}

define <vscale x 3 x double> @strided_vpload_nxv3f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv3f64_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1
; CHECK-NEXT: ret
  %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(ptr %ptr, i32 %stride, <vscale x 3 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 3 x double> %v
}

declare <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(ptr, i32, <vscale x 3 x i1>, i32)
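
; Splitting: nxv16f64 needs two LMUL=8 register groups, so the load below is
; split in two. The second half's EVL is computed from vlenb with the
; sub/sltu/addi/and saturating-subtract sequence, and its mask is produced
; from the original mask with vslidedown before the second vlse64.v.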
define <vscale x 16 x double> @strided_load_nxv16f64(ptr %ptr, i64 %stride, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_load_nxv16f64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vmv1r.v v9, v0
; CHECK-RV32-NEXT: csrr a4, vlenb
; CHECK-RV32-NEXT: sub a2, a3, a4
; CHECK-RV32-NEXT: sltu a5, a3, a2
; CHECK-RV32-NEXT: addi a5, a5, -1
; CHECK-RV32-NEXT: and a2, a5, a2
; CHECK-RV32-NEXT: bltu a3, a4, .LBB49_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: mv a3, a4
; CHECK-RV32-NEXT: .LBB49_2:
; CHECK-RV32-NEXT: mul a5, a3, a1
; CHECK-RV32-NEXT: srli a4, a4, 3
; CHECK-RV32-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vx v8, v9, a4
; CHECK-RV32-NEXT: add a5, a0, a5
; CHECK-RV32-NEXT: vmv1r.v v0, v8
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a5), a1, v0.t
; CHECK-RV32-NEXT: vmv1r.v v0, v9
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_load_nxv16f64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: vmv1r.v v9, v0
; CHECK-RV64-NEXT: csrr a4, vlenb
; CHECK-RV64-NEXT: sub a3, a2, a4
; CHECK-RV64-NEXT: sltu a5, a2, a3
; CHECK-RV64-NEXT: addi a5, a5, -1
; CHECK-RV64-NEXT: and a3, a5, a3
; CHECK-RV64-NEXT: bltu a2, a4, .LBB49_2
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: mv a2, a4
; CHECK-RV64-NEXT: .LBB49_2:
; CHECK-RV64-NEXT: mul a5, a2, a1
; CHECK-RV64-NEXT: srli a4, a4, 3
; CHECK-RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vx v8, v9, a4
; CHECK-RV64-NEXT: add a5, a0, a5
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (a5), a1, v0.t
; CHECK-RV64-NEXT: vmv1r.v v0, v9
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT: ret
  %v = call <vscale x 16 x double> @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 %stride, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_load_nxv16f64_allones_mask:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: csrr a4, vlenb
; CHECK-RV32-NEXT: sub a2, a3, a4
; CHECK-RV32-NEXT: sltu a5, a3, a2
; CHECK-RV32-NEXT: addi a5, a5, -1
; CHECK-RV32-NEXT: and a2, a5, a2
; CHECK-RV32-NEXT: bltu a3, a4, .LBB50_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: mv a3, a4
; CHECK-RV32-NEXT: .LBB50_2:
; CHECK-RV32-NEXT: mul a4, a3, a1
; CHECK-RV32-NEXT: add a4, a0, a4
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_load_nxv16f64_allones_mask:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: csrr a4, vlenb
; CHECK-RV64-NEXT: sub a3, a2, a4
; CHECK-RV64-NEXT: sltu a5, a2, a3
; CHECK-RV64-NEXT: addi a5, a5, -1
; CHECK-RV64-NEXT: and a3, a5, a3
; CHECK-RV64-NEXT: bltu a2, a4, .LBB50_2
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: mv a2, a4
; CHECK-RV64-NEXT: .LBB50_2:
; CHECK-RV64-NEXT: mul a4, a2, a1
; CHECK-RV64-NEXT: add a4, a0, a4
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1
; CHECK-RV64-NEXT: ret
  %v = call <vscale x 16 x double> @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}

declare <vscale x 16 x double> @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr, i64, <vscale x 16 x i1>, i32)

; Widening + splitting (with HiIsEmpty == true)
; NOTE: We can't return <vscale x 17 x double> as that introduces a vector
; store that can't yet be legalized through widening. In order to test purely
; the vp.strided.load legalization, we manually split it.
define <vscale x 16 x double> @strided_load_nxv17f64(ptr %ptr, i64 %stride, <vscale x 17 x i1> %mask, i32 zeroext %evl, ptr %hi_ptr) {
; CHECK-RV32-LABEL: strided_load_nxv17f64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: csrr a2, vlenb
; CHECK-RV32-NEXT: slli a7, a2, 1
; CHECK-RV32-NEXT: vmv1r.v v8, v0
; CHECK-RV32-NEXT: mv a6, a3
; CHECK-RV32-NEXT: bltu a3, a7, .LBB51_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: mv a6, a7
; CHECK-RV32-NEXT: .LBB51_2:
; CHECK-RV32-NEXT: sub a5, a6, a2
; CHECK-RV32-NEXT: sltu t0, a6, a5
; CHECK-RV32-NEXT: addi t0, t0, -1
; CHECK-RV32-NEXT: and t0, t0, a5
; CHECK-RV32-NEXT: mv a5, a6
; CHECK-RV32-NEXT: bltu a6, a2, .LBB51_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: mv a5, a2
; CHECK-RV32-NEXT: .LBB51_4:
; CHECK-RV32-NEXT: mul t1, a5, a1
; CHECK-RV32-NEXT: srli t2, a2, 3
; CHECK-RV32-NEXT: vsetvli t3, zero, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vx v0, v8, t2
; CHECK-RV32-NEXT: add t1, a0, t1
; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (t1), a1, v0.t
; CHECK-RV32-NEXT: sub a7, a3, a7
; CHECK-RV32-NEXT: sltu a3, a3, a7
; CHECK-RV32-NEXT: addi a3, a3, -1
; CHECK-RV32-NEXT: and a3, a3, a7
; CHECK-RV32-NEXT: bltu a3, a2, .LBB51_6
; CHECK-RV32-NEXT: # %bb.5:
; CHECK-RV32-NEXT: mv a3, a2
; CHECK-RV32-NEXT: .LBB51_6:
; CHECK-RV32-NEXT: mul a6, a6, a1
; CHECK-RV32-NEXT: srli a2, a2, 2
; CHECK-RV32-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a2
; CHECK-RV32-NEXT: add a6, a0, a6
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v24, (a6), a1, v0.t
; CHECK-RV32-NEXT: vmv1r.v v0, v8
; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT: vs1r.v v24, (a4)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_load_nxv17f64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: csrr a4, vlenb
; CHECK-RV64-NEXT: slli a7, a4, 1
; CHECK-RV64-NEXT: vmv1r.v v8, v0
; CHECK-RV64-NEXT: mv a6, a2
; CHECK-RV64-NEXT: bltu a2, a7, .LBB51_2
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: mv a6, a7
; CHECK-RV64-NEXT: .LBB51_2:
; CHECK-RV64-NEXT: sub a5, a6, a4
; CHECK-RV64-NEXT: sltu t0, a6, a5
; CHECK-RV64-NEXT: addi t0, t0, -1
; CHECK-RV64-NEXT: and t0, t0, a5
; CHECK-RV64-NEXT: mv a5, a6
; CHECK-RV64-NEXT: bltu a6, a4, .LBB51_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: mv a5, a4
; CHECK-RV64-NEXT: .LBB51_4:
; CHECK-RV64-NEXT: mul t1, a5, a1
; CHECK-RV64-NEXT: srli t2, a4, 3
; CHECK-RV64-NEXT: vsetvli t3, zero, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vx v0, v8, t2
; CHECK-RV64-NEXT: add t1, a0, t1
; CHECK-RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (t1), a1, v0.t
; CHECK-RV64-NEXT: sub a7, a2, a7
; CHECK-RV64-NEXT: sltu a2, a2, a7
; CHECK-RV64-NEXT: addi a2, a2, -1
; CHECK-RV64-NEXT: and a2, a2, a7
; CHECK-RV64-NEXT: bltu a2, a4, .LBB51_6
; CHECK-RV64-NEXT: # %bb.5:
; CHECK-RV64-NEXT: mv a2, a4
; CHECK-RV64-NEXT: .LBB51_6:
; CHECK-RV64-NEXT: mul a6, a6, a1
; CHECK-RV64-NEXT: srli a4, a4, 2
; CHECK-RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a4
; CHECK-RV64-NEXT: add a6, a0, a6
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v24, (a6), a1, v0.t
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT: vs1r.v v24, (a3)
; CHECK-RV64-NEXT: ret
  %v = call <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr %ptr, i64 %stride, <vscale x 17 x i1> %mask, i32 %evl)
  %lo = call <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %v, i64 0)
  %hi = call <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %v, i64 16)
  store <vscale x 1 x double> %hi, ptr %hi_ptr
  ret <vscale x 16 x double> %lo
}

declare <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr, i64, <vscale x 17 x i1>, i32)
declare <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)

; Test an unmasked integer zero-strided load.
define <vscale x 1 x i8> @zero_strided_unmasked_vpload_nxv1i8_i8(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
; CHECK-OPT: # %bb.0:
; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
; CHECK-NO-OPT: # %bb.0:
; CHECK-NO-OPT-NEXT: lbu a0, 0(a0)
; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NO-OPT-NEXT: vmv.v.x v8, a0
; CHECK-NO-OPT-NEXT: ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 0, <vscale x 1 x i1> splat (i1 true), i32 4)
  ret <vscale x 1 x i8> %load
}

; Test an unmasked float zero-strided load.
define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16:
; CHECK-OPT: # %bb.0:
; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16:
; CHECK-NO-OPT: # %bb.0:
; CHECK-NO-OPT-NEXT: flh fa5, 0(a0)
; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; CHECK-NO-OPT-NEXT: vfmv.v.f v8, fa5
; CHECK-NO-OPT-NEXT: ret
  %load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 4)
  ret <vscale x 1 x half> %load
}
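
; With a zero stride and an all-ones mask the load reads the same i64 element
; %evl times. The RV64 checks below fold it into a scalar ld feeding vadd.vx,
; while RV32, where an i64 does not fit in a GPR, keeps the zero-strided
; vlse64.v.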
define <vscale x 1 x i64> @zero_strided_vadd_nxv1i64(<vscale x 1 x i64> %v, ptr %ptr) {
; CHECK-RV32-LABEL: zero_strided_vadd_nxv1i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
; CHECK-RV32-NEXT: vadd.vv v8, v8, v9
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: zero_strided_vadd_nxv1i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ld a0, 0(a0)
; CHECK-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-RV64-NEXT: vadd.vx v8, v8, a0
; CHECK-RV64-NEXT: ret
  %vscale = call i32 @llvm.vscale()
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
  %w = add <vscale x 1 x i64> %v, %load
  ret <vscale x 1 x i64> %w
}

define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, ptr %ptr) {
; CHECK-RV32-LABEL: zero_strided_vadd_nxv16i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: csrr a1, vlenb
; CHECK-RV32-NEXT: srli a2, a1, 3
; CHECK-RV32-NEXT: sub a3, a2, a1
; CHECK-RV32-NEXT: sltu a4, a2, a3
; CHECK-RV32-NEXT: addi a4, a4, -1
; CHECK-RV32-NEXT: and a3, a4, a3
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
; CHECK-RV32-NEXT: bltu a2, a1, .LBB55_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: mv a2, a1
; CHECK-RV32-NEXT: .LBB55_2:
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero
; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-RV32-NEXT: vadd.vv v16, v16, v24
; CHECK-RV32-NEXT: vadd.vv v8, v8, v0
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ld a0, 0(a0)
; CHECK-RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-RV64-NEXT: vadd.vx v8, v8, a0
; CHECK-RV64-NEXT: vadd.vx v16, v16, a0
; CHECK-RV64-NEXT: ret
  %vscale = call i32 @llvm.vscale()
  %load = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i32(ptr %ptr, i32 0, <vscale x 16 x i1> splat (i1 true), i32 %vscale)
  %w = add <vscale x 16 x i64> %v, %load
  ret <vscale x 16 x i64> %w
}
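
; For pointer elements the SEW follows XLEN, so the zero-strided load below
; uses e32 on RV32 and e64 on RV64. With +optimized-zero-stride-load it stays
; a zero-strided vlse; without it the scalar is loaded with lw/ld and splatted
; with vmv.v.x.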
define <vscale x 1 x ptr> @zero_strided_vadd_nxv1p0(<vscale x 1 x ptr> %v, ptr %ptr) {
; CHECK-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0:
; CHECK-OPT-RV32: # %bb.0:
; CHECK-OPT-RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-OPT-RV32-NEXT: vlse32.v v8, (a0), zero
; CHECK-OPT-RV32-NEXT: ret
;
; CHECK-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0:
; CHECK-OPT-RV64: # %bb.0:
; CHECK-OPT-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-OPT-RV64-NEXT: vlse64.v v8, (a0), zero
; CHECK-OPT-RV64-NEXT: ret
;
; CHECK-NO-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0:
; CHECK-NO-OPT-RV32: # %bb.0:
; CHECK-NO-OPT-RV32-NEXT: lw a0, 0(a0)
; CHECK-NO-OPT-RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NO-OPT-RV32-NEXT: vmv.v.x v8, a0
; CHECK-NO-OPT-RV32-NEXT: ret
;
; CHECK-NO-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0:
; CHECK-NO-OPT-RV64: # %bb.0:
; CHECK-NO-OPT-RV64-NEXT: ld a0, 0(a0)
; CHECK-NO-OPT-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NO-OPT-RV64-NEXT: vmv.v.x v8, a0
; CHECK-NO-OPT-RV64-NEXT: ret
  %vscale = call i32 @llvm.vscale()
  %load = call <vscale x 1 x ptr> @llvm.experimental.vp.strided.load.nxv1p0.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
  ret <vscale x 1 x ptr> %load
}