; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
; RUN:   -verify-machineinstrs < %s | FileCheck %s \
; RUN:   -check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
; RUN:   -verify-machineinstrs < %s | FileCheck %s \
; RUN:   -check-prefixes=CHECK,CHECK-RV64

declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @strided_vpload_nxv1i8_i8(ptr %ptr, i8 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i8_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i16(ptr, i16, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @strided_vpload_nxv1i8_i16(ptr %ptr, i16 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i8_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i16(ptr %ptr, i16 %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i64(ptr, i64, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @strided_vpload_nxv1i8_i64(ptr %ptr, i64 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_vpload_nxv1i8_i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-RV32-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_vpload_nxv1i8_i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-RV64-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT:    ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i64(ptr %ptr, i64 %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

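; An all-ones mask should be folded away, producing an unmasked vlse (no v0.t operand).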
define <vscale x 1 x i8> @strided_vpload_nxv1i8_i64_allones_mask(ptr %ptr, i64 signext %stride, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_vpload_nxv1i8_i64_allones_mask:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-RV32-NEXT:    vlse8.v v8, (a0), a1
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_vpload_nxv1i8_i64_allones_mask:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-RV64-NEXT:    vlse8.v v8, (a0), a1
; CHECK-RV64-NEXT:    ret
  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i64(ptr %ptr, i64 %stride, <vscale x 1 x i1> %b, i32 %evl)
  ret <vscale x 1 x i8> %load
}

declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @strided_vpload_nxv1i8(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

define <vscale x 1 x i8> @strided_vpload_nxv1i8_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1
; CHECK-NEXT:    ret
  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %b, i32 %evl)
  ret <vscale x 1 x i8> %load
}

declare <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @strided_vpload_nxv2i8(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %load
}

declare <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @strided_vpload_nxv4i8(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %load
}

declare <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @strided_vpload_nxv8i8(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %load
}

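; A stride equal to the element size should be selected as a unit-stride vle load.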
define <vscale x 8 x i8> @strided_vpload_nxv8i8_unit_stride(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i8_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr %ptr, i32 1, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %load
}

define <vscale x 8 x i8> @strided_vpload_nxv8i8_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1
; CHECK-NEXT:    ret
  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  %load = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %b, i32 %evl)
  ret <vscale x 8 x i8> %load
}

declare <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @strided_vpload_nxv1i16(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %load
}

declare <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @strided_vpload_nxv2i16(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %load
}

define <vscale x 2 x i16> @strided_vpload_nxv2i16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1
; CHECK-NEXT:    ret
  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %load = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %b, i32 %evl)
  ret <vscale x 2 x i16> %load
}

declare <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @strided_vpload_nxv4i16(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %load
}

define <vscale x 4 x i16> @strided_vpload_nxv4i16_unit_stride(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.p0.i32(ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %load
}

declare <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @strided_vpload_nxv8i16(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %load
}

declare <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @strided_vpload_nxv1i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %load
}

declare <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @strided_vpload_nxv2i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %load
}

define <vscale x 2 x i32> @strided_vpload_nxv2i32_unit_stride(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i32_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr %ptr, i32 4, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %load
}

declare <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @strided_vpload_nxv4i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @strided_vpload_nxv4i32_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %load = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %b, i32 %evl)
  ret <vscale x 4 x i32> %load
}

declare <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @strided_vpload_nxv8i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %load
}

declare <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @strided_vpload_nxv1i64(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %load
}

define <vscale x 1 x i64> @strided_vpload_nxv1i64_unit_stride(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i64_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 8, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %load
}

define <vscale x 1 x i64> @strided_vpload_nxv1i64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1i64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1
; CHECK-NEXT:    ret
  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %b, i32 %evl)
  ret <vscale x 1 x i64> %load
}

declare <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @strided_vpload_nxv2i64(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %load
}

declare <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @strided_vpload_nxv4i64(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i64> %load
}

declare <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x i64> @strided_vpload_nxv8i64(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %load
}

declare <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @strided_vpload_nxv1f16(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %load
}

declare <vscale x 2 x half> @llvm.experimental.vp.strided.load.nxv2f16.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @strided_vpload_nxv2f16(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x half> @llvm.experimental.vp.strided.load.nxv2f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %load
}

define <vscale x 2 x half> @strided_vpload_nxv2f16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1
; CHECK-NEXT:    ret
  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %load = call <vscale x 2 x half> @llvm.experimental.vp.strided.load.nxv2f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %b, i32 %evl)
  ret <vscale x 2 x half> %load
}

declare <vscale x 4 x half> @llvm.experimental.vp.strided.load.nxv4f16.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @strided_vpload_nxv4f16(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x half> @llvm.experimental.vp.strided.load.nxv4f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %load
}

define <vscale x 4 x half> @strided_vpload_nxv4f16_unit_stride(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x half> @llvm.experimental.vp.strided.load.nxv4f16.p0.i32(ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %load
}

declare <vscale x 8 x half> @llvm.experimental.vp.strided.load.nxv8f16.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @strided_vpload_nxv8f16(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x half> @llvm.experimental.vp.strided.load.nxv8f16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %load
}

declare <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @strided_vpload_nxv1f32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %load
}

declare <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @strided_vpload_nxv2f32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %load
}

define <vscale x 2 x float> @strided_vpload_nxv2f32_unit_stride(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f32_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.p0.i32(ptr %ptr, i32 4, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %load
}

declare <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @strided_vpload_nxv4f32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %load
}

declare <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @strided_vpload_nxv8f32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %load
}

define <vscale x 8 x float> @strided_vpload_nxv8f32_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8f32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  %load = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %b, i32 %evl)
  ret <vscale x 8 x float> %load
}

declare <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @strided_vpload_nxv1f64(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %load
}

define <vscale x 1 x double> @strided_vpload_nxv1f64_unit_stride(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv1f64_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.p0.i32(ptr %ptr, i32 8, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %load
}

declare <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @strided_vpload_nxv2f64(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %load
}

declare <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @strided_vpload_nxv4f64(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %load
}

define <vscale x 4 x double> @strided_vpload_nxv4f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv4f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1
; CHECK-NEXT:    ret
  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %load = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %b, i32 %evl)
  ret <vscale x 4 x double> %load
}

declare <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @strided_vpload_nxv8f64(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %load
}

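; Widening: <vscale x 3 x double> is not a legal type, so the load should be widened to a
; <vscale x 4 x double> register group (e64, m4) while keeping the original EVL and mask.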
define <vscale x 3 x double> @strided_vpload_nxv3f64(ptr %ptr, i32 signext %stride, <vscale x 3 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(ptr %ptr, i32 %stride, <vscale x 3 x i1> %mask, i32 %evl)
  ret <vscale x 3 x double> %v
}

define <vscale x 3 x double> @strided_vpload_nxv3f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_nxv3f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1
; CHECK-NEXT:    ret
  %one = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
  %allones = shufflevector <vscale x 3 x i1> %one, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
  %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(ptr %ptr, i32 %stride, <vscale x 3 x i1> %allones, i32 %evl)
  ret <vscale x 3 x double> %v
}

declare <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(ptr, i32, <vscale x 3 x i1>, i32)

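; Splitting: <vscale x 16 x double> does not fit in a single LMUL=8 register group, so the
; load is split into two vlse64.v operations with the EVL and mask distributed between them.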
define <vscale x 16 x double> @strided_load_nxv16f64(ptr %ptr, i64 %stride, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_load_nxv16f64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    vmv1r.v v9, v0
; CHECK-RV32-NEXT:    csrr a4, vlenb
; CHECK-RV32-NEXT:    sub a2, a3, a4
; CHECK-RV32-NEXT:    sltu a5, a3, a2
; CHECK-RV32-NEXT:    addi a5, a5, -1
; CHECK-RV32-NEXT:    and a2, a5, a2
; CHECK-RV32-NEXT:    bltu a3, a4, .LBB49_2
; CHECK-RV32-NEXT:  # %bb.1:
; CHECK-RV32-NEXT:    mv a3, a4
; CHECK-RV32-NEXT:  .LBB49_2:
; CHECK-RV32-NEXT:    mul a5, a3, a1
; CHECK-RV32-NEXT:    add a5, a0, a5
; CHECK-RV32-NEXT:    srli a4, a4, 3
; CHECK-RV32-NEXT:    vsetvli a6, zero, e8, mf4, ta, ma
; CHECK-RV32-NEXT:    vslidedown.vx v8, v9, a4
; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vmv1r.v v0, v8
; CHECK-RV32-NEXT:    vlse64.v v16, (a5), a1, v0.t
; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vmv1r.v v0, v9
; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_load_nxv16f64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    vmv1r.v v9, v0
; CHECK-RV64-NEXT:    csrr a4, vlenb
; CHECK-RV64-NEXT:    sub a3, a2, a4
; CHECK-RV64-NEXT:    sltu a5, a2, a3
; CHECK-RV64-NEXT:    addi a5, a5, -1
; CHECK-RV64-NEXT:    and a3, a5, a3
; CHECK-RV64-NEXT:    bltu a2, a4, .LBB49_2
; CHECK-RV64-NEXT:  # %bb.1:
; CHECK-RV64-NEXT:    mv a2, a4
; CHECK-RV64-NEXT:  .LBB49_2:
; CHECK-RV64-NEXT:    mul a5, a2, a1
; CHECK-RV64-NEXT:    add a5, a0, a5
; CHECK-RV64-NEXT:    srli a4, a4, 3
; CHECK-RV64-NEXT:    vsetvli a6, zero, e8, mf4, ta, ma
; CHECK-RV64-NEXT:    vslidedown.vx v8, v9, a4
; CHECK-RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vmv1r.v v0, v8
; CHECK-RV64-NEXT:    vlse64.v v16, (a5), a1, v0.t
; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vmv1r.v v0, v9
; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 %stride, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_load_nxv16f64_allones_mask:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    csrr a4, vlenb
; CHECK-RV32-NEXT:    sub a2, a3, a4
; CHECK-RV32-NEXT:    sltu a5, a3, a2
; CHECK-RV32-NEXT:    addi a5, a5, -1
; CHECK-RV32-NEXT:    and a2, a5, a2
; CHECK-RV32-NEXT:    bltu a3, a4, .LBB50_2
; CHECK-RV32-NEXT:  # %bb.1:
; CHECK-RV32-NEXT:    mv a3, a4
; CHECK-RV32-NEXT:  .LBB50_2:
; CHECK-RV32-NEXT:    mul a4, a3, a1
; CHECK-RV32-NEXT:    add a4, a0, a4
; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vlse64.v v16, (a4), a1
; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_load_nxv16f64_allones_mask:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    csrr a4, vlenb
; CHECK-RV64-NEXT:    sub a3, a2, a4
; CHECK-RV64-NEXT:    sltu a5, a2, a3
; CHECK-RV64-NEXT:    addi a5, a5, -1
; CHECK-RV64-NEXT:    and a3, a5, a3
; CHECK-RV64-NEXT:    bltu a2, a4, .LBB50_2
; CHECK-RV64-NEXT:  # %bb.1:
; CHECK-RV64-NEXT:    mv a2, a4
; CHECK-RV64-NEXT:  .LBB50_2:
; CHECK-RV64-NEXT:    mul a4, a2, a1
; CHECK-RV64-NEXT:    add a4, a0, a4
; CHECK-RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vlse64.v v16, (a4), a1
; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1
; CHECK-RV64-NEXT:    ret
  %one = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
  %allones = shufflevector <vscale x 16 x i1> %one, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
  %v = call <vscale x 16 x double> @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, <vscale x 16 x i1> %allones, i32 %evl)
  ret <vscale x 16 x double> %v
}

declare <vscale x 16 x double> @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr, i64, <vscale x 16 x i1>, i32)

; Widening + splitting (with HiIsEmpty == true)
; NOTE: We can't return <vscale x 17 x double> as that introduces a vector
; store that can't yet be legalized through widening. In order to test purely
; the vp.strided.load legalization, we manually split it.
define <vscale x 16 x double> @strided_load_nxv17f64(ptr %ptr, i64 %stride, <vscale x 17 x i1> %mask, i32 zeroext %evl, <vscale x 1 x double>* %hi_ptr) {
; CHECK-RV32-LABEL: strided_load_nxv17f64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    csrr a2, vlenb
; CHECK-RV32-NEXT:    slli a7, a2, 1
; CHECK-RV32-NEXT:    vmv1r.v v8, v0
; CHECK-RV32-NEXT:    mv a6, a3
; CHECK-RV32-NEXT:    bltu a3, a7, .LBB51_2
; CHECK-RV32-NEXT:  # %bb.1:
; CHECK-RV32-NEXT:    mv a6, a7
; CHECK-RV32-NEXT:  .LBB51_2:
; CHECK-RV32-NEXT:    sub a5, a6, a2
; CHECK-RV32-NEXT:    sltu t0, a6, a5
; CHECK-RV32-NEXT:    addi t0, t0, -1
; CHECK-RV32-NEXT:    and t0, t0, a5
; CHECK-RV32-NEXT:    mv a5, a6
; CHECK-RV32-NEXT:    bltu a6, a2, .LBB51_4
; CHECK-RV32-NEXT:  # %bb.3:
; CHECK-RV32-NEXT:    mv a5, a2
; CHECK-RV32-NEXT:  .LBB51_4:
; CHECK-RV32-NEXT:    mul t1, a5, a1
; CHECK-RV32-NEXT:    add t1, a0, t1
; CHECK-RV32-NEXT:    srli t2, a2, 3
; CHECK-RV32-NEXT:    vsetvli t3, zero, e8, mf4, ta, ma
; CHECK-RV32-NEXT:    vslidedown.vx v0, v8, t2
; CHECK-RV32-NEXT:    vsetvli zero, t0, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vlse64.v v16, (t1), a1, v0.t
; CHECK-RV32-NEXT:    sub a7, a3, a7
; CHECK-RV32-NEXT:    sltu a3, a3, a7
; CHECK-RV32-NEXT:    addi a3, a3, -1
; CHECK-RV32-NEXT:    and a3, a3, a7
; CHECK-RV32-NEXT:    bltu a3, a2, .LBB51_6
; CHECK-RV32-NEXT:  # %bb.5:
; CHECK-RV32-NEXT:    mv a3, a2
; CHECK-RV32-NEXT:  .LBB51_6:
; CHECK-RV32-NEXT:    mul a6, a6, a1
; CHECK-RV32-NEXT:    add a6, a0, a6
; CHECK-RV32-NEXT:    srli a2, a2, 2
; CHECK-RV32-NEXT:    vsetvli a7, zero, e8, mf2, ta, ma
; CHECK-RV32-NEXT:    vslidedown.vx v0, v8, a2
; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vlse64.v v24, (a6), a1, v0.t
; CHECK-RV32-NEXT:    vsetvli zero, a5, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vmv1r.v v0, v8
; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT:    vs1r.v v24, (a4)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_load_nxv17f64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    csrr a4, vlenb
; CHECK-RV64-NEXT:    slli a7, a4, 1
; CHECK-RV64-NEXT:    vmv1r.v v8, v0
; CHECK-RV64-NEXT:    mv a6, a2
; CHECK-RV64-NEXT:    bltu a2, a7, .LBB51_2
; CHECK-RV64-NEXT:  # %bb.1:
; CHECK-RV64-NEXT:    mv a6, a7
; CHECK-RV64-NEXT:  .LBB51_2:
; CHECK-RV64-NEXT:    sub a5, a6, a4
; CHECK-RV64-NEXT:    sltu t0, a6, a5
; CHECK-RV64-NEXT:    addi t0, t0, -1
; CHECK-RV64-NEXT:    and t0, t0, a5
; CHECK-RV64-NEXT:    mv a5, a6
; CHECK-RV64-NEXT:    bltu a6, a4, .LBB51_4
; CHECK-RV64-NEXT:  # %bb.3:
; CHECK-RV64-NEXT:    mv a5, a4
; CHECK-RV64-NEXT:  .LBB51_4:
; CHECK-RV64-NEXT:    mul t1, a5, a1
; CHECK-RV64-NEXT:    add t1, a0, t1
; CHECK-RV64-NEXT:    srli t2, a4, 3
; CHECK-RV64-NEXT:    vsetvli t3, zero, e8, mf4, ta, ma
; CHECK-RV64-NEXT:    vslidedown.vx v0, v8, t2
; CHECK-RV64-NEXT:    vsetvli zero, t0, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vlse64.v v16, (t1), a1, v0.t
; CHECK-RV64-NEXT:    sub a7, a2, a7
; CHECK-RV64-NEXT:    sltu a2, a2, a7
; CHECK-RV64-NEXT:    addi a2, a2, -1
; CHECK-RV64-NEXT:    and a2, a2, a7
; CHECK-RV64-NEXT:    bltu a2, a4, .LBB51_6
; CHECK-RV64-NEXT:  # %bb.5:
; CHECK-RV64-NEXT:    mv a2, a4
; CHECK-RV64-NEXT:  .LBB51_6:
; CHECK-RV64-NEXT:    mul a6, a6, a1
; CHECK-RV64-NEXT:    add a6, a0, a6
; CHECK-RV64-NEXT:    srli a4, a4, 2
; CHECK-RV64-NEXT:    vsetvli a7, zero, e8, mf2, ta, ma
; CHECK-RV64-NEXT:    vslidedown.vx v0, v8, a4
; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vlse64.v v24, (a6), a1, v0.t
; CHECK-RV64-NEXT:    vsetvli zero, a5, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vmv1r.v v0, v8
; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT:    vs1r.v v24, (a3)
; CHECK-RV64-NEXT:    ret
  %v = call <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr %ptr, i64 %stride, <vscale x 17 x i1> %mask, i32 %evl)
  %lo = call <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %v, i64 0)
  %hi = call <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %v, i64 16)
  store <vscale x 1 x double> %hi, <vscale x 1 x double>* %hi_ptr
  ret <vscale x 16 x double> %lo
}

declare <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr, i64, <vscale x 17 x i1>, i32)
declare <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)