; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
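
; Check how fixed-length vectors are returned and passed under each
; -riscv-v-fixed-length-vector-lmul-max setting: types that fit within the
; LMUL cap travel in a single register group, while larger types are split
; across consecutive register groups or spilled to the stack.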
define <4 x i8> @ret_v4i8(ptr %p) {
; CHECK-LABEL: ret_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <4 x i8>, ptr %p
  ret <4 x i8> %v
}

define <4 x i32> @ret_v4i32(ptr %p) {
; CHECK-LABEL: ret_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <4 x i32>, ptr %p
  ret <4 x i32> %v
}
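
; Once the result no longer fits in one register group under the LMUL cap,
; it is returned in consecutive register groups (v8 and v9 at LMULMAX1).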
define <8 x i32> @ret_v8i32(ptr %p) {
; LMULMAX8-LABEL: ret_v8i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX8-NEXT:    vle32.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v8i32:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX4-NEXT:    vle32.v v8, (a0)
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v8i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <8 x i32>, ptr %p
  ret <8 x i32> %v
}

define <16 x i64> @ret_v16i64(ptr %p) {
; LMULMAX8-LABEL: ret_v16i64:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; LMULMAX8-NEXT:    vle64.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v16i64:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; LMULMAX4-NEXT:    vle64.v v8, (a0)
; LMULMAX4-NEXT:    addi a0, a0, 64
; LMULMAX4-NEXT:    vle64.v v12, (a0)
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v16i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    addi a1, a0, 32
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 64
; LMULMAX2-NEXT:    vle64.v v12, (a1)
; LMULMAX2-NEXT:    addi a0, a0, 96
; LMULMAX2-NEXT:    vle64.v v14, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v16i64:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-NEXT:    vle64.v v8, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle64.v v9, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
; LMULMAX1-NEXT:    vle64.v v10, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
; LMULMAX1-NEXT:    vle64.v v11, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 64
; LMULMAX1-NEXT:    vle64.v v12, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 80
; LMULMAX1-NEXT:    vle64.v v13, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 96
; LMULMAX1-NEXT:    vle64.v v14, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 112
; LMULMAX1-NEXT:    vle64.v v15, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <16 x i64>, ptr %p
  ret <16 x i64> %v
}
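
; Mask vector results are returned in v0 (with the second half of a split
; mask in v8 at LMULMAX1).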
define <8 x i1> @ret_mask_v8i1(ptr %p) {
; CHECK-LABEL: ret_mask_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a0)
; CHECK-NEXT:    ret
  %v = load <8 x i1>, ptr %p
  ret <8 x i1> %v
}

define <32 x i1> @ret_mask_v32i1(ptr %p) {
; LMULMAX8-LABEL: ret_mask_v32i1:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a1, 32
; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX8-NEXT:    vlm.v v0, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_mask_v32i1:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    li a1, 32
; LMULMAX4-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX4-NEXT:    vlm.v v0, (a0)
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_mask_v32i1:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a1, 32
; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT:    vlm.v v0, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_mask_v32i1:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT:    vlm.v v0, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 2
; LMULMAX1-NEXT:    vlm.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <32 x i1>, ptr %p
  ret <32 x i1> %v
}

; Return the vector via registers v8-v23
define <64 x i32> @ret_split_v64i32(ptr %x) {
; LMULMAX8-LABEL: ret_split_v64i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a1, 32
; LMULMAX8-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle32.v v8, (a0)
; LMULMAX8-NEXT:    addi a0, a0, 128
; LMULMAX8-NEXT:    vle32.v v16, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_split_v64i32:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vle32.v v8, (a0)
; LMULMAX4-NEXT:    addi a1, a0, 64
; LMULMAX4-NEXT:    vle32.v v12, (a1)
; LMULMAX4-NEXT:    addi a1, a0, 128
; LMULMAX4-NEXT:    vle32.v v16, (a1)
; LMULMAX4-NEXT:    addi a0, a0, 192
; LMULMAX4-NEXT:    vle32.v v20, (a0)
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_split_v64i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    addi a1, a0, 32
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 64
; LMULMAX2-NEXT:    vle32.v v12, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 96
; LMULMAX2-NEXT:    vle32.v v14, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 128
; LMULMAX2-NEXT:    vle32.v v16, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 160
; LMULMAX2-NEXT:    vle32.v v18, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 192
; LMULMAX2-NEXT:    vle32.v v20, (a1)
; LMULMAX2-NEXT:    addi a0, a0, 224
; LMULMAX2-NEXT:    vle32.v v22, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_split_v64i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
; LMULMAX1-NEXT:    vle32.v v10, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
; LMULMAX1-NEXT:    vle32.v v11, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 64
; LMULMAX1-NEXT:    vle32.v v12, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 80
; LMULMAX1-NEXT:    vle32.v v13, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 96
; LMULMAX1-NEXT:    vle32.v v14, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 112
; LMULMAX1-NEXT:    vle32.v v15, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 128
; LMULMAX1-NEXT:    vle32.v v16, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 144
; LMULMAX1-NEXT:    vle32.v v17, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 160
; LMULMAX1-NEXT:    vle32.v v18, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 176
; LMULMAX1-NEXT:    vle32.v v19, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 192
; LMULMAX1-NEXT:    vle32.v v20, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 208
; LMULMAX1-NEXT:    vle32.v v21, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 224
; LMULMAX1-NEXT:    vle32.v v22, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 240
; LMULMAX1-NEXT:    vle32.v v23, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <64 x i32>, ptr %x
  ret <64 x i32> %v
}

; Return the vector fully via the stack
define <128 x i32> @ret_split_v128i32(ptr %x) {
; LMULMAX8-LABEL: ret_split_v128i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    addi a2, a1, 128
; LMULMAX8-NEXT:    li a3, 32
; LMULMAX8-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle32.v v8, (a2)
; LMULMAX8-NEXT:    addi a2, a1, 256
; LMULMAX8-NEXT:    vle32.v v16, (a1)
; LMULMAX8-NEXT:    addi a1, a1, 384
; LMULMAX8-NEXT:    vle32.v v24, (a1)
; LMULMAX8-NEXT:    vle32.v v0, (a2)
; LMULMAX8-NEXT:    vse32.v v16, (a0)
; LMULMAX8-NEXT:    addi a1, a0, 384
; LMULMAX8-NEXT:    vse32.v v24, (a1)
; LMULMAX8-NEXT:    addi a1, a0, 256
; LMULMAX8-NEXT:    vse32.v v0, (a1)
; LMULMAX8-NEXT:    addi a0, a0, 128
; LMULMAX8-NEXT:    vse32.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_split_v128i32:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    addi a2, a1, 64
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vle32.v v8, (a2)
; LMULMAX4-NEXT:    addi a2, a1, 128
; LMULMAX4-NEXT:    vle32.v v12, (a2)
; LMULMAX4-NEXT:    addi a2, a1, 192
; LMULMAX4-NEXT:    vle32.v v16, (a2)
; LMULMAX4-NEXT:    addi a2, a1, 256
; LMULMAX4-NEXT:    vle32.v v20, (a2)
; LMULMAX4-NEXT:    addi a2, a1, 320
; LMULMAX4-NEXT:    vle32.v v24, (a2)
; LMULMAX4-NEXT:    addi a2, a1, 384
; LMULMAX4-NEXT:    vle32.v v28, (a1)
; LMULMAX4-NEXT:    addi a1, a1, 448
; LMULMAX4-NEXT:    vle32.v v0, (a1)
; LMULMAX4-NEXT:    vle32.v v4, (a2)
; LMULMAX4-NEXT:    vse32.v v28, (a0)
; LMULMAX4-NEXT:    addi a1, a0, 448
; LMULMAX4-NEXT:    vse32.v v0, (a1)
; LMULMAX4-NEXT:    addi a1, a0, 384
; LMULMAX4-NEXT:    vse32.v v4, (a1)
; LMULMAX4-NEXT:    addi a1, a0, 320
; LMULMAX4-NEXT:    vse32.v v24, (a1)
; LMULMAX4-NEXT:    addi a1, a0, 256
; LMULMAX4-NEXT:    vse32.v v20, (a1)
; LMULMAX4-NEXT:    addi a1, a0, 192
; LMULMAX4-NEXT:    vse32.v v16, (a1)
; LMULMAX4-NEXT:    addi a1, a0, 128
; LMULMAX4-NEXT:    vse32.v v12, (a1)
; LMULMAX4-NEXT:    addi a0, a0, 64
; LMULMAX4-NEXT:    vse32.v v8, (a0)
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_split_v128i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    addi a2, a1, 32
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 64
; LMULMAX2-NEXT:    vle32.v v10, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 96
; LMULMAX2-NEXT:    vle32.v v12, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 128
; LMULMAX2-NEXT:    vle32.v v14, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 160
; LMULMAX2-NEXT:    vle32.v v16, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 192
; LMULMAX2-NEXT:    vle32.v v18, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 224
; LMULMAX2-NEXT:    vle32.v v20, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 256
; LMULMAX2-NEXT:    vle32.v v22, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 288
; LMULMAX2-NEXT:    vle32.v v24, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 320
; LMULMAX2-NEXT:    vle32.v v26, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 352
; LMULMAX2-NEXT:    vle32.v v28, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 384
; LMULMAX2-NEXT:    vle32.v v30, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 416
; LMULMAX2-NEXT:    vle32.v v0, (a2)
; LMULMAX2-NEXT:    addi a2, a1, 448
; LMULMAX2-NEXT:    vle32.v v2, (a1)
; LMULMAX2-NEXT:    addi a1, a1, 480
; LMULMAX2-NEXT:    vle32.v v4, (a1)
; LMULMAX2-NEXT:    vle32.v v6, (a2)
; LMULMAX2-NEXT:    vse32.v v2, (a0)
; LMULMAX2-NEXT:    addi a1, a0, 480
; LMULMAX2-NEXT:    vse32.v v4, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 448
; LMULMAX2-NEXT:    vse32.v v6, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 416
; LMULMAX2-NEXT:    vse32.v v0, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 384
; LMULMAX2-NEXT:    vse32.v v30, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 352
; LMULMAX2-NEXT:    vse32.v v28, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 320
; LMULMAX2-NEXT:    vse32.v v26, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 288
; LMULMAX2-NEXT:    vse32.v v24, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 256
; LMULMAX2-NEXT:    vse32.v v22, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 224
; LMULMAX2-NEXT:    vse32.v v20, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 192
; LMULMAX2-NEXT:    vse32.v v18, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 160
; LMULMAX2-NEXT:    vse32.v v16, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 128
; LMULMAX2-NEXT:    vse32.v v14, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 96
; LMULMAX2-NEXT:    vse32.v v12, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 64
; LMULMAX2-NEXT:    vse32.v v10, (a1)
; LMULMAX2-NEXT:    addi a0, a0, 32
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_split_v128i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi a2, a1, 16
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 32
; LMULMAX1-NEXT:    vle32.v v9, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 48
; LMULMAX1-NEXT:    vle32.v v10, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 64
; LMULMAX1-NEXT:    vle32.v v11, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 80
; LMULMAX1-NEXT:    vle32.v v12, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 96
; LMULMAX1-NEXT:    vle32.v v13, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 112
; LMULMAX1-NEXT:    vle32.v v14, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 128
; LMULMAX1-NEXT:    vle32.v v15, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 144
; LMULMAX1-NEXT:    vle32.v v16, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 160
; LMULMAX1-NEXT:    vle32.v v17, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 176
; LMULMAX1-NEXT:    vle32.v v18, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 192
; LMULMAX1-NEXT:    vle32.v v19, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 208
; LMULMAX1-NEXT:    vle32.v v20, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 224
; LMULMAX1-NEXT:    vle32.v v21, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 240
; LMULMAX1-NEXT:    vle32.v v22, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 256
; LMULMAX1-NEXT:    vle32.v v23, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 272
; LMULMAX1-NEXT:    vle32.v v24, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 288
; LMULMAX1-NEXT:    vle32.v v25, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 304
; LMULMAX1-NEXT:    vle32.v v26, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 320
; LMULMAX1-NEXT:    vle32.v v27, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 336
; LMULMAX1-NEXT:    vle32.v v28, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 352
; LMULMAX1-NEXT:    vle32.v v29, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 368
; LMULMAX1-NEXT:    vle32.v v30, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 384
; LMULMAX1-NEXT:    vle32.v v31, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 400
; LMULMAX1-NEXT:    vle32.v v0, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 416
; LMULMAX1-NEXT:    vle32.v v1, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 432
; LMULMAX1-NEXT:    vle32.v v2, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 448
; LMULMAX1-NEXT:    vle32.v v3, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 464
; LMULMAX1-NEXT:    vle32.v v4, (a2)
; LMULMAX1-NEXT:    addi a2, a1, 480
; LMULMAX1-NEXT:    vle32.v v5, (a1)
; LMULMAX1-NEXT:    addi a1, a1, 496
; LMULMAX1-NEXT:    vle32.v v6, (a1)
; LMULMAX1-NEXT:    vle32.v v7, (a2)
; LMULMAX1-NEXT:    vse32.v v5, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 496
; LMULMAX1-NEXT:    vse32.v v6, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 480
; LMULMAX1-NEXT:    vse32.v v7, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 464
; LMULMAX1-NEXT:    vse32.v v4, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 448
; LMULMAX1-NEXT:    vse32.v v3, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 432
; LMULMAX1-NEXT:    vse32.v v2, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 416
; LMULMAX1-NEXT:    vse32.v v1, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 400
; LMULMAX1-NEXT:    vse32.v v0, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 384
; LMULMAX1-NEXT:    vse32.v v31, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 368
; LMULMAX1-NEXT:    vse32.v v30, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 352
; LMULMAX1-NEXT:    vse32.v v29, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 336
; LMULMAX1-NEXT:    vse32.v v28, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 320
; LMULMAX1-NEXT:    vse32.v v27, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 304
; LMULMAX1-NEXT:    vse32.v v26, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 288
; LMULMAX1-NEXT:    vse32.v v25, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 272
; LMULMAX1-NEXT:    vse32.v v24, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 256
; LMULMAX1-NEXT:    vse32.v v23, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 240
; LMULMAX1-NEXT:    vse32.v v22, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 224
; LMULMAX1-NEXT:    vse32.v v21, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 208
; LMULMAX1-NEXT:    vse32.v v20, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 192
; LMULMAX1-NEXT:    vse32.v v19, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 176
; LMULMAX1-NEXT:    vse32.v v18, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 160
; LMULMAX1-NEXT:    vse32.v v17, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 144
; LMULMAX1-NEXT:    vse32.v v16, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 128
; LMULMAX1-NEXT:    vse32.v v15, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 112
; LMULMAX1-NEXT:    vse32.v v14, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 96
; LMULMAX1-NEXT:    vse32.v v13, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 80
; LMULMAX1-NEXT:    vse32.v v12, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 64
; LMULMAX1-NEXT:    vse32.v v11, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
; LMULMAX1-NEXT:    vse32.v v10, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
; LMULMAX1-NEXT:    vse32.v v9, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <128 x i32>, ptr %x
  ret <128 x i32> %v
}
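
; Vector arguments are passed in registers starting at v8, in the same
; groups used for returns.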
define <4 x i8> @ret_v8i8_param_v4i8(<4 x i8> %v) {
; CHECK-LABEL: ret_v8i8_param_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vadd.vi v8, v8, 2
; CHECK-NEXT:    ret
  %r = add <4 x i8> %v, <i8 2, i8 2, i8 2, i8 2>
  ret <4 x i8> %r
}

define <4 x i8> @ret_v4i8_param_v4i8_v4i8(<4 x i8> %v, <4 x i8> %w) {
; CHECK-LABEL: ret_v4i8_param_v4i8_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %r = add <4 x i8> %v, %w
  ret <4 x i8> %r
}

define <4 x i64> @ret_v4i64_param_v4i64_v4i64(<4 x i64> %v, <4 x i64> %w) {
; LMULMAX8-LABEL: ret_v4i64_param_v4i64_v4i64:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX8-NEXT:    vadd.vv v8, v8, v10
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v4i64_param_v4i64_v4i64:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX4-NEXT:    vadd.vv v8, v8, v10
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v4i64_param_v4i64_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v4i64_param_v4i64_v4i64:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-NEXT:    vadd.vv v9, v9, v11
; LMULMAX1-NEXT:    ret
  %r = add <4 x i64> %v, %w
  ret <4 x i64> %r
}
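
; Mask vector arguments are passed in v0 first, with further mask arguments
; in v8 upwards. At LMULMAX1 the two halves of a <32 x i1> argument arrive
; in v0/v8, and the second argument's halves in v9/v10.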
define <8 x i1> @ret_v8i1_param_v8i1_v8i1(<8 x i1> %v, <8 x i1> %w) {
; CHECK-LABEL: ret_v8i1_param_v8i1_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmxor.mm v0, v0, v8
; CHECK-NEXT:    ret
  %r = xor <8 x i1> %v, %w
  ret <8 x i1> %r
}

define <32 x i1> @ret_v32i1_param_v32i1_v32i1(<32 x i1> %v, <32 x i1> %w) {
; LMULMAX8-LABEL: ret_v32i1_param_v32i1_v32i1:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a0, 32
; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; LMULMAX8-NEXT:    vmand.mm v0, v0, v8
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v32i1_param_v32i1_v32i1:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    li a0, 32
; LMULMAX4-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; LMULMAX4-NEXT:    vmand.mm v0, v0, v8
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v32i1_param_v32i1_v32i1:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a0, 32
; LMULMAX2-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; LMULMAX2-NEXT:    vmand.mm v0, v0, v8
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v32i1_param_v32i1_v32i1:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT:    vmand.mm v0, v0, v9
; LMULMAX1-NEXT:    vmand.mm v8, v8, v10
; LMULMAX1-NEXT:    ret
  %r = and <32 x i1> %v, %w
  ret <32 x i1> %r
}
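
; With two <32 x i32> arguments occupying v8-v23, the third vector argument
; is passed indirectly through a pointer in a0, and the trailing scalar %w
; lands in the next free GPR (or on the stack at LMULMAX1).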
define <32 x i32> @ret_v32i32_param_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) {
; LMULMAX8-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a2, 32
; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle32.v v24, (a0)
; LMULMAX8-NEXT:    vadd.vv v8, v8, v16
; LMULMAX8-NEXT:    vadd.vv v8, v8, v24
; LMULMAX8-NEXT:    vadd.vx v8, v8, a1
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    addi a1, a0, 64
; LMULMAX4-NEXT:    vle32.v v24, (a1)
; LMULMAX4-NEXT:    vle32.v v28, (a0)
; LMULMAX4-NEXT:    vadd.vv v8, v8, v16
; LMULMAX4-NEXT:    vadd.vv v12, v12, v20
; LMULMAX4-NEXT:    vadd.vv v12, v12, v24
; LMULMAX4-NEXT:    vadd.vv v8, v8, v28
; LMULMAX4-NEXT:    vadd.vx v8, v8, a2
; LMULMAX4-NEXT:    vadd.vx v12, v12, a2
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v24, (a0)
; LMULMAX2-NEXT:    addi a1, a0, 32
; LMULMAX2-NEXT:    vle32.v v26, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 64
; LMULMAX2-NEXT:    vle32.v v28, (a1)
; LMULMAX2-NEXT:    addi a0, a0, 96
; LMULMAX2-NEXT:    vle32.v v30, (a0)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v16
; LMULMAX2-NEXT:    vadd.vv v10, v10, v18
; LMULMAX2-NEXT:    vadd.vv v12, v12, v20
; LMULMAX2-NEXT:    vadd.vv v14, v14, v22
; LMULMAX2-NEXT:    vadd.vv v14, v14, v30
; LMULMAX2-NEXT:    vadd.vv v12, v12, v28
; LMULMAX2-NEXT:    vadd.vv v10, v10, v26
; LMULMAX2-NEXT:    vadd.vv v8, v8, v24
; LMULMAX2-NEXT:    vadd.vx v8, v8, a4
; LMULMAX2-NEXT:    vadd.vx v10, v10, a4
; LMULMAX2-NEXT:    vadd.vx v12, v12, a4
; LMULMAX2-NEXT:    vadd.vx v14, v14, a4
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v24, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle32.v v25, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
; LMULMAX1-NEXT:    vle32.v v26, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
; LMULMAX1-NEXT:    vle32.v v27, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 64
; LMULMAX1-NEXT:    vle32.v v28, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 80
; LMULMAX1-NEXT:    vle32.v v29, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 96
; LMULMAX1-NEXT:    vle32.v v30, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 112
; LMULMAX1-NEXT:    vle32.v v31, (a0)
; LMULMAX1-NEXT:    lw a0, 0(sp)
; LMULMAX1-NEXT:    vadd.vv v8, v8, v16
; LMULMAX1-NEXT:    vadd.vv v9, v9, v17
; LMULMAX1-NEXT:    vadd.vv v10, v10, v18
; LMULMAX1-NEXT:    vadd.vv v11, v11, v19
; LMULMAX1-NEXT:    vadd.vv v12, v12, v20
; LMULMAX1-NEXT:    vadd.vv v13, v13, v21
; LMULMAX1-NEXT:    vadd.vv v14, v14, v22
; LMULMAX1-NEXT:    vadd.vv v15, v15, v23
; LMULMAX1-NEXT:    vadd.vv v15, v15, v31
; LMULMAX1-NEXT:    vadd.vv v14, v14, v30
; LMULMAX1-NEXT:    vadd.vv v13, v13, v29
; LMULMAX1-NEXT:    vadd.vv v12, v12, v28
; LMULMAX1-NEXT:    vadd.vv v11, v11, v27
; LMULMAX1-NEXT:    vadd.vv v10, v10, v26
; LMULMAX1-NEXT:    vadd.vv v9, v9, v25
; LMULMAX1-NEXT:    vadd.vv v8, v8, v24
; LMULMAX1-NEXT:    vadd.vx v8, v8, a0
; LMULMAX1-NEXT:    vadd.vx v9, v9, a0
; LMULMAX1-NEXT:    vadd.vx v10, v10, a0
; LMULMAX1-NEXT:    vadd.vx v11, v11, a0
; LMULMAX1-NEXT:    vadd.vx v12, v12, a0
; LMULMAX1-NEXT:    vadd.vx v13, v13, a0
; LMULMAX1-NEXT:    vadd.vx v14, v14, a0
; LMULMAX1-NEXT:    vadd.vx v15, v15, a0
; LMULMAX1-NEXT:    ret
  %r = add <32 x i32> %x, %y
  %s = add <32 x i32> %r, %z
  %head = insertelement <32 x i32> poison, i32 %w, i32 0
  %splat = shufflevector <32 x i32> %head, <32 x i32> poison, <32 x i32> zeroinitializer
  %t = add <32 x i32> %s, %splat
  ret <32 x i32> %t
}

declare <32 x i32> @ext2(<32 x i32>, <32 x i32>, i32, i32)
declare <32 x i32> @ext3(<32 x i32>, <32 x i32>, <32 x i32>, i32, i32)

define <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, i32 %w) {
; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    addi sp, sp, -16
; LMULMAX8-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX8-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    .cfi_offset ra, -8
; LMULMAX8-NEXT:    vmv8r.v v24, v8
; LMULMAX8-NEXT:    li a1, 2
; LMULMAX8-NEXT:    vmv8r.v v8, v16
; LMULMAX8-NEXT:    vmv8r.v v16, v24
; LMULMAX8-NEXT:    call ext2
; LMULMAX8-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; LMULMAX8-NEXT:    addi sp, sp, 16
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    addi sp, sp, -16
; LMULMAX4-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX4-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    .cfi_offset ra, -8
; LMULMAX4-NEXT:    vmv4r.v v24, v12
; LMULMAX4-NEXT:    vmv4r.v v28, v8
; LMULMAX4-NEXT:    li a1, 2
; LMULMAX4-NEXT:    vmv4r.v v8, v16
; LMULMAX4-NEXT:    vmv4r.v v12, v20
; LMULMAX4-NEXT:    vmv4r.v v16, v28
; LMULMAX4-NEXT:    vmv4r.v v20, v24
; LMULMAX4-NEXT:    call ext2
; LMULMAX4-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; LMULMAX4-NEXT:    addi sp, sp, 16
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    addi sp, sp, -16
; LMULMAX2-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX2-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    .cfi_offset ra, -8
; LMULMAX2-NEXT:    vmv2r.v v24, v14
; LMULMAX2-NEXT:    vmv2r.v v26, v12
; LMULMAX2-NEXT:    vmv2r.v v28, v10
; LMULMAX2-NEXT:    vmv2r.v v30, v8
; LMULMAX2-NEXT:    li a1, 2
; LMULMAX2-NEXT:    vmv2r.v v8, v16
; LMULMAX2-NEXT:    vmv2r.v v10, v18
; LMULMAX2-NEXT:    vmv2r.v v12, v20
; LMULMAX2-NEXT:    vmv2r.v v14, v22
; LMULMAX2-NEXT:    vmv2r.v v16, v30
; LMULMAX2-NEXT:    vmv2r.v v18, v28
; LMULMAX2-NEXT:    vmv2r.v v20, v26
; LMULMAX2-NEXT:    vmv2r.v v22, v24
; LMULMAX2-NEXT:    call ext2
; LMULMAX2-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT:    addi sp, sp, 16
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi sp, sp, -16
; LMULMAX1-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX1-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    .cfi_offset ra, -8
; LMULMAX1-NEXT:    vmv1r.v v24, v15
; LMULMAX1-NEXT:    vmv1r.v v25, v14
; LMULMAX1-NEXT:    vmv1r.v v26, v13
; LMULMAX1-NEXT:    vmv1r.v v27, v12
; LMULMAX1-NEXT:    vmv1r.v v28, v11
; LMULMAX1-NEXT:    vmv1r.v v29, v10
; LMULMAX1-NEXT:    vmv1r.v v30, v9
; LMULMAX1-NEXT:    vmv1r.v v31, v8
; LMULMAX1-NEXT:    li a1, 2
; LMULMAX1-NEXT:    vmv1r.v v8, v16
; LMULMAX1-NEXT:    vmv1r.v v9, v17
; LMULMAX1-NEXT:    vmv1r.v v10, v18
; LMULMAX1-NEXT:    vmv1r.v v11, v19
; LMULMAX1-NEXT:    vmv1r.v v12, v20
; LMULMAX1-NEXT:    vmv1r.v v13, v21
; LMULMAX1-NEXT:    vmv1r.v v14, v22
; LMULMAX1-NEXT:    vmv1r.v v15, v23
; LMULMAX1-NEXT:    vmv1r.v v16, v31
; LMULMAX1-NEXT:    vmv1r.v v17, v30
; LMULMAX1-NEXT:    vmv1r.v v18, v29
; LMULMAX1-NEXT:    vmv1r.v v19, v28
; LMULMAX1-NEXT:    vmv1r.v v20, v27
; LMULMAX1-NEXT:    vmv1r.v v21, v26
; LMULMAX1-NEXT:    vmv1r.v v22, v25
; LMULMAX1-NEXT:    vmv1r.v v23, v24
; LMULMAX1-NEXT:    call ext2
; LMULMAX1-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT:    addi sp, sp, 16
; LMULMAX1-NEXT:    ret
  %t = call <32 x i32> @ext2(<32 x i32> %y, <32 x i32> %x, i32 %w, i32 2)
  ret <32 x i32> %t
}
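
; For the three-operand call the third <32 x i32> cannot go in registers, so
; the caller materializes it in its own 128-byte-aligned frame and passes the
; address in a0.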
define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) {
; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    addi sp, sp, -256
; LMULMAX8-NEXT:    .cfi_def_cfa_offset 256
; LMULMAX8-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    .cfi_offset ra, -8
; LMULMAX8-NEXT:    .cfi_offset s0, -16
; LMULMAX8-NEXT:    addi s0, sp, 256
; LMULMAX8-NEXT:    .cfi_def_cfa s0, 0
; LMULMAX8-NEXT:    andi sp, sp, -128
; LMULMAX8-NEXT:    li a2, 32
; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle32.v v24, (a0)
; LMULMAX8-NEXT:    mv a3, sp
; LMULMAX8-NEXT:    mv a0, sp
; LMULMAX8-NEXT:    li a2, 42
; LMULMAX8-NEXT:    vse32.v v8, (a3)
; LMULMAX8-NEXT:    vmv.v.v v8, v24
; LMULMAX8-NEXT:    call ext3
; LMULMAX8-NEXT:    addi sp, s0, -256
; LMULMAX8-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; LMULMAX8-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; LMULMAX8-NEXT:    addi sp, sp, 256
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    addi sp, sp, -256
; LMULMAX4-NEXT:    .cfi_def_cfa_offset 256
; LMULMAX4-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    .cfi_offset ra, -8
; LMULMAX4-NEXT:    .cfi_offset s0, -16
; LMULMAX4-NEXT:    addi s0, sp, 256
; LMULMAX4-NEXT:    .cfi_def_cfa s0, 0
; LMULMAX4-NEXT:    andi sp, sp, -128
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vle32.v v24, (a0)
; LMULMAX4-NEXT:    addi a0, a0, 64
; LMULMAX4-NEXT:    vle32.v v28, (a0)
; LMULMAX4-NEXT:    addi a0, sp, 64
; LMULMAX4-NEXT:    vse32.v v12, (a0)
; LMULMAX4-NEXT:    mv a1, sp
; LMULMAX4-NEXT:    mv a0, sp
; LMULMAX4-NEXT:    li a3, 42
; LMULMAX4-NEXT:    vse32.v v8, (a1)
; LMULMAX4-NEXT:    vmv.v.v v8, v24
; LMULMAX4-NEXT:    vmv.v.v v12, v28
; LMULMAX4-NEXT:    call ext3
; LMULMAX4-NEXT:    addi sp, s0, -256
; LMULMAX4-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; LMULMAX4-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; LMULMAX4-NEXT:    addi sp, sp, 256
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    addi sp, sp, -256
; LMULMAX2-NEXT:    .cfi_def_cfa_offset 256
; LMULMAX2-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    .cfi_offset ra, -8
; LMULMAX2-NEXT:    .cfi_offset s0, -16
; LMULMAX2-NEXT:    addi s0, sp, 256
; LMULMAX2-NEXT:    .cfi_def_cfa s0, 0
; LMULMAX2-NEXT:    andi sp, sp, -128
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v24, (a0)
; LMULMAX2-NEXT:    addi a1, a0, 32
; LMULMAX2-NEXT:    vle32.v v26, (a1)
; LMULMAX2-NEXT:    addi a1, a0, 64
; LMULMAX2-NEXT:    vle32.v v28, (a1)
; LMULMAX2-NEXT:    addi a0, a0, 96
; LMULMAX2-NEXT:    vle32.v v30, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 96
; LMULMAX2-NEXT:    vse32.v v14, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 64
; LMULMAX2-NEXT:    vse32.v v12, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 32
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    mv a1, sp
; LMULMAX2-NEXT:    mv a0, sp
; LMULMAX2-NEXT:    li a5, 42
; LMULMAX2-NEXT:    vse32.v v8, (a1)
; LMULMAX2-NEXT:    vmv.v.v v8, v24
; LMULMAX2-NEXT:    vmv.v.v v10, v26
; LMULMAX2-NEXT:    vmv.v.v v12, v28
; LMULMAX2-NEXT:    vmv.v.v v14, v30
; LMULMAX2-NEXT:    call ext3
; LMULMAX2-NEXT:    addi sp, s0, -256
; LMULMAX2-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT:    addi sp, sp, 256
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi sp, sp, -256
; LMULMAX1-NEXT:    .cfi_def_cfa_offset 256
; LMULMAX1-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    sd s1, 232(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    .cfi_offset ra, -8
; LMULMAX1-NEXT:    .cfi_offset s0, -16
; LMULMAX1-NEXT:    .cfi_offset s1, -24
; LMULMAX1-NEXT:    addi s0, sp, 256
; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
; LMULMAX1-NEXT:    andi sp, sp, -128
; LMULMAX1-NEXT:    mv s1, sp
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v24, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle32.v v25, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
; LMULMAX1-NEXT:    vle32.v v26, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
; LMULMAX1-NEXT:    vle32.v v27, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 64
; LMULMAX1-NEXT:    vle32.v v28, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 80
; LMULMAX1-NEXT:    vle32.v v29, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 96
; LMULMAX1-NEXT:    vle32.v v30, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 112
; LMULMAX1-NEXT:    vle32.v v31, (a0)
; LMULMAX1-NEXT:    ld a1, 0(s0)
; LMULMAX1-NEXT:    addi sp, sp, -16
; LMULMAX1-NEXT:    addi a0, s1, 112
; LMULMAX1-NEXT:    vse32.v v15, (a0)
; LMULMAX1-NEXT:    addi a0, s1, 96
; LMULMAX1-NEXT:    vse32.v v14, (a0)
; LMULMAX1-NEXT:    addi a0, s1, 80
; LMULMAX1-NEXT:    vse32.v v13, (a0)
; LMULMAX1-NEXT:    addi a0, s1, 64
; LMULMAX1-NEXT:    vse32.v v12, (a0)
; LMULMAX1-NEXT:    addi a0, s1, 48
; LMULMAX1-NEXT:    vse32.v v11, (a0)
; LMULMAX1-NEXT:    addi a0, s1, 32
; LMULMAX1-NEXT:    vse32.v v10, (a0)
; LMULMAX1-NEXT:    addi a0, s1, 16
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    mv a0, s1
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    li a0, 42
; LMULMAX1-NEXT:    sd a0, 8(sp)
; LMULMAX1-NEXT:    mv a0, s1
; LMULMAX1-NEXT:    sd a1, 0(sp)
; LMULMAX1-NEXT:    vmv.v.v v8, v24
; LMULMAX1-NEXT:    vmv.v.v v9, v25
; LMULMAX1-NEXT:    vmv.v.v v10, v26
; LMULMAX1-NEXT:    vmv.v.v v11, v27
; LMULMAX1-NEXT:    vmv.v.v v12, v28
; LMULMAX1-NEXT:    vmv.v.v v13, v29
; LMULMAX1-NEXT:    vmv.v.v v14, v30
; LMULMAX1-NEXT:    vmv.v.v v15, v31
; LMULMAX1-NEXT:    call ext3
; LMULMAX1-NEXT:    addi sp, sp, 16
; LMULMAX1-NEXT:    addi sp, s0, -256
; LMULMAX1-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT:    ld s1, 232(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT:    addi sp, sp, 256
; LMULMAX1-NEXT:    ret
  %t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42)
  ret <32 x i32> %t
}

; Test various configurations of split vector types where the values are split
; across both registers and the stack.
;   LMUL8: Ins: v8,v9,v10,v11,v12, v16m8 y[0:31], a0+0 z[0:31]
;   LMUL4: Ins: v8,v9,v10,v11,v12, v16m4 y[0:15], v20m4 y[16:31], a0+0 z[0:15],
;          a0+64 z[16:31]
;   LMUL2: Ins: v8,v9,v10,v11,v12, v14m2 y[0:7], v16m2 y[8:15], v18m2 y[16:23],
;          v20m2 y[24:31], v22m2 z[0:7], a1+0 z[8:15], a1+32 z[16:23],
;          a1+64 z[24:31]
;   LMUL1: Ins: v8,v9,v10,v11,v12, v13 y[0:3], v14 y[4:7], v15 y[8:11],
;          v16 y[12:15], v17 y[16:19], v18 y[20:23], v19 y[24:27],
;          v20 y[28:31], v21 z[0:3], v22 z[4:7], v23 z[8:11],
;          a1+0 z[12:15], a1+16 z[16:19], a1+32 z[20:23], a1+48 z[24:27],
;          a1+64 z[28:31]
define <32 x i32> @split_vector_args(<2 x i32>,<2 x i32>,<2 x i32>,<2 x i32>,<2 x i32>, <32 x i32> %y, <32 x i32> %z) {
; LMULMAX8-LABEL: split_vector_args:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a1, 32
; LMULMAX8-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle32.v v8, (a0)
; LMULMAX8-NEXT:    vadd.vv v8, v16, v8
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: split_vector_args:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    addi a1, a0, 64
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vle32.v v8, (a0)
; LMULMAX4-NEXT:    vle32.v v12, (a1)
; LMULMAX4-NEXT:    vadd.vv v8, v16, v8
; LMULMAX4-NEXT:    vadd.vv v12, v20, v12
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: split_vector_args:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    addi a1, a0, 64
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v10, (a0)
; LMULMAX2-NEXT:    addi a0, a0, 32
; LMULMAX2-NEXT:    vle32.v v12, (a0)
; LMULMAX2-NEXT:    vle32.v v24, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v14, v22
; LMULMAX2-NEXT:    vadd.vv v10, v16, v10
; LMULMAX2-NEXT:    vadd.vv v12, v18, v12
; LMULMAX2-NEXT:    vadd.vv v14, v20, v24
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: split_vector_args:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi a1, a0, 64
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v24, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
; LMULMAX1-NEXT:    vle32.v v25, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
; LMULMAX1-NEXT:    vle32.v v26, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle32.v v12, (a1)
; LMULMAX1-NEXT:    vle32.v v11, (a0)
; LMULMAX1-NEXT:    vadd.vv v8, v13, v21
; LMULMAX1-NEXT:    vadd.vv v9, v14, v22
; LMULMAX1-NEXT:    vadd.vv v10, v15, v23
; LMULMAX1-NEXT:    vadd.vv v11, v16, v11
; LMULMAX1-NEXT:    vadd.vv v12, v17, v12
; LMULMAX1-NEXT:    vadd.vv v13, v18, v26
; LMULMAX1-NEXT:    vadd.vv v14, v19, v25
; LMULMAX1-NEXT:    vadd.vv v15, v20, v24
; LMULMAX1-NEXT:    ret
  %v0 = add <32 x i32> %y, %z
  ret <32 x i32> %v0
}
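
; When calling split_vector_args, the five <2 x i32> arguments land in
; v8-v12 and whatever part of the final <32 x i32> does not fit in registers
; is passed through the caller's stack.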
define <32 x i32> @call_split_vector_args(ptr %pa, ptr %pb) {
; LMULMAX8-LABEL: call_split_vector_args:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    addi sp, sp, -256
; LMULMAX8-NEXT:    .cfi_def_cfa_offset 256
; LMULMAX8-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    .cfi_offset ra, -8
; LMULMAX8-NEXT:    .cfi_offset s0, -16
; LMULMAX8-NEXT:    addi s0, sp, 256
; LMULMAX8-NEXT:    .cfi_def_cfa s0, 0
; LMULMAX8-NEXT:    andi sp, sp, -128
; LMULMAX8-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX8-NEXT:    vle32.v v8, (a0)
; LMULMAX8-NEXT:    li a0, 32
; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle32.v v16, (a1)
; LMULMAX8-NEXT:    mv a1, sp
; LMULMAX8-NEXT:    mv a0, sp
; LMULMAX8-NEXT:    vse32.v v16, (a1)
; LMULMAX8-NEXT:    vmv1r.v v9, v8
; LMULMAX8-NEXT:    vmv1r.v v10, v8
; LMULMAX8-NEXT:    vmv1r.v v11, v8
; LMULMAX8-NEXT:    vmv1r.v v12, v8
; LMULMAX8-NEXT:    call split_vector_args
; LMULMAX8-NEXT:    addi sp, s0, -256
; LMULMAX8-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; LMULMAX8-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; LMULMAX8-NEXT:    addi sp, sp, 256
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: call_split_vector_args:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    addi sp, sp, -256
; LMULMAX4-NEXT:    .cfi_def_cfa_offset 256
; LMULMAX4-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    .cfi_offset ra, -8
; LMULMAX4-NEXT:    .cfi_offset s0, -16
; LMULMAX4-NEXT:    addi s0, sp, 256
; LMULMAX4-NEXT:    .cfi_def_cfa s0, 0
; LMULMAX4-NEXT:    andi sp, sp, -128
; LMULMAX4-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX4-NEXT:    vle32.v v8, (a0)
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vle32.v v16, (a1)
; LMULMAX4-NEXT:    addi a0, a1, 64
; LMULMAX4-NEXT:    vle32.v v20, (a0)
; LMULMAX4-NEXT:    addi a0, sp, 64
; LMULMAX4-NEXT:    vse32.v v20, (a0)
; LMULMAX4-NEXT:    mv a1, sp
; LMULMAX4-NEXT:    mv a0, sp
; LMULMAX4-NEXT:    vse32.v v16, (a1)
; LMULMAX4-NEXT:    vmv1r.v v9, v8
; LMULMAX4-NEXT:    vmv1r.v v10, v8
; LMULMAX4-NEXT:    vmv1r.v v11, v8
; LMULMAX4-NEXT:    vmv1r.v v12, v8
; LMULMAX4-NEXT:    call split_vector_args
; LMULMAX4-NEXT:    addi sp, s0, -256
; LMULMAX4-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; LMULMAX4-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; LMULMAX4-NEXT:    addi sp, sp, 256
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: call_split_vector_args:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    addi sp, sp, -128
; LMULMAX2-NEXT:    .cfi_def_cfa_offset 128
; LMULMAX2-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    .cfi_offset ra, -8
; LMULMAX2-NEXT:    .cfi_offset s0, -16
; LMULMAX2-NEXT:    addi s0, sp, 128
; LMULMAX2-NEXT:    .cfi_def_cfa s0, 0
; LMULMAX2-NEXT:    andi sp, sp, -128
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v14, (a1)
; LMULMAX2-NEXT:    addi a0, a1, 32
; LMULMAX2-NEXT:    vle32.v v16, (a0)
; LMULMAX2-NEXT:    addi a0, a1, 64
; LMULMAX2-NEXT:    vle32.v v18, (a0)
; LMULMAX2-NEXT:    addi a0, a1, 96
; LMULMAX2-NEXT:    vle32.v v20, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 64
; LMULMAX2-NEXT:    vse32.v v20, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 32
; LMULMAX2-NEXT:    vse32.v v18, (a0)
; LMULMAX2-NEXT:    mv a1, sp
; LMULMAX2-NEXT:    mv a0, sp
; LMULMAX2-NEXT:    vse32.v v16, (a1)
; LMULMAX2-NEXT:    vmv1r.v v9, v8
; LMULMAX2-NEXT:    vmv1r.v v10, v8
; LMULMAX2-NEXT:    vmv1r.v v11, v8
; LMULMAX2-NEXT:    vmv1r.v v12, v8
; LMULMAX2-NEXT:    vmv.v.v v22, v14
; LMULMAX2-NEXT:    call split_vector_args
; LMULMAX2-NEXT:    addi sp, s0, -128
; LMULMAX2-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT:    addi sp, sp, 128
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: call_split_vector_args:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi sp, sp, -128
; LMULMAX1-NEXT:    .cfi_def_cfa_offset 128
; LMULMAX1-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    .cfi_offset ra, -8
; LMULMAX1-NEXT:    .cfi_offset s0, -16
; LMULMAX1-NEXT:    addi s0, sp, 128
; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
; LMULMAX1-NEXT:    andi sp, sp, -128
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v13, (a1)
; LMULMAX1-NEXT:    addi a0, a1, 32
; LMULMAX1-NEXT:    vle32.v v15, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 16
; LMULMAX1-NEXT:    vle32.v v14, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 48
; LMULMAX1-NEXT:    vle32.v v16, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 64
; LMULMAX1-NEXT:    vle32.v v17, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 80
; LMULMAX1-NEXT:    vle32.v v18, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 96
; LMULMAX1-NEXT:    vle32.v v19, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 112
; LMULMAX1-NEXT:    vle32.v v20, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 64
; LMULMAX1-NEXT:    vse32.v v20, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 48
; LMULMAX1-NEXT:    vse32.v v19, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 32
; LMULMAX1-NEXT:    vse32.v v18, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 16
; LMULMAX1-NEXT:    vse32.v v17, (a0)
; LMULMAX1-NEXT:    mv a1, sp
; LMULMAX1-NEXT:    mv a0, sp
; LMULMAX1-NEXT:    vse32.v v16, (a1)
; LMULMAX1-NEXT:    vmv1r.v v9, v8
; LMULMAX1-NEXT:    vmv1r.v v10, v8
; LMULMAX1-NEXT:    vmv1r.v v11, v8
; LMULMAX1-NEXT:    vmv1r.v v12, v8
; LMULMAX1-NEXT:    vmv.v.v v21, v13
; LMULMAX1-NEXT:    vmv.v.v v22, v14
; LMULMAX1-NEXT:    vmv.v.v v23, v15
; LMULMAX1-NEXT:    call split_vector_args
; LMULMAX1-NEXT:    addi sp, s0, -128
; LMULMAX1-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT:    addi sp, sp, 128
; LMULMAX1-NEXT:    ret
  %a = load <2 x i32>, ptr %pa
  %b = load <32 x i32>, ptr %pb
  %r = call <32 x i32> @split_vector_args(<2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <32 x i32> %b, <32 x i32> %b)
  ret <32 x i32> %r
}

; A rather pathological test case in which we exhaust all vector registers and
; all scalar registers, forcing %z and %8 to go through the stack.
define <32 x i32> @vector_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %8) {
; LMULMAX8-LABEL: vector_arg_via_stack:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a0, 32
; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle32.v v16, (sp)
; LMULMAX8-NEXT:    vadd.vv v8, v8, v16
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: vector_arg_via_stack:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vle32.v v16, (sp)
; LMULMAX4-NEXT:    addi a0, sp, 64
; LMULMAX4-NEXT:    vle32.v v20, (a0)
; LMULMAX4-NEXT:    vadd.vv v8, v8, v16
; LMULMAX4-NEXT:    vadd.vv v12, v12, v20
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: vector_arg_via_stack:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    addi a0, sp, 64
; LMULMAX2-NEXT:    vle32.v v16, (a0)
; LMULMAX2-NEXT:    vle32.v v18, (sp)
; LMULMAX2-NEXT:    addi a0, sp, 32
; LMULMAX2-NEXT:    vle32.v v20, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 96
; LMULMAX2-NEXT:    vle32.v v22, (a0)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v18
; LMULMAX2-NEXT:    vadd.vv v10, v10, v20
; LMULMAX2-NEXT:    vadd.vv v12, v12, v16
; LMULMAX2-NEXT:    vadd.vv v14, v14, v22
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: vector_arg_via_stack:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi a0, sp, 112
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v16, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 96
; LMULMAX1-NEXT:    vle32.v v17, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 80
; LMULMAX1-NEXT:    vle32.v v18, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 64
; LMULMAX1-NEXT:    vle32.v v19, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 32
; LMULMAX1-NEXT:    vle32.v v20, (a0)
; LMULMAX1-NEXT:    vle32.v v21, (sp)
; LMULMAX1-NEXT:    addi a0, sp, 16
; LMULMAX1-NEXT:    vle32.v v22, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 48
; LMULMAX1-NEXT:    vle32.v v23, (a0)
; LMULMAX1-NEXT:    vadd.vv v8, v8, v21
; LMULMAX1-NEXT:    vadd.vv v9, v9, v22
; LMULMAX1-NEXT:    vadd.vv v10, v10, v20
; LMULMAX1-NEXT:    vadd.vv v11, v11, v23
; LMULMAX1-NEXT:    vadd.vv v12, v12, v19
; LMULMAX1-NEXT:    vadd.vv v13, v13, v18
; LMULMAX1-NEXT:    vadd.vv v14, v14, v17
; LMULMAX1-NEXT:    vadd.vv v15, v15, v16
; LMULMAX1-NEXT:    ret
  %s = add <32 x i32> %x, %z
  ret <32 x i32> %s
}

; Calling the function above. Ensure we pass the arguments correctly.
define <32 x i32> @pass_vector_arg_via_stack(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z) {
; LMULMAX8-LABEL: pass_vector_arg_via_stack:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    addi sp, sp, -144
; LMULMAX8-NEXT:    .cfi_def_cfa_offset 144
; LMULMAX8-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    .cfi_offset ra, -8
; LMULMAX8-NEXT:    li a0, 32
; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; LMULMAX8-NEXT:    vmv.v.i v8, 0
; LMULMAX8-NEXT:    vse32.v v8, (sp)
; LMULMAX8-NEXT:    li a0, 8
; LMULMAX8-NEXT:    li a1, 1
; LMULMAX8-NEXT:    li a2, 2
; LMULMAX8-NEXT:    li a3, 3
; LMULMAX8-NEXT:    li a4, 4
; LMULMAX8-NEXT:    li a5, 5
; LMULMAX8-NEXT:    li a6, 6
; LMULMAX8-NEXT:    li a7, 7
; LMULMAX8-NEXT:    sd a0, 128(sp)
; LMULMAX8-NEXT:    li a0, 0
; LMULMAX8-NEXT:    vmv.v.i v16, 0
; LMULMAX8-NEXT:    call vector_arg_via_stack
; LMULMAX8-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; LMULMAX8-NEXT:    addi sp, sp, 144
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: pass_vector_arg_via_stack:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    addi sp, sp, -144
; LMULMAX4-NEXT:    .cfi_def_cfa_offset 144
; LMULMAX4-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    .cfi_offset ra, -8
; LMULMAX4-NEXT:    li a0, 8
; LMULMAX4-NEXT:    sd a0, 128(sp)
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vmv.v.i v8, 0
; LMULMAX4-NEXT:    vse32.v v8, (sp)
; LMULMAX4-NEXT:    addi a0, sp, 64
; LMULMAX4-NEXT:    li a1, 1
; LMULMAX4-NEXT:    li a2, 2
; LMULMAX4-NEXT:    li a3, 3
; LMULMAX4-NEXT:    li a4, 4
; LMULMAX4-NEXT:    li a5, 5
; LMULMAX4-NEXT:    li a6, 6
; LMULMAX4-NEXT:    li a7, 7
; LMULMAX4-NEXT:    vse32.v v8, (a0)
; LMULMAX4-NEXT:    li a0, 0
; LMULMAX4-NEXT:    vmv.v.i v12, 0
; LMULMAX4-NEXT:    vmv.v.i v16, 0
; LMULMAX4-NEXT:    vmv.v.i v20, 0
; LMULMAX4-NEXT:    call vector_arg_via_stack
; LMULMAX4-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; LMULMAX4-NEXT:    addi sp, sp, 144
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: pass_vector_arg_via_stack:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    addi sp, sp, -144
; LMULMAX2-NEXT:    .cfi_def_cfa_offset 144
; LMULMAX2-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    .cfi_offset ra, -8
; LMULMAX2-NEXT:    li a0, 8
; LMULMAX2-NEXT:    sd a0, 128(sp)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vmv.v.i v8, 0
; LMULMAX2-NEXT:    vse32.v v8, (sp)
; LMULMAX2-NEXT:    addi a0, sp, 96
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 64
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 32
; LMULMAX2-NEXT:    li a1, 1
; LMULMAX2-NEXT:    li a2, 2
; LMULMAX2-NEXT:    li a3, 3
; LMULMAX2-NEXT:    li a4, 4
; LMULMAX2-NEXT:    li a5, 5
; LMULMAX2-NEXT:    li a6, 6
; LMULMAX2-NEXT:    li a7, 7
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    li a0, 0
; LMULMAX2-NEXT:    vmv.v.i v10, 0
; LMULMAX2-NEXT:    vmv.v.i v12, 0
; LMULMAX2-NEXT:    vmv.v.i v14, 0
; LMULMAX2-NEXT:    vmv.v.i v16, 0
; LMULMAX2-NEXT:    vmv.v.i v18, 0
; LMULMAX2-NEXT:    vmv.v.i v20, 0
; LMULMAX2-NEXT:    vmv.v.i v22, 0
; LMULMAX2-NEXT:    call vector_arg_via_stack
; LMULMAX2-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT:    addi sp, sp, 144
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: pass_vector_arg_via_stack:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi sp, sp, -144
; LMULMAX1-NEXT:    .cfi_def_cfa_offset 144
; LMULMAX1-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    .cfi_offset ra, -8
; LMULMAX1-NEXT:    li a0, 8
; LMULMAX1-NEXT:    sd a0, 128(sp)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vse32.v v8, (sp)
; LMULMAX1-NEXT:    addi a0, sp, 112
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 96
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 80
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 64
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 48
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 32
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 16
; LMULMAX1-NEXT:    li a1, 1
; LMULMAX1-NEXT:    li a2, 2
; LMULMAX1-NEXT:    li a3, 3
; LMULMAX1-NEXT:    li a4, 4
; LMULMAX1-NEXT:    li a5, 5
; LMULMAX1-NEXT:    li a6, 6
; LMULMAX1-NEXT:    li a7, 7
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    li a0, 0
; LMULMAX1-NEXT:    vmv.v.i v9, 0
; LMULMAX1-NEXT:    vmv.v.i v10, 0
; LMULMAX1-NEXT:    vmv.v.i v11, 0
; LMULMAX1-NEXT:    vmv.v.i v12, 0
; LMULMAX1-NEXT:    vmv.v.i v13, 0
; LMULMAX1-NEXT:    vmv.v.i v14, 0
; LMULMAX1-NEXT:    vmv.v.i v15, 0
; LMULMAX1-NEXT:    vmv.v.i v16, 0
; LMULMAX1-NEXT:    vmv.v.i v17, 0
; LMULMAX1-NEXT:    vmv.v.i v18, 0
; LMULMAX1-NEXT:    vmv.v.i v19, 0
; LMULMAX1-NEXT:    vmv.v.i v20, 0
; LMULMAX1-NEXT:    vmv.v.i v21, 0
; LMULMAX1-NEXT:    vmv.v.i v22, 0
; LMULMAX1-NEXT:    vmv.v.i v23, 0
; LMULMAX1-NEXT:    call vector_arg_via_stack
; LMULMAX1-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT:    addi sp, sp, 144
; LMULMAX1-NEXT:    ret
  %s = call <32 x i32> @vector_arg_via_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8)
  ret <32 x i32> %s
}

; Another pathological case but where a small mask vector must be passed on the
; stack.
define <4 x i1> @vector_mask_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %8, <4 x i1> %9, <4 x i1> %10) {
; CHECK-LABEL: vector_mask_arg_via_stack:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, sp, 136
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vlm.v v0, (a0)
; CHECK-NEXT:    ret
  ret <4 x i1> %10
}

; Calling the function above. Ensure we pass the mask arguments correctly. We
; legalize stores of small masks such that the value is at least byte-sized.
define <4 x i1> @pass_vector_mask_arg_via_stack(<4 x i1> %v) {
; LMULMAX8-LABEL: pass_vector_mask_arg_via_stack:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    addi sp, sp, -160
; LMULMAX8-NEXT:    .cfi_def_cfa_offset 160
; LMULMAX8-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    .cfi_offset ra, -8
; LMULMAX8-NEXT:    li a0, 32
; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; LMULMAX8-NEXT:    vmv.v.i v8, 0
; LMULMAX8-NEXT:    vse32.v v8, (sp)
; LMULMAX8-NEXT:    li a0, 8
; LMULMAX8-NEXT:    sd a0, 128(sp)
; LMULMAX8-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX8-NEXT:    vmv.v.i v16, 0
; LMULMAX8-NEXT:    vmerge.vim v16, v16, 1, v0
; LMULMAX8-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX8-NEXT:    vmv.v.i v17, 0
; LMULMAX8-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; LMULMAX8-NEXT:    vmv.v.v v17, v16
; LMULMAX8-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX8-NEXT:    vmsne.vi v16, v17, 0
; LMULMAX8-NEXT:    addi a0, sp, 136
; LMULMAX8-NEXT:    li a5, 5
; LMULMAX8-NEXT:    li a6, 6
; LMULMAX8-NEXT:    li a7, 7
; LMULMAX8-NEXT:    vsm.v v16, (a0)
; LMULMAX8-NEXT:    li a0, 0
; LMULMAX8-NEXT:    li a1, 0
; LMULMAX8-NEXT:    li a2, 0
; LMULMAX8-NEXT:    li a3, 0
; LMULMAX8-NEXT:    li a4, 0
; LMULMAX8-NEXT:    vmv8r.v v16, v8
; LMULMAX8-NEXT:    call vector_mask_arg_via_stack
; LMULMAX8-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
; LMULMAX8-NEXT:    addi sp, sp, 160
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: pass_vector_mask_arg_via_stack:
; LMULMAX4:       # %bb.0:
; LMULMAX4-NEXT:    addi sp, sp, -160
; LMULMAX4-NEXT:    .cfi_def_cfa_offset 160
; LMULMAX4-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    .cfi_offset ra, -8
; LMULMAX4-NEXT:    li a0, 8
; LMULMAX4-NEXT:    sd a0, 128(sp)
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vmv.v.i v8, 0
; LMULMAX4-NEXT:    vse32.v v8, (sp)
; LMULMAX4-NEXT:    addi a0, sp, 64
; LMULMAX4-NEXT:    vse32.v v8, (a0)
; LMULMAX4-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX4-NEXT:    vmv.v.i v12, 0
; LMULMAX4-NEXT:    vmerge.vim v12, v12, 1, v0
; LMULMAX4-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX4-NEXT:    vmv.v.i v13, 0
; LMULMAX4-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; LMULMAX4-NEXT:    vmv.v.v v13, v12
; LMULMAX4-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX4-NEXT:    vmsne.vi v12, v13, 0
; LMULMAX4-NEXT:    addi a0, sp, 136
; LMULMAX4-NEXT:    li a5, 5
; LMULMAX4-NEXT:    li a6, 6
; LMULMAX4-NEXT:    li a7, 7
; LMULMAX4-NEXT:    vsm.v v12, (a0)
; LMULMAX4-NEXT:    li a0, 0
; LMULMAX4-NEXT:    li a1, 0
; LMULMAX4-NEXT:    li a2, 0
; LMULMAX4-NEXT:    li a3, 0
; LMULMAX4-NEXT:    li a4, 0
; LMULMAX4-NEXT:    vmv4r.v v12, v8
; LMULMAX4-NEXT:    vmv4r.v v16, v8
; LMULMAX4-NEXT:    vmv4r.v v20, v8
; LMULMAX4-NEXT:    call vector_mask_arg_via_stack
; LMULMAX4-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
; LMULMAX4-NEXT:    addi sp, sp, 160
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: pass_vector_mask_arg_via_stack:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    addi sp, sp, -160
; LMULMAX2-NEXT:    .cfi_def_cfa_offset 160
; LMULMAX2-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    .cfi_offset ra, -8
; LMULMAX2-NEXT:    li a0, 8
; LMULMAX2-NEXT:    sd a0, 128(sp)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vmv.v.i v8, 0
; LMULMAX2-NEXT:    vse32.v v8, (sp)
; LMULMAX2-NEXT:    addi a0, sp, 96
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 64
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 32
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX2-NEXT:    vmv.v.i v10, 0
; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX2-NEXT:    vmv.v.i v11, 0
; LMULMAX2-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; LMULMAX2-NEXT:    vmv.v.v v11, v10
; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX2-NEXT:    vmsne.vi v10, v11, 0
; LMULMAX2-NEXT:    addi a0, sp, 136
; LMULMAX2-NEXT:    li a5, 5
; LMULMAX2-NEXT:    li a6, 6
; LMULMAX2-NEXT:    li a7, 7
; LMULMAX2-NEXT:    vsm.v v10, (a0)
; LMULMAX2-NEXT:    li a0, 0
; LMULMAX2-NEXT:    li a1, 0
; LMULMAX2-NEXT:    li a2, 0
; LMULMAX2-NEXT:    li a3, 0
; LMULMAX2-NEXT:    li a4, 0
; LMULMAX2-NEXT:    vmv2r.v v10, v8
; LMULMAX2-NEXT:    vmv2r.v v12, v8
; LMULMAX2-NEXT:    vmv2r.v v14, v8
; LMULMAX2-NEXT:    vmv2r.v v16, v8
; LMULMAX2-NEXT:    vmv2r.v v18, v8
; LMULMAX2-NEXT:    vmv2r.v v20, v8
; LMULMAX2-NEXT:    vmv2r.v v22, v8
; LMULMAX2-NEXT:    call vector_mask_arg_via_stack
; LMULMAX2-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT:    addi sp, sp, 160
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: pass_vector_mask_arg_via_stack:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi sp, sp, -160
; LMULMAX1-NEXT:    .cfi_def_cfa_offset 160
; LMULMAX1-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    .cfi_offset ra, -8
; LMULMAX1-NEXT:    li a0, 8
; LMULMAX1-NEXT:    sd a0, 128(sp)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vse32.v v8, (sp)
; LMULMAX1-NEXT:    addi a0, sp, 112
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 96
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 80
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 64
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 48
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 32
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, sp, 16
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v9, 0
; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v10, 0
; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; LMULMAX1-NEXT:    vmv.v.v v10, v9
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmsne.vi v9, v10, 0
; LMULMAX1-NEXT:    addi a0, sp, 136
; LMULMAX1-NEXT:    li a5, 5
; LMULMAX1-NEXT:    li a6, 6
; LMULMAX1-NEXT:    li a7, 7
; LMULMAX1-NEXT:    vsm.v v9, (a0)
; LMULMAX1-NEXT:    li a0, 0
; LMULMAX1-NEXT:    li a1, 0
; LMULMAX1-NEXT:    li a2, 0
; LMULMAX1-NEXT:    li a3, 0
; LMULMAX1-NEXT:    li a4, 0
; LMULMAX1-NEXT:    vmv1r.v v9, v8
; LMULMAX1-NEXT:    vmv1r.v v10, v8
; LMULMAX1-NEXT:    vmv1r.v v11, v8
; LMULMAX1-NEXT:    vmv1r.v v12, v8
; LMULMAX1-NEXT:    vmv1r.v v13, v8
; LMULMAX1-NEXT:    vmv1r.v v14, v8
; LMULMAX1-NEXT:    vmv1r.v v15, v8
; LMULMAX1-NEXT:    vmv1r.v v16, v8
; LMULMAX1-NEXT:    vmv1r.v v17, v8
; LMULMAX1-NEXT:    vmv1r.v v18, v8
; LMULMAX1-NEXT:    vmv1r.v v19, v8
; LMULMAX1-NEXT:    vmv1r.v v20, v8
; LMULMAX1-NEXT:    vmv1r.v v21, v8
; LMULMAX1-NEXT:    vmv1r.v v22, v8
; LMULMAX1-NEXT:    vmv1r.v v23, v8
; LMULMAX1-NEXT:    call vector_mask_arg_via_stack
; LMULMAX1-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT:    addi sp, sp, 160
; LMULMAX1-NEXT:    ret
  %r = call <4 x i1> @vector_mask_arg_via_stack(i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8, <4 x i1> %v, <4 x i1> %v)
  ret <4 x i1> %r
}