1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s --check-prefixes=CHECK,NOZBA
4 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA
6 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
7 ; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL
; A single LMUL=1 scalable alloca: all three configs grow/shrink the frame by
; 2*vlenb (csrr vlenb; slli 1); Zba folds the epilogue add into one sh1add.
9 define void @lmul1() nounwind {
12 ; NOZBA-NEXT: csrr a0, vlenb
13 ; NOZBA-NEXT: slli a0, a0, 1
14 ; NOZBA-NEXT: sub sp, sp, a0
15 ; NOZBA-NEXT: csrr a0, vlenb
16 ; NOZBA-NEXT: slli a0, a0, 1
17 ; NOZBA-NEXT: add sp, sp, a0
22 ; ZBA-NEXT: csrr a0, vlenb
23 ; ZBA-NEXT: slli a0, a0, 1
24 ; ZBA-NEXT: sub sp, sp, a0
25 ; ZBA-NEXT: csrr a0, vlenb
26 ; ZBA-NEXT: sh1add sp, a0, sp
31 ; NOMUL-NEXT: csrr a0, vlenb
32 ; NOMUL-NEXT: slli a0, a0, 1
33 ; NOMUL-NEXT: sub sp, sp, a0
34 ; NOMUL-NEXT: csrr a0, vlenb
35 ; NOMUL-NEXT: slli a0, a0, 1
36 ; NOMUL-NEXT: add sp, sp, a0
38 %v = alloca <vscale x 1 x i64>
; A single LMUL=2 alloca reserves the same 2*vlenb frame as @lmul1;
; again Zba replaces the epilogue slli+add pair with sh1add.
42 define void @lmul2() nounwind {
45 ; NOZBA-NEXT: csrr a0, vlenb
46 ; NOZBA-NEXT: slli a0, a0, 1
47 ; NOZBA-NEXT: sub sp, sp, a0
48 ; NOZBA-NEXT: csrr a0, vlenb
49 ; NOZBA-NEXT: slli a0, a0, 1
50 ; NOZBA-NEXT: add sp, sp, a0
55 ; ZBA-NEXT: csrr a0, vlenb
56 ; ZBA-NEXT: slli a0, a0, 1
57 ; ZBA-NEXT: sub sp, sp, a0
58 ; ZBA-NEXT: csrr a0, vlenb
59 ; ZBA-NEXT: sh1add sp, a0, sp
64 ; NOMUL-NEXT: csrr a0, vlenb
65 ; NOMUL-NEXT: slli a0, a0, 1
66 ; NOMUL-NEXT: sub sp, sp, a0
67 ; NOMUL-NEXT: csrr a0, vlenb
68 ; NOMUL-NEXT: slli a0, a0, 1
69 ; NOMUL-NEXT: add sp, sp, a0
71 %v = alloca <vscale x 2 x i64>
; An LMUL=4 alloca requires stack realignment: a frame pointer (s0) is set up,
; sp is lowered by 4*vlenb and masked with andi sp, sp, -32. All configs agree
; (single CHECK prefix).
75 define void @lmul4() nounwind {
78 ; CHECK-NEXT: addi sp, sp, -48
79 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
80 ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
81 ; CHECK-NEXT: addi s0, sp, 48
82 ; CHECK-NEXT: csrr a0, vlenb
83 ; CHECK-NEXT: slli a0, a0, 2
84 ; CHECK-NEXT: sub sp, sp, a0
85 ; CHECK-NEXT: andi sp, sp, -32
86 ; CHECK-NEXT: addi sp, s0, -48
87 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
88 ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
89 ; CHECK-NEXT: addi sp, sp, 48
91 %v = alloca <vscale x 4 x i64>
; An LMUL=8 alloca: 8*vlenb of scalable frame, an 80-byte scalar frame, and
; 64-byte realignment (andi sp, sp, -64) via the s0 frame pointer.
95 define void @lmul8() nounwind {
98 ; CHECK-NEXT: addi sp, sp, -80
99 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
100 ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
101 ; CHECK-NEXT: addi s0, sp, 80
102 ; CHECK-NEXT: csrr a0, vlenb
103 ; CHECK-NEXT: slli a0, a0, 3
104 ; CHECK-NEXT: sub sp, sp, a0
105 ; CHECK-NEXT: andi sp, sp, -64
106 ; CHECK-NEXT: addi sp, s0, -80
107 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
108 ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
109 ; CHECK-NEXT: addi sp, sp, 80
111 %v = alloca <vscale x 8 x i64>
; Mixed LMUL=1 + LMUL=2 allocas share one 4*vlenb region (slli 2);
; Zba uses sh2add for the epilogue restore.
115 define void @lmul1_and_2() nounwind {
116 ; NOZBA-LABEL: lmul1_and_2:
118 ; NOZBA-NEXT: csrr a0, vlenb
119 ; NOZBA-NEXT: slli a0, a0, 2
120 ; NOZBA-NEXT: sub sp, sp, a0
121 ; NOZBA-NEXT: csrr a0, vlenb
122 ; NOZBA-NEXT: slli a0, a0, 2
123 ; NOZBA-NEXT: add sp, sp, a0
126 ; ZBA-LABEL: lmul1_and_2:
128 ; ZBA-NEXT: csrr a0, vlenb
129 ; ZBA-NEXT: slli a0, a0, 2
130 ; ZBA-NEXT: sub sp, sp, a0
131 ; ZBA-NEXT: csrr a0, vlenb
132 ; ZBA-NEXT: sh2add sp, a0, sp
135 ; NOMUL-LABEL: lmul1_and_2:
137 ; NOMUL-NEXT: csrr a0, vlenb
138 ; NOMUL-NEXT: slli a0, a0, 2
139 ; NOMUL-NEXT: sub sp, sp, a0
140 ; NOMUL-NEXT: csrr a0, vlenb
141 ; NOMUL-NEXT: slli a0, a0, 2
142 ; NOMUL-NEXT: add sp, sp, a0
144 %v1 = alloca <vscale x 1 x i64>
145 %v2 = alloca <vscale x 2 x i64>
; LMUL=2 + LMUL=4 allocas: the LMUL=4 object forces realignment, so the
; frame-pointer path is used with 8*vlenb reserved and andi sp, sp, -32.
149 define void @lmul2_and_4() nounwind {
150 ; CHECK-LABEL: lmul2_and_4:
152 ; CHECK-NEXT: addi sp, sp, -48
153 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
154 ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
155 ; CHECK-NEXT: addi s0, sp, 48
156 ; CHECK-NEXT: csrr a0, vlenb
157 ; CHECK-NEXT: slli a0, a0, 3
158 ; CHECK-NEXT: sub sp, sp, a0
159 ; CHECK-NEXT: andi sp, sp, -32
160 ; CHECK-NEXT: addi sp, s0, -48
161 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
162 ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
163 ; CHECK-NEXT: addi sp, sp, 48
165 %v1 = alloca <vscale x 2 x i64>
166 %v2 = alloca <vscale x 4 x i64>
; LMUL=1 + LMUL=4 allocas: same realigned 8*vlenb frame shape as
; @lmul2_and_4 (s0 frame pointer, andi sp, sp, -32).
170 define void @lmul1_and_4() nounwind {
171 ; CHECK-LABEL: lmul1_and_4:
173 ; CHECK-NEXT: addi sp, sp, -48
174 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
175 ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
176 ; CHECK-NEXT: addi s0, sp, 48
177 ; CHECK-NEXT: csrr a0, vlenb
178 ; CHECK-NEXT: slli a0, a0, 3
179 ; CHECK-NEXT: sub sp, sp, a0
180 ; CHECK-NEXT: andi sp, sp, -32
181 ; CHECK-NEXT: addi sp, s0, -48
182 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
183 ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
184 ; CHECK-NEXT: addi sp, sp, 48
186 %v1 = alloca <vscale x 1 x i64>
187 %v2 = alloca <vscale x 4 x i64>
; Same allocas as @lmul1_and_2 but declared in the opposite order; the
; generated 4*vlenb frame (and the Zba sh2add epilogue) is identical.
191 define void @lmul2_and_1() nounwind {
192 ; NOZBA-LABEL: lmul2_and_1:
194 ; NOZBA-NEXT: csrr a0, vlenb
195 ; NOZBA-NEXT: slli a0, a0, 2
196 ; NOZBA-NEXT: sub sp, sp, a0
197 ; NOZBA-NEXT: csrr a0, vlenb
198 ; NOZBA-NEXT: slli a0, a0, 2
199 ; NOZBA-NEXT: add sp, sp, a0
202 ; ZBA-LABEL: lmul2_and_1:
204 ; ZBA-NEXT: csrr a0, vlenb
205 ; ZBA-NEXT: slli a0, a0, 2
206 ; ZBA-NEXT: sub sp, sp, a0
207 ; ZBA-NEXT: csrr a0, vlenb
208 ; ZBA-NEXT: sh2add sp, a0, sp
211 ; NOMUL-LABEL: lmul2_and_1:
213 ; NOMUL-NEXT: csrr a0, vlenb
214 ; NOMUL-NEXT: slli a0, a0, 2
215 ; NOMUL-NEXT: sub sp, sp, a0
216 ; NOMUL-NEXT: csrr a0, vlenb
217 ; NOMUL-NEXT: slli a0, a0, 2
218 ; NOMUL-NEXT: add sp, sp, a0
220 %v1 = alloca <vscale x 2 x i64>
221 %v2 = alloca <vscale x 1 x i64>
; LMUL=4 first, LMUL=1 second: realigned frame-pointer path, 8*vlenb
; reserved, andi sp, sp, -32; identical across all configs.
225 define void @lmul4_and_1() nounwind {
226 ; CHECK-LABEL: lmul4_and_1:
228 ; CHECK-NEXT: addi sp, sp, -48
229 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
230 ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
231 ; CHECK-NEXT: addi s0, sp, 48
232 ; CHECK-NEXT: csrr a0, vlenb
233 ; CHECK-NEXT: slli a0, a0, 3
234 ; CHECK-NEXT: sub sp, sp, a0
235 ; CHECK-NEXT: andi sp, sp, -32
236 ; CHECK-NEXT: addi sp, s0, -48
237 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
238 ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
239 ; CHECK-NEXT: addi sp, sp, 48
241 %v1 = alloca <vscale x 4 x i64>
242 %v2 = alloca <vscale x 1 x i64>
; LMUL=4 + LMUL=2 allocas: same realigned 8*vlenb frame as @lmul4_and_1.
246 define void @lmul4_and_2() nounwind {
247 ; CHECK-LABEL: lmul4_and_2:
249 ; CHECK-NEXT: addi sp, sp, -48
250 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
251 ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
252 ; CHECK-NEXT: addi s0, sp, 48
253 ; CHECK-NEXT: csrr a0, vlenb
254 ; CHECK-NEXT: slli a0, a0, 3
255 ; CHECK-NEXT: sub sp, sp, a0
256 ; CHECK-NEXT: andi sp, sp, -32
257 ; CHECK-NEXT: addi sp, s0, -48
258 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
259 ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
260 ; CHECK-NEXT: addi sp, sp, 48
262 %v1 = alloca <vscale x 4 x i64>
263 %v2 = alloca <vscale x 2 x i64>
; Two interleaved (4,2) alloca pairs: total scalable frame is a power of two,
; 16*vlenb (slli 4), so no multiply sequence is needed; realigned to 32 bytes.
267 define void @lmul4_and_2_x2_0() nounwind {
268 ; CHECK-LABEL: lmul4_and_2_x2_0:
270 ; CHECK-NEXT: addi sp, sp, -48
271 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
272 ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
273 ; CHECK-NEXT: addi s0, sp, 48
274 ; CHECK-NEXT: csrr a0, vlenb
275 ; CHECK-NEXT: slli a0, a0, 4
276 ; CHECK-NEXT: sub sp, sp, a0
277 ; CHECK-NEXT: andi sp, sp, -32
278 ; CHECK-NEXT: addi sp, s0, -48
279 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
280 ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
281 ; CHECK-NEXT: addi sp, sp, 48
283 %v1 = alloca <vscale x 4 x i64>
284 %v2 = alloca <vscale x 2 x i64>
285 %v3 = alloca <vscale x 4 x i64>
286 %v4 = alloca <vscale x 2 x i64>
; Grouped (4,4,2,2) allocas give a non-power-of-two frame of 12*vlenb; each
; config materializes the multiply differently: NOZBA uses li 12 + mul, ZBA
; uses slli 2 + sh1add (4*vlenb*3), and NOMUL uses a shift/add sequence.
290 define void @lmul4_and_2_x2_1() nounwind {
291 ; NOZBA-LABEL: lmul4_and_2_x2_1:
293 ; NOZBA-NEXT: addi sp, sp, -48
294 ; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
295 ; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
296 ; NOZBA-NEXT: addi s0, sp, 48
297 ; NOZBA-NEXT: csrr a0, vlenb
298 ; NOZBA-NEXT: li a1, 12
299 ; NOZBA-NEXT: mul a0, a0, a1
300 ; NOZBA-NEXT: sub sp, sp, a0
301 ; NOZBA-NEXT: andi sp, sp, -32
302 ; NOZBA-NEXT: addi sp, s0, -48
303 ; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
304 ; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
305 ; NOZBA-NEXT: addi sp, sp, 48
308 ; ZBA-LABEL: lmul4_and_2_x2_1:
310 ; ZBA-NEXT: addi sp, sp, -48
311 ; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
312 ; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
313 ; ZBA-NEXT: addi s0, sp, 48
314 ; ZBA-NEXT: csrr a0, vlenb
315 ; ZBA-NEXT: slli a0, a0, 2
316 ; ZBA-NEXT: sh1add a0, a0, a0
317 ; ZBA-NEXT: sub sp, sp, a0
318 ; ZBA-NEXT: andi sp, sp, -32
319 ; ZBA-NEXT: addi sp, s0, -48
320 ; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
321 ; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
322 ; ZBA-NEXT: addi sp, sp, 48
325 ; NOMUL-LABEL: lmul4_and_2_x2_1:
327 ; NOMUL-NEXT: addi sp, sp, -48
328 ; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
329 ; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
330 ; NOMUL-NEXT: addi s0, sp, 48
331 ; NOMUL-NEXT: csrr a0, vlenb
332 ; NOMUL-NEXT: slli a0, a0, 2
333 ; NOMUL-NEXT: mv a1, a0
334 ; NOMUL-NEXT: slli a0, a0, 1
335 ; NOMUL-NEXT: add a0, a0, a1
336 ; NOMUL-NEXT: sub sp, sp, a0
337 ; NOMUL-NEXT: andi sp, sp, -32
338 ; NOMUL-NEXT: addi sp, s0, -48
339 ; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
340 ; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
341 ; NOMUL-NEXT: addi sp, sp, 48
343 %v1 = alloca <vscale x 4 x i64>
344 %v3 = alloca <vscale x 4 x i64>
345 %v2 = alloca <vscale x 2 x i64>
346 %v4 = alloca <vscale x 2 x i64>
; Mixes a scalar i64 stack slot with LMUL=1/2 allocas: the volatile store
; lands in the fixed 16-byte scalar frame (sd a0, 8(sp)) below the 4*vlenb
; scalable area. NOTE(review): %x1 is presumably an `alloca i64` on a line
; outside this excerpt — confirm against the full file.
351 define void @gpr_and_lmul1_and_2() nounwind {
352 ; NOZBA-LABEL: gpr_and_lmul1_and_2:
354 ; NOZBA-NEXT: addi sp, sp, -16
355 ; NOZBA-NEXT: csrr a0, vlenb
356 ; NOZBA-NEXT: slli a0, a0, 2
357 ; NOZBA-NEXT: sub sp, sp, a0
358 ; NOZBA-NEXT: li a0, 3
359 ; NOZBA-NEXT: sd a0, 8(sp)
360 ; NOZBA-NEXT: csrr a0, vlenb
361 ; NOZBA-NEXT: slli a0, a0, 2
362 ; NOZBA-NEXT: add sp, sp, a0
363 ; NOZBA-NEXT: addi sp, sp, 16
366 ; ZBA-LABEL: gpr_and_lmul1_and_2:
368 ; ZBA-NEXT: addi sp, sp, -16
369 ; ZBA-NEXT: csrr a0, vlenb
370 ; ZBA-NEXT: slli a0, a0, 2
371 ; ZBA-NEXT: sub sp, sp, a0
373 ; ZBA-NEXT: sd a0, 8(sp)
374 ; ZBA-NEXT: csrr a0, vlenb
375 ; ZBA-NEXT: sh2add sp, a0, sp
376 ; ZBA-NEXT: addi sp, sp, 16
379 ; NOMUL-LABEL: gpr_and_lmul1_and_2:
381 ; NOMUL-NEXT: addi sp, sp, -16
382 ; NOMUL-NEXT: csrr a0, vlenb
383 ; NOMUL-NEXT: slli a0, a0, 2
384 ; NOMUL-NEXT: sub sp, sp, a0
385 ; NOMUL-NEXT: li a0, 3
386 ; NOMUL-NEXT: sd a0, 8(sp)
387 ; NOMUL-NEXT: csrr a0, vlenb
388 ; NOMUL-NEXT: slli a0, a0, 2
389 ; NOMUL-NEXT: add sp, sp, a0
390 ; NOMUL-NEXT: addi sp, sp, 16
393 %v1 = alloca <vscale x 1 x i64>
394 %v2 = alloca <vscale x 2 x i64>
395 store volatile i64 3, ptr %x1
; Scalar slot plus LMUL=1/4 allocas: the LMUL=4 object forces the realigned
; frame-pointer path (8*vlenb, andi sp, sp, -32); the volatile i64 store goes
; to 8(sp) inside the realigned frame. NOTE(review): %x1's alloca is outside
; this excerpt — confirm against the full file.
399 define void @gpr_and_lmul1_and_4() nounwind {
400 ; CHECK-LABEL: gpr_and_lmul1_and_4:
402 ; CHECK-NEXT: addi sp, sp, -48
403 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
404 ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
405 ; CHECK-NEXT: addi s0, sp, 48
406 ; CHECK-NEXT: csrr a0, vlenb
407 ; CHECK-NEXT: slli a0, a0, 3
408 ; CHECK-NEXT: sub sp, sp, a0
409 ; CHECK-NEXT: andi sp, sp, -32
410 ; CHECK-NEXT: li a0, 3
411 ; CHECK-NEXT: sd a0, 8(sp)
412 ; CHECK-NEXT: addi sp, s0, -48
413 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
414 ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
415 ; CHECK-NEXT: addi sp, sp, 48
418 %v1 = alloca <vscale x 1 x i64>
419 %v2 = alloca <vscale x 4 x i64>
420 store volatile i64 3, ptr %x1
; One alloca of each LMUL 1/2/4/8: the combined frame is 16*vlenb (slli 4),
; realigned to 64 bytes for the LMUL=8 object.
424 define void @lmul_1_2_4_8() nounwind {
425 ; CHECK-LABEL: lmul_1_2_4_8:
427 ; CHECK-NEXT: addi sp, sp, -80
428 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
429 ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
430 ; CHECK-NEXT: addi s0, sp, 80
431 ; CHECK-NEXT: csrr a0, vlenb
432 ; CHECK-NEXT: slli a0, a0, 4
433 ; CHECK-NEXT: sub sp, sp, a0
434 ; CHECK-NEXT: andi sp, sp, -64
435 ; CHECK-NEXT: addi sp, s0, -80
436 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
437 ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
438 ; CHECK-NEXT: addi sp, sp, 80
440 %v1 = alloca <vscale x 1 x i64>
441 %v2 = alloca <vscale x 2 x i64>
442 %v4 = alloca <vscale x 4 x i64>
443 %v8 = alloca <vscale x 8 x i64>
; Two of each LMUL 1/2/4/8: frame doubles to 32*vlenb (slli 5), still a
; single shift; realigned to 64 bytes.
447 define void @lmul_1_2_4_8_x2_0() nounwind {
448 ; CHECK-LABEL: lmul_1_2_4_8_x2_0:
450 ; CHECK-NEXT: addi sp, sp, -80
451 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
452 ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
453 ; CHECK-NEXT: addi s0, sp, 80
454 ; CHECK-NEXT: csrr a0, vlenb
455 ; CHECK-NEXT: slli a0, a0, 5
456 ; CHECK-NEXT: sub sp, sp, a0
457 ; CHECK-NEXT: andi sp, sp, -64
458 ; CHECK-NEXT: addi sp, s0, -80
459 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
460 ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
461 ; CHECK-NEXT: addi sp, sp, 80
463 %v1 = alloca <vscale x 1 x i64>
464 %v2 = alloca <vscale x 1 x i64>
465 %v3 = alloca <vscale x 2 x i64>
466 %v4 = alloca <vscale x 2 x i64>
467 %v5 = alloca <vscale x 4 x i64>
468 %v6 = alloca <vscale x 4 x i64>
469 %v7 = alloca <vscale x 8 x i64>
470 %v8 = alloca <vscale x 8 x i64>
; Same allocas as @lmul_1_2_4_8_x2_0 but declared largest-first; the
; emitted 32*vlenb realigned frame is identical.
474 define void @lmul_1_2_4_8_x2_1() nounwind {
475 ; CHECK-LABEL: lmul_1_2_4_8_x2_1:
477 ; CHECK-NEXT: addi sp, sp, -80
478 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
479 ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
480 ; CHECK-NEXT: addi s0, sp, 80
481 ; CHECK-NEXT: csrr a0, vlenb
482 ; CHECK-NEXT: slli a0, a0, 5
483 ; CHECK-NEXT: sub sp, sp, a0
484 ; CHECK-NEXT: andi sp, sp, -64
485 ; CHECK-NEXT: addi sp, s0, -80
486 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
487 ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
488 ; CHECK-NEXT: addi sp, sp, 80
490 %v8 = alloca <vscale x 8 x i64>
491 %v7 = alloca <vscale x 8 x i64>
492 %v6 = alloca <vscale x 4 x i64>
493 %v5 = alloca <vscale x 4 x i64>
494 %v4 = alloca <vscale x 2 x i64>
495 %v3 = alloca <vscale x 2 x i64>
496 %v2 = alloca <vscale x 1 x i64>
497 %v1 = alloca <vscale x 1 x i64>
; Scalable i1 mask allocas (LMUL 1/2/4/8 masks) together reserve 4*vlenb;
; Zba again folds the epilogue restore into sh2add.
501 define void @masks() nounwind {
502 ; NOZBA-LABEL: masks:
504 ; NOZBA-NEXT: csrr a0, vlenb
505 ; NOZBA-NEXT: slli a0, a0, 2
506 ; NOZBA-NEXT: sub sp, sp, a0
507 ; NOZBA-NEXT: csrr a0, vlenb
508 ; NOZBA-NEXT: slli a0, a0, 2
509 ; NOZBA-NEXT: add sp, sp, a0
514 ; ZBA-NEXT: csrr a0, vlenb
515 ; ZBA-NEXT: slli a0, a0, 2
516 ; ZBA-NEXT: sub sp, sp, a0
517 ; ZBA-NEXT: csrr a0, vlenb
518 ; ZBA-NEXT: sh2add sp, a0, sp
521 ; NOMUL-LABEL: masks:
523 ; NOMUL-NEXT: csrr a0, vlenb
524 ; NOMUL-NEXT: slli a0, a0, 2
525 ; NOMUL-NEXT: sub sp, sp, a0
526 ; NOMUL-NEXT: csrr a0, vlenb
527 ; NOMUL-NEXT: slli a0, a0, 2
528 ; NOMUL-NEXT: add sp, sp, a0
530 %v1 = alloca <vscale x 1 x i1>
531 %v2 = alloca <vscale x 2 x i1>
532 %v4 = alloca <vscale x 4 x i1>
533 %v8 = alloca <vscale x 8 x i1>
; Five LMUL=8 allocas: a 40*vlenb frame. NOZBA materializes it with
; li 40 + mul, ZBA with slli 3 + sh2add (8*vlenb*5), and NOMUL with a
; shift/add sequence; all realign to 64 bytes via s0.
537 define void @lmul_8_x5() nounwind {
538 ; NOZBA-LABEL: lmul_8_x5:
540 ; NOZBA-NEXT: addi sp, sp, -80
541 ; NOZBA-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
542 ; NOZBA-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
543 ; NOZBA-NEXT: addi s0, sp, 80
544 ; NOZBA-NEXT: csrr a0, vlenb
545 ; NOZBA-NEXT: li a1, 40
546 ; NOZBA-NEXT: mul a0, a0, a1
547 ; NOZBA-NEXT: sub sp, sp, a0
548 ; NOZBA-NEXT: andi sp, sp, -64
549 ; NOZBA-NEXT: addi sp, s0, -80
550 ; NOZBA-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
551 ; NOZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
552 ; NOZBA-NEXT: addi sp, sp, 80
555 ; ZBA-LABEL: lmul_8_x5:
557 ; ZBA-NEXT: addi sp, sp, -80
558 ; ZBA-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
559 ; ZBA-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
560 ; ZBA-NEXT: addi s0, sp, 80
561 ; ZBA-NEXT: csrr a0, vlenb
562 ; ZBA-NEXT: slli a0, a0, 3
563 ; ZBA-NEXT: sh2add a0, a0, a0
564 ; ZBA-NEXT: sub sp, sp, a0
565 ; ZBA-NEXT: andi sp, sp, -64
566 ; ZBA-NEXT: addi sp, s0, -80
567 ; ZBA-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
568 ; ZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
569 ; ZBA-NEXT: addi sp, sp, 80
572 ; NOMUL-LABEL: lmul_8_x5:
574 ; NOMUL-NEXT: addi sp, sp, -80
575 ; NOMUL-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
576 ; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
577 ; NOMUL-NEXT: addi s0, sp, 80
578 ; NOMUL-NEXT: csrr a0, vlenb
579 ; NOMUL-NEXT: slli a0, a0, 3
580 ; NOMUL-NEXT: mv a1, a0
581 ; NOMUL-NEXT: slli a0, a0, 2
582 ; NOMUL-NEXT: add a0, a0, a1
583 ; NOMUL-NEXT: sub sp, sp, a0
584 ; NOMUL-NEXT: andi sp, sp, -64
585 ; NOMUL-NEXT: addi sp, s0, -80
586 ; NOMUL-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
587 ; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
588 ; NOMUL-NEXT: addi sp, sp, 80
590 %v1 = alloca <vscale x 8 x i64>
591 %v2 = alloca <vscale x 8 x i64>
592 %v3 = alloca <vscale x 8 x i64>
593 %v4 = alloca <vscale x 8 x i64>
594 %v5 = alloca <vscale x 8 x i64>
; Nine LMUL=8 allocas: a 72*vlenb frame. NOZBA uses li 72 + mul, ZBA uses
; slli 3 + sh3add (8*vlenb*9), NOMUL a shift/add sequence; realigned to 64
; bytes. (Function continues past the end of this excerpt.)
598 define void @lmul_8_x9() nounwind {
599 ; NOZBA-LABEL: lmul_8_x9:
601 ; NOZBA-NEXT: addi sp, sp, -80
602 ; NOZBA-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
603 ; NOZBA-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
604 ; NOZBA-NEXT: addi s0, sp, 80
605 ; NOZBA-NEXT: csrr a0, vlenb
606 ; NOZBA-NEXT: li a1, 72
607 ; NOZBA-NEXT: mul a0, a0, a1
608 ; NOZBA-NEXT: sub sp, sp, a0
609 ; NOZBA-NEXT: andi sp, sp, -64
610 ; NOZBA-NEXT: addi sp, s0, -80
611 ; NOZBA-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
612 ; NOZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
613 ; NOZBA-NEXT: addi sp, sp, 80
616 ; ZBA-LABEL: lmul_8_x9:
618 ; ZBA-NEXT: addi sp, sp, -80
619 ; ZBA-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
620 ; ZBA-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
621 ; ZBA-NEXT: addi s0, sp, 80
622 ; ZBA-NEXT: csrr a0, vlenb
623 ; ZBA-NEXT: slli a0, a0, 3
624 ; ZBA-NEXT: sh3add a0, a0, a0
625 ; ZBA-NEXT: sub sp, sp, a0
626 ; ZBA-NEXT: andi sp, sp, -64
627 ; ZBA-NEXT: addi sp, s0, -80
628 ; ZBA-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
629 ; ZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
630 ; ZBA-NEXT: addi sp, sp, 80
633 ; NOMUL-LABEL: lmul_8_x9:
635 ; NOMUL-NEXT: addi sp, sp, -80
636 ; NOMUL-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
637 ; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
638 ; NOMUL-NEXT: addi s0, sp, 80
639 ; NOMUL-NEXT: csrr a0, vlenb
640 ; NOMUL-NEXT: slli a0, a0, 3
641 ; NOMUL-NEXT: mv a1, a0
642 ; NOMUL-NEXT: slli a0, a0, 3
643 ; NOMUL-NEXT: add a0, a0, a1
644 ; NOMUL-NEXT: sub sp, sp, a0
645 ; NOMUL-NEXT: andi sp, sp, -64
646 ; NOMUL-NEXT: addi sp, s0, -80
647 ; NOMUL-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
648 ; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
649 ; NOMUL-NEXT: addi sp, sp, 80
651 %v1 = alloca <vscale x 8 x i64>
652 %v2 = alloca <vscale x 8 x i64>
653 %v3 = alloca <vscale x 8 x i64>
654 %v4 = alloca <vscale x 8 x i64>
655 %v5 = alloca <vscale x 8 x i64>
656 %v6 = alloca <vscale x 8 x i64>
657 %v7 = alloca <vscale x 8 x i64>
658 %v8 = alloca <vscale x 8 x i64>
659 %v9 = alloca <vscale x 8 x i64>