1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32
3 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
4 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
5 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64
6 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
7 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
; Splat the scalar %y into every lane of a <16 x i8> and store it to %x.
; All RUN lines share the CHECK prefix: one vmv.v.x at e8/m1, then vse8.v.
9 define void @splat_v16i8(ptr %x, i8 %y) {
10 ; CHECK-LABEL: splat_v16i8:
12 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
13 ; CHECK-NEXT: vmv.v.x v8, a1
14 ; CHECK-NEXT: vse8.v v8, (a0)
16 %a = insertelement <16 x i8> poison, i8 %y, i32 0
17 %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
18 store <16 x i8> %b, ptr %x
; Splat the scalar %y into every lane of a <8 x i16> and store it to %x.
; All RUN lines share the CHECK prefix: one vmv.v.x at e16/m1, then vse16.v.
22 define void @splat_v8i16(ptr %x, i16 %y) {
23 ; CHECK-LABEL: splat_v8i16:
25 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
26 ; CHECK-NEXT: vmv.v.x v8, a1
27 ; CHECK-NEXT: vse16.v v8, (a0)
29 %a = insertelement <8 x i16> poison, i16 %y, i32 0
30 %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
31 store <8 x i16> %b, ptr %x
; Splat the scalar %y into every lane of a <4 x i32> and store it to %x.
; All RUN lines share the CHECK prefix: one vmv.v.x at e32/m1, then vse32.v.
35 define void @splat_v4i32(ptr %x, i32 %y) {
36 ; CHECK-LABEL: splat_v4i32:
38 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
39 ; CHECK-NEXT: vmv.v.x v8, a1
40 ; CHECK-NEXT: vse32.v v8, (a0)
42 %a = insertelement <4 x i32> poison, i32 %y, i32 0
43 %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
44 store <4 x i32> %b, ptr %x
; Splat the i64 scalar %y into a <2 x i64> and store it to %x.
; RV32 checks: a1/a2 are stored to adjacent stack words and the splat is
; formed with vlse64.v using the zero register as stride.
; RV64 checks: the splat is a single vmv.v.x from a1.
48 define void @splat_v2i64(ptr %x, i64 %y) {
49 ; LMULMAX8-RV32-LABEL: splat_v2i64:
50 ; LMULMAX8-RV32: # %bb.0:
51 ; LMULMAX8-RV32-NEXT: addi sp, sp, -16
52 ; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16
53 ; LMULMAX8-RV32-NEXT: sw a2, 12(sp)
54 ; LMULMAX8-RV32-NEXT: sw a1, 8(sp)
55 ; LMULMAX8-RV32-NEXT: addi a1, sp, 8
56 ; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
57 ; LMULMAX8-RV32-NEXT: vlse64.v v8, (a1), zero
58 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
59 ; LMULMAX8-RV32-NEXT: addi sp, sp, 16
60 ; LMULMAX8-RV32-NEXT: ret
62 ; LMULMAX2-RV32-LABEL: splat_v2i64:
63 ; LMULMAX2-RV32: # %bb.0:
64 ; LMULMAX2-RV32-NEXT: addi sp, sp, -16
65 ; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16
66 ; LMULMAX2-RV32-NEXT: sw a2, 12(sp)
67 ; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
68 ; LMULMAX2-RV32-NEXT: addi a1, sp, 8
69 ; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
70 ; LMULMAX2-RV32-NEXT: vlse64.v v8, (a1), zero
71 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
72 ; LMULMAX2-RV32-NEXT: addi sp, sp, 16
73 ; LMULMAX2-RV32-NEXT: ret
75 ; LMULMAX1-RV32-LABEL: splat_v2i64:
76 ; LMULMAX1-RV32: # %bb.0:
77 ; LMULMAX1-RV32-NEXT: addi sp, sp, -16
78 ; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16
79 ; LMULMAX1-RV32-NEXT: sw a2, 12(sp)
80 ; LMULMAX1-RV32-NEXT: sw a1, 8(sp)
81 ; LMULMAX1-RV32-NEXT: addi a1, sp, 8
82 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
83 ; LMULMAX1-RV32-NEXT: vlse64.v v8, (a1), zero
84 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
85 ; LMULMAX1-RV32-NEXT: addi sp, sp, 16
86 ; LMULMAX1-RV32-NEXT: ret
88 ; LMULMAX8-RV64-LABEL: splat_v2i64:
89 ; LMULMAX8-RV64: # %bb.0:
90 ; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
91 ; LMULMAX8-RV64-NEXT: vmv.v.x v8, a1
92 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
93 ; LMULMAX8-RV64-NEXT: ret
95 ; LMULMAX2-RV64-LABEL: splat_v2i64:
96 ; LMULMAX2-RV64: # %bb.0:
97 ; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
98 ; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1
99 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
100 ; LMULMAX2-RV64-NEXT: ret
102 ; LMULMAX1-RV64-LABEL: splat_v2i64:
103 ; LMULMAX1-RV64: # %bb.0:
104 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
105 ; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1
106 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
107 ; LMULMAX1-RV64-NEXT: ret
108 %a = insertelement <2 x i64> poison, i64 %y, i32 0
109 %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
110 store <2 x i64> %b, ptr %x
; Splat the scalar %y into a <32 x i8> and store it to %x.
; LMULMAX8/LMULMAX2 checks: one e8/m2 splat and store, with VL=32 loaded via li.
; LMULMAX1 checks: one e8/m1 splat stored twice, at %x+16 and at %x.
114 define void @splat_v32i8(ptr %x, i8 %y) {
115 ; LMULMAX8-LABEL: splat_v32i8:
117 ; LMULMAX8-NEXT: li a2, 32
118 ; LMULMAX8-NEXT: vsetvli zero, a2, e8, m2, ta, ma
119 ; LMULMAX8-NEXT: vmv.v.x v8, a1
120 ; LMULMAX8-NEXT: vse8.v v8, (a0)
123 ; LMULMAX2-LABEL: splat_v32i8:
125 ; LMULMAX2-NEXT: li a2, 32
126 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
127 ; LMULMAX2-NEXT: vmv.v.x v8, a1
128 ; LMULMAX2-NEXT: vse8.v v8, (a0)
131 ; LMULMAX1-LABEL: splat_v32i8:
133 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
134 ; LMULMAX1-NEXT: vmv.v.x v8, a1
135 ; LMULMAX1-NEXT: addi a1, a0, 16
136 ; LMULMAX1-NEXT: vse8.v v8, (a1)
137 ; LMULMAX1-NEXT: vse8.v v8, (a0)
139 %a = insertelement <32 x i8> poison, i8 %y, i32 0
140 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
141 store <32 x i8> %b, ptr %x
; Splat the scalar %y into a <16 x i16> and store it to %x.
; LMULMAX8/LMULMAX2 checks: one e16/m2 splat and store.
; LMULMAX1 checks: one e16/m1 splat stored twice, at %x+16 and at %x.
145 define void @splat_v16i16(ptr %x, i16 %y) {
146 ; LMULMAX8-LABEL: splat_v16i16:
148 ; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
149 ; LMULMAX8-NEXT: vmv.v.x v8, a1
150 ; LMULMAX8-NEXT: vse16.v v8, (a0)
153 ; LMULMAX2-LABEL: splat_v16i16:
155 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
156 ; LMULMAX2-NEXT: vmv.v.x v8, a1
157 ; LMULMAX2-NEXT: vse16.v v8, (a0)
160 ; LMULMAX1-LABEL: splat_v16i16:
162 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
163 ; LMULMAX1-NEXT: vmv.v.x v8, a1
164 ; LMULMAX1-NEXT: addi a1, a0, 16
165 ; LMULMAX1-NEXT: vse16.v v8, (a1)
166 ; LMULMAX1-NEXT: vse16.v v8, (a0)
168 %a = insertelement <16 x i16> poison, i16 %y, i32 0
169 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
170 store <16 x i16> %b, ptr %x
; Splat the scalar %y into a <8 x i32> and store it to %x.
; LMULMAX8/LMULMAX2 checks: one e32/m2 splat and store.
; LMULMAX1 checks: one e32/m1 splat stored twice, at %x+16 and at %x.
174 define void @splat_v8i32(ptr %x, i32 %y) {
175 ; LMULMAX8-LABEL: splat_v8i32:
177 ; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma
178 ; LMULMAX8-NEXT: vmv.v.x v8, a1
179 ; LMULMAX8-NEXT: vse32.v v8, (a0)
182 ; LMULMAX2-LABEL: splat_v8i32:
184 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
185 ; LMULMAX2-NEXT: vmv.v.x v8, a1
186 ; LMULMAX2-NEXT: vse32.v v8, (a0)
189 ; LMULMAX1-LABEL: splat_v8i32:
191 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
192 ; LMULMAX1-NEXT: vmv.v.x v8, a1
193 ; LMULMAX1-NEXT: addi a1, a0, 16
194 ; LMULMAX1-NEXT: vse32.v v8, (a1)
195 ; LMULMAX1-NEXT: vse32.v v8, (a0)
197 %a = insertelement <8 x i32> poison, i32 %y, i32 0
198 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
199 store <8 x i32> %b, ptr %x
; Splat the i64 scalar %y into a <4 x i64> and store it to %x.
; LMULMAX8/LMULMAX2 on RV32: a1/a2 go through the stack and a vlse64.v with
; the zero register as stride forms the e64/m2 splat.
; LMULMAX1 on RV32: the splat is built at e32 by merging a1 into a splat of a2
; under the mask constant 5, then stored twice with vse32.v.
; RV64: vmv.v.x from a1; LMULMAX1 stores the m1 splat at %x+16 and at %x.
203 define void @splat_v4i64(ptr %x, i64 %y) {
204 ; LMULMAX8-RV32-LABEL: splat_v4i64:
205 ; LMULMAX8-RV32: # %bb.0:
206 ; LMULMAX8-RV32-NEXT: addi sp, sp, -16
207 ; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16
208 ; LMULMAX8-RV32-NEXT: sw a2, 12(sp)
209 ; LMULMAX8-RV32-NEXT: sw a1, 8(sp)
210 ; LMULMAX8-RV32-NEXT: addi a1, sp, 8
211 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
212 ; LMULMAX8-RV32-NEXT: vlse64.v v8, (a1), zero
213 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
214 ; LMULMAX8-RV32-NEXT: addi sp, sp, 16
215 ; LMULMAX8-RV32-NEXT: ret
217 ; LMULMAX2-RV32-LABEL: splat_v4i64:
218 ; LMULMAX2-RV32: # %bb.0:
219 ; LMULMAX2-RV32-NEXT: addi sp, sp, -16
220 ; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16
221 ; LMULMAX2-RV32-NEXT: sw a2, 12(sp)
222 ; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
223 ; LMULMAX2-RV32-NEXT: addi a1, sp, 8
224 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
225 ; LMULMAX2-RV32-NEXT: vlse64.v v8, (a1), zero
226 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
227 ; LMULMAX2-RV32-NEXT: addi sp, sp, 16
228 ; LMULMAX2-RV32-NEXT: ret
230 ; LMULMAX1-RV32-LABEL: splat_v4i64:
231 ; LMULMAX1-RV32: # %bb.0:
232 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
233 ; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5
234 ; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2
235 ; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v8, a1, v0
236 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16
237 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
238 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
239 ; LMULMAX1-RV32-NEXT: ret
241 ; LMULMAX8-RV64-LABEL: splat_v4i64:
242 ; LMULMAX8-RV64: # %bb.0:
243 ; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
244 ; LMULMAX8-RV64-NEXT: vmv.v.x v8, a1
245 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
246 ; LMULMAX8-RV64-NEXT: ret
248 ; LMULMAX2-RV64-LABEL: splat_v4i64:
249 ; LMULMAX2-RV64: # %bb.0:
250 ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
251 ; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1
252 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
253 ; LMULMAX2-RV64-NEXT: ret
255 ; LMULMAX1-RV64-LABEL: splat_v4i64:
256 ; LMULMAX1-RV64: # %bb.0:
257 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
258 ; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1
259 ; LMULMAX1-RV64-NEXT: addi a1, a0, 16
260 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
261 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
262 ; LMULMAX1-RV64-NEXT: ret
263 %a = insertelement <4 x i64> poison, i64 %y, i32 0
264 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
265 store <4 x i64> %b, ptr %x
; Store a <16 x i8> splat of the constant 0 to %x.
; All RUN lines share the CHECK prefix: vmv.v.i v8, 0 at e8/m1, then vse8.v.
269 define void @splat_zero_v16i8(ptr %x) {
270 ; CHECK-LABEL: splat_zero_v16i8:
272 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
273 ; CHECK-NEXT: vmv.v.i v8, 0
274 ; CHECK-NEXT: vse8.v v8, (a0)
276 %a = insertelement <16 x i8> poison, i8 0, i32 0
277 %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
278 store <16 x i8> %b, ptr %x
; Store a <8 x i16> splat of the constant 0 to %x.
; All RUN lines share the CHECK prefix: vmv.v.i v8, 0 at e16/m1, then vse16.v.
282 define void @splat_zero_v8i16(ptr %x) {
283 ; CHECK-LABEL: splat_zero_v8i16:
285 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
286 ; CHECK-NEXT: vmv.v.i v8, 0
287 ; CHECK-NEXT: vse16.v v8, (a0)
289 %a = insertelement <8 x i16> poison, i16 0, i32 0
290 %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
291 store <8 x i16> %b, ptr %x
; Store a <4 x i32> splat of the constant 0 to %x.
; All RUN lines share the CHECK prefix: vmv.v.i v8, 0 at e32/m1, then vse32.v.
295 define void @splat_zero_v4i32(ptr %x) {
296 ; CHECK-LABEL: splat_zero_v4i32:
298 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
299 ; CHECK-NEXT: vmv.v.i v8, 0
300 ; CHECK-NEXT: vse32.v v8, (a0)
302 %a = insertelement <4 x i32> poison, i32 0, i32 0
303 %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
304 store <4 x i32> %b, ptr %x
; Store a <2 x i64> splat of the constant 0 to %x.
; All RUN lines (RV32 included) share the CHECK prefix: vmv.v.i v8, 0 at
; e64/m1, then vse64.v.
308 define void @splat_zero_v2i64(ptr %x) {
309 ; CHECK-LABEL: splat_zero_v2i64:
311 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
312 ; CHECK-NEXT: vmv.v.i v8, 0
313 ; CHECK-NEXT: vse64.v v8, (a0)
315 %a = insertelement <2 x i64> poison, i64 0, i32 0
316 %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
317 store <2 x i64> %b, ptr %x
; Store a <32 x i8> splat of the constant 0 to %x.
; LMULMAX8/LMULMAX2 checks: one e8/m2 vmv.v.i and store, with VL=32 loaded via li.
; LMULMAX1 checks: one e8/m1 zero vector stored at %x and again at %x+16.
321 define void @splat_zero_v32i8(ptr %x) {
322 ; LMULMAX8-LABEL: splat_zero_v32i8:
324 ; LMULMAX8-NEXT: li a1, 32
325 ; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma
326 ; LMULMAX8-NEXT: vmv.v.i v8, 0
327 ; LMULMAX8-NEXT: vse8.v v8, (a0)
330 ; LMULMAX2-LABEL: splat_zero_v32i8:
332 ; LMULMAX2-NEXT: li a1, 32
333 ; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
334 ; LMULMAX2-NEXT: vmv.v.i v8, 0
335 ; LMULMAX2-NEXT: vse8.v v8, (a0)
338 ; LMULMAX1-LABEL: splat_zero_v32i8:
340 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
341 ; LMULMAX1-NEXT: vmv.v.i v8, 0
342 ; LMULMAX1-NEXT: vse8.v v8, (a0)
343 ; LMULMAX1-NEXT: addi a0, a0, 16
344 ; LMULMAX1-NEXT: vse8.v v8, (a0)
346 %a = insertelement <32 x i8> poison, i8 0, i32 0
347 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
348 store <32 x i8> %b, ptr %x
; Store a <16 x i16> splat of the constant 0 to %x.
; LMULMAX8/LMULMAX2 checks: one e16/m2 vmv.v.i and store.
; LMULMAX1 checks: one e16/m1 zero vector stored at %x and again at %x+16.
352 define void @splat_zero_v16i16(ptr %x) {
353 ; LMULMAX8-LABEL: splat_zero_v16i16:
355 ; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
356 ; LMULMAX8-NEXT: vmv.v.i v8, 0
357 ; LMULMAX8-NEXT: vse16.v v8, (a0)
360 ; LMULMAX2-LABEL: splat_zero_v16i16:
362 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
363 ; LMULMAX2-NEXT: vmv.v.i v8, 0
364 ; LMULMAX2-NEXT: vse16.v v8, (a0)
367 ; LMULMAX1-LABEL: splat_zero_v16i16:
369 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
370 ; LMULMAX1-NEXT: vmv.v.i v8, 0
371 ; LMULMAX1-NEXT: vse16.v v8, (a0)
372 ; LMULMAX1-NEXT: addi a0, a0, 16
373 ; LMULMAX1-NEXT: vse16.v v8, (a0)
375 %a = insertelement <16 x i16> poison, i16 0, i32 0
376 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
377 store <16 x i16> %b, ptr %x
; Store a <8 x i32> splat of the constant 0 to %x.
; LMULMAX8/LMULMAX2 checks: one e32/m2 vmv.v.i and store.
; LMULMAX1 checks: one e32/m1 zero vector stored at %x and again at %x+16.
381 define void @splat_zero_v8i32(ptr %x) {
382 ; LMULMAX8-LABEL: splat_zero_v8i32:
384 ; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma
385 ; LMULMAX8-NEXT: vmv.v.i v8, 0
386 ; LMULMAX8-NEXT: vse32.v v8, (a0)
389 ; LMULMAX2-LABEL: splat_zero_v8i32:
391 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
392 ; LMULMAX2-NEXT: vmv.v.i v8, 0
393 ; LMULMAX2-NEXT: vse32.v v8, (a0)
396 ; LMULMAX1-LABEL: splat_zero_v8i32:
398 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
399 ; LMULMAX1-NEXT: vmv.v.i v8, 0
400 ; LMULMAX1-NEXT: vse32.v v8, (a0)
401 ; LMULMAX1-NEXT: addi a0, a0, 16
402 ; LMULMAX1-NEXT: vse32.v v8, (a0)
404 %a = insertelement <8 x i32> poison, i32 0, i32 0
405 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
406 store <8 x i32> %b, ptr %x
; Store a <4 x i64> splat of the constant 0 to %x.
; LMULMAX8/LMULMAX2 checks: one e64/m2 vmv.v.i and store on both targets.
; LMULMAX1 checks differ by target: RV32 zeroes and stores at e32 (VL=4),
; RV64 at e64 (VL=2); both store at %x and again at %x+16.
410 define void @splat_zero_v4i64(ptr %x) {
411 ; LMULMAX8-LABEL: splat_zero_v4i64:
413 ; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
414 ; LMULMAX8-NEXT: vmv.v.i v8, 0
415 ; LMULMAX8-NEXT: vse64.v v8, (a0)
418 ; LMULMAX2-LABEL: splat_zero_v4i64:
420 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
421 ; LMULMAX2-NEXT: vmv.v.i v8, 0
422 ; LMULMAX2-NEXT: vse64.v v8, (a0)
425 ; LMULMAX1-RV32-LABEL: splat_zero_v4i64:
426 ; LMULMAX1-RV32: # %bb.0:
427 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
428 ; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
429 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
430 ; LMULMAX1-RV32-NEXT: addi a0, a0, 16
431 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
432 ; LMULMAX1-RV32-NEXT: ret
434 ; LMULMAX1-RV64-LABEL: splat_zero_v4i64:
435 ; LMULMAX1-RV64: # %bb.0:
436 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
437 ; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
438 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
439 ; LMULMAX1-RV64-NEXT: addi a0, a0, 16
440 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
441 ; LMULMAX1-RV64-NEXT: ret
442 %a = insertelement <4 x i64> poison, i64 0, i32 0
443 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
444 store <4 x i64> %b, ptr %x
; Store a zeroinitializer <2 x i16> to %p with no explicit alignment.
; All RUN lines share the CHECK prefix: a single scalar `sw zero, 0(a0)`.
448 define void @splat_zero_v2i16(ptr %p) {
449 ; CHECK-LABEL: splat_zero_v2i16:
451 ; CHECK-NEXT: sw zero, 0(a0)
453 store <2 x i16> zeroinitializer, ptr %p
; Same zero store as splat_zero_v2i16 but with `align 1`.
; The checks expect a vector sequence instead of a scalar store: zero at
; e16 (VL=2), then a byte-wise vse8.v with VL=4.
457 define void @splat_zero_v2i16_unaligned(ptr %p) {
458 ; CHECK-LABEL: splat_zero_v2i16_unaligned:
460 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
461 ; CHECK-NEXT: vmv.v.i v8, 0
462 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
463 ; CHECK-NEXT: vse8.v v8, (a0)
465 store <2 x i16> zeroinitializer, ptr %p, align 1
; Store a zeroinitializer <4 x i16> (8 bytes) to %p.
; RV32 checks: vector zero at e16/mf2 followed by vse16.v.
; RV64 checks: a single scalar `sd zero, 0(a0)`.
469 define void @splat_zero_v4i16(ptr %p) {
470 ; LMULMAX8-RV32-LABEL: splat_zero_v4i16:
471 ; LMULMAX8-RV32: # %bb.0:
472 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
473 ; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
474 ; LMULMAX8-RV32-NEXT: vse16.v v8, (a0)
475 ; LMULMAX8-RV32-NEXT: ret
477 ; LMULMAX2-RV32-LABEL: splat_zero_v4i16:
478 ; LMULMAX2-RV32: # %bb.0:
479 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
480 ; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
481 ; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
482 ; LMULMAX2-RV32-NEXT: ret
484 ; LMULMAX1-RV32-LABEL: splat_zero_v4i16:
485 ; LMULMAX1-RV32: # %bb.0:
486 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
487 ; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
488 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
489 ; LMULMAX1-RV32-NEXT: ret
491 ; LMULMAX8-RV64-LABEL: splat_zero_v4i16:
492 ; LMULMAX8-RV64: # %bb.0:
493 ; LMULMAX8-RV64-NEXT: sd zero, 0(a0)
494 ; LMULMAX8-RV64-NEXT: ret
496 ; LMULMAX2-RV64-LABEL: splat_zero_v4i16:
497 ; LMULMAX2-RV64: # %bb.0:
498 ; LMULMAX2-RV64-NEXT: sd zero, 0(a0)
499 ; LMULMAX2-RV64-NEXT: ret
501 ; LMULMAX1-RV64-LABEL: splat_zero_v4i16:
502 ; LMULMAX1-RV64: # %bb.0:
503 ; LMULMAX1-RV64-NEXT: sd zero, 0(a0)
504 ; LMULMAX1-RV64-NEXT: ret
505 store <4 x i16> zeroinitializer, ptr %p
; Store a zeroinitializer <2 x i32> (8 bytes) to %p.
; RV32 checks: vector zero at e32/mf2 followed by vse32.v.
; RV64 checks: a single scalar `sd zero, 0(a0)`.
509 define void @splat_zero_v2i32(ptr %p) {
510 ; LMULMAX8-RV32-LABEL: splat_zero_v2i32:
511 ; LMULMAX8-RV32: # %bb.0:
512 ; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
513 ; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
514 ; LMULMAX8-RV32-NEXT: vse32.v v8, (a0)
515 ; LMULMAX8-RV32-NEXT: ret
517 ; LMULMAX2-RV32-LABEL: splat_zero_v2i32:
518 ; LMULMAX2-RV32: # %bb.0:
519 ; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
520 ; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
521 ; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
522 ; LMULMAX2-RV32-NEXT: ret
524 ; LMULMAX1-RV32-LABEL: splat_zero_v2i32:
525 ; LMULMAX1-RV32: # %bb.0:
526 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
527 ; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
528 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
529 ; LMULMAX1-RV32-NEXT: ret
531 ; LMULMAX8-RV64-LABEL: splat_zero_v2i32:
532 ; LMULMAX8-RV64: # %bb.0:
533 ; LMULMAX8-RV64-NEXT: sd zero, 0(a0)
534 ; LMULMAX8-RV64-NEXT: ret
536 ; LMULMAX2-RV64-LABEL: splat_zero_v2i32:
537 ; LMULMAX2-RV64: # %bb.0:
538 ; LMULMAX2-RV64-NEXT: sd zero, 0(a0)
539 ; LMULMAX2-RV64-NEXT: ret
541 ; LMULMAX1-RV64-LABEL: splat_zero_v2i32:
542 ; LMULMAX1-RV64: # %bb.0:
543 ; LMULMAX1-RV64-NEXT: sd zero, 0(a0)
544 ; LMULMAX1-RV64-NEXT: ret
545 store <2 x i32> zeroinitializer, ptr %p
549 ; Not a power of two and requires more than two scalar stores.
; Store a zeroinitializer <7 x i16> to %p. The checks expect the zero vector
; to be materialized with VL=8 and the store to be issued with VL=7.
550 define void @splat_zero_v7i16(ptr %p) {
551 ; CHECK-LABEL: splat_zero_v7i16:
553 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
554 ; CHECK-NEXT: vmv.v.i v8, 0
555 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, ta, ma
556 ; CHECK-NEXT: vse16.v v8, (a0)
558 store <7 x i16> zeroinitializer, ptr %p
; Store a <16 x i8> splat of the constant -1 to %x.
; All RUN lines share the CHECK prefix: vmv.v.i v8, -1 at e8/m1, then vse8.v.
562 define void @splat_allones_v16i8(ptr %x) {
563 ; CHECK-LABEL: splat_allones_v16i8:
565 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
566 ; CHECK-NEXT: vmv.v.i v8, -1
567 ; CHECK-NEXT: vse8.v v8, (a0)
569 %a = insertelement <16 x i8> poison, i8 -1, i32 0
570 %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
571 store <16 x i8> %b, ptr %x
; Store a <8 x i16> splat of the constant -1 to %x.
; All RUN lines share the CHECK prefix: vmv.v.i v8, -1 at e16/m1, then vse16.v.
575 define void @splat_allones_v8i16(ptr %x) {
576 ; CHECK-LABEL: splat_allones_v8i16:
578 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
579 ; CHECK-NEXT: vmv.v.i v8, -1
580 ; CHECK-NEXT: vse16.v v8, (a0)
582 %a = insertelement <8 x i16> poison, i16 -1, i32 0
583 %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
584 store <8 x i16> %b, ptr %x
; Store a <4 x i32> splat of the constant -1 to %x.
; All RUN lines share the CHECK prefix: vmv.v.i v8, -1 at e32/m1, then vse32.v.
588 define void @splat_allones_v4i32(ptr %x) {
589 ; CHECK-LABEL: splat_allones_v4i32:
591 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
592 ; CHECK-NEXT: vmv.v.i v8, -1
593 ; CHECK-NEXT: vse32.v v8, (a0)
595 %a = insertelement <4 x i32> poison, i32 -1, i32 0
596 %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
597 store <4 x i32> %b, ptr %x
; Store a <2 x i64> splat of the constant -1 to %x.
; All RUN lines (RV32 included) share the CHECK prefix: vmv.v.i v8, -1 at
; e64/m1, then vse64.v.
601 define void @splat_allones_v2i64(ptr %x) {
602 ; CHECK-LABEL: splat_allones_v2i64:
604 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
605 ; CHECK-NEXT: vmv.v.i v8, -1
606 ; CHECK-NEXT: vse64.v v8, (a0)
608 %a = insertelement <2 x i64> poison, i64 -1, i32 0
609 %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
610 store <2 x i64> %b, ptr %x
; Store a <32 x i8> splat of the constant -1 to %x.
; LMULMAX8/LMULMAX2 checks: one e8/m2 vmv.v.i and store, with VL=32 loaded via li.
; LMULMAX1 checks: one e8/m1 all-ones vector stored at %x and again at %x+16.
614 define void @splat_allones_v32i8(ptr %x) {
615 ; LMULMAX8-LABEL: splat_allones_v32i8:
617 ; LMULMAX8-NEXT: li a1, 32
618 ; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma
619 ; LMULMAX8-NEXT: vmv.v.i v8, -1
620 ; LMULMAX8-NEXT: vse8.v v8, (a0)
623 ; LMULMAX2-LABEL: splat_allones_v32i8:
625 ; LMULMAX2-NEXT: li a1, 32
626 ; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
627 ; LMULMAX2-NEXT: vmv.v.i v8, -1
628 ; LMULMAX2-NEXT: vse8.v v8, (a0)
631 ; LMULMAX1-LABEL: splat_allones_v32i8:
633 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
634 ; LMULMAX1-NEXT: vmv.v.i v8, -1
635 ; LMULMAX1-NEXT: vse8.v v8, (a0)
636 ; LMULMAX1-NEXT: addi a0, a0, 16
637 ; LMULMAX1-NEXT: vse8.v v8, (a0)
639 %a = insertelement <32 x i8> poison, i8 -1, i32 0
640 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
641 store <32 x i8> %b, ptr %x
; Store a <16 x i16> splat of the constant -1 to %x.
; LMULMAX8/LMULMAX2 checks: one e16/m2 vmv.v.i and store.
; LMULMAX1 checks: one e16/m1 all-ones vector stored at %x and again at %x+16.
645 define void @splat_allones_v16i16(ptr %x) {
646 ; LMULMAX8-LABEL: splat_allones_v16i16:
648 ; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
649 ; LMULMAX8-NEXT: vmv.v.i v8, -1
650 ; LMULMAX8-NEXT: vse16.v v8, (a0)
653 ; LMULMAX2-LABEL: splat_allones_v16i16:
655 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
656 ; LMULMAX2-NEXT: vmv.v.i v8, -1
657 ; LMULMAX2-NEXT: vse16.v v8, (a0)
660 ; LMULMAX1-LABEL: splat_allones_v16i16:
662 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
663 ; LMULMAX1-NEXT: vmv.v.i v8, -1
664 ; LMULMAX1-NEXT: vse16.v v8, (a0)
665 ; LMULMAX1-NEXT: addi a0, a0, 16
666 ; LMULMAX1-NEXT: vse16.v v8, (a0)
668 %a = insertelement <16 x i16> poison, i16 -1, i32 0
669 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
670 store <16 x i16> %b, ptr %x
; Store a <8 x i32> splat of the constant -1 to %x.
; LMULMAX8/LMULMAX2 checks: one e32/m2 vmv.v.i and store.
; LMULMAX1 checks: one e32/m1 all-ones vector stored at %x and again at %x+16.
674 define void @splat_allones_v8i32(ptr %x) {
675 ; LMULMAX8-LABEL: splat_allones_v8i32:
677 ; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma
678 ; LMULMAX8-NEXT: vmv.v.i v8, -1
679 ; LMULMAX8-NEXT: vse32.v v8, (a0)
682 ; LMULMAX2-LABEL: splat_allones_v8i32:
684 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
685 ; LMULMAX2-NEXT: vmv.v.i v8, -1
686 ; LMULMAX2-NEXT: vse32.v v8, (a0)
689 ; LMULMAX1-LABEL: splat_allones_v8i32:
691 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
692 ; LMULMAX1-NEXT: vmv.v.i v8, -1
693 ; LMULMAX1-NEXT: vse32.v v8, (a0)
694 ; LMULMAX1-NEXT: addi a0, a0, 16
695 ; LMULMAX1-NEXT: vse32.v v8, (a0)
697 %a = insertelement <8 x i32> poison, i32 -1, i32 0
698 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
699 store <8 x i32> %b, ptr %x
; Store a <4 x i64> splat of the constant -1 to %x.
; LMULMAX8/LMULMAX2 checks: one e64/m2 vmv.v.i and store on both targets.
; LMULMAX1 checks differ by target: RV32 materializes and stores at e32 (VL=4),
; RV64 at e64 (VL=2); both store at %x and again at %x+16.
703 define void @splat_allones_v4i64(ptr %x) {
704 ; LMULMAX8-LABEL: splat_allones_v4i64:
706 ; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
707 ; LMULMAX8-NEXT: vmv.v.i v8, -1
708 ; LMULMAX8-NEXT: vse64.v v8, (a0)
711 ; LMULMAX2-LABEL: splat_allones_v4i64:
713 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
714 ; LMULMAX2-NEXT: vmv.v.i v8, -1
715 ; LMULMAX2-NEXT: vse64.v v8, (a0)
718 ; LMULMAX1-RV32-LABEL: splat_allones_v4i64:
719 ; LMULMAX1-RV32: # %bb.0:
720 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
721 ; LMULMAX1-RV32-NEXT: vmv.v.i v8, -1
722 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
723 ; LMULMAX1-RV32-NEXT: addi a0, a0, 16
724 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
725 ; LMULMAX1-RV32-NEXT: ret
727 ; LMULMAX1-RV64-LABEL: splat_allones_v4i64:
728 ; LMULMAX1-RV64: # %bb.0:
729 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
730 ; LMULMAX1-RV64-NEXT: vmv.v.i v8, -1
731 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
732 ; LMULMAX1-RV64-NEXT: addi a0, a0, 16
733 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
734 ; LMULMAX1-RV64-NEXT: ret
735 %a = insertelement <4 x i64> poison, i64 -1, i32 0
736 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
737 store <4 x i64> %b, ptr %x
741 ; This requires a bitcast on RV32 due to type legalization rewriting the
742 ; build_vector to v8i32.
743 ; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
744 ; with SEW=64 on RV32.
; Load a <4 x i64> from %x, add a constant splat of -1, and store it back.
; LMULMAX8/LMULMAX2 and LMULMAX1-RV64 checks: the constant is folded into
; vadd.vi with immediate -1.
; LMULMAX1-RV32 checks: the splat is materialized with vmv.v.i at e32 (VL=4)
; and consumed by vadd.vv at e64 (VL=2).
745 define void @splat_allones_with_use_v4i64(ptr %x) {
746 ; LMULMAX8-LABEL: splat_allones_with_use_v4i64:
748 ; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
749 ; LMULMAX8-NEXT: vle64.v v8, (a0)
750 ; LMULMAX8-NEXT: vadd.vi v8, v8, -1
751 ; LMULMAX8-NEXT: vse64.v v8, (a0)
754 ; LMULMAX2-LABEL: splat_allones_with_use_v4i64:
756 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
757 ; LMULMAX2-NEXT: vle64.v v8, (a0)
758 ; LMULMAX2-NEXT: vadd.vi v8, v8, -1
759 ; LMULMAX2-NEXT: vse64.v v8, (a0)
762 ; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64:
763 ; LMULMAX1-RV32: # %bb.0:
764 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
765 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
766 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16
767 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
768 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
769 ; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1
770 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
771 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
772 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
773 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
774 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
775 ; LMULMAX1-RV32-NEXT: ret
777 ; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64:
778 ; LMULMAX1-RV64: # %bb.0:
779 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
780 ; LMULMAX1-RV64-NEXT: addi a1, a0, 16
781 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a1)
782 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0)
783 ; LMULMAX1-RV64-NEXT: vadd.vi v8, v8, -1
784 ; LMULMAX1-RV64-NEXT: vadd.vi v9, v9, -1
785 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
786 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
787 ; LMULMAX1-RV64-NEXT: ret
788 %a = load <4 x i64>, ptr %x
789 %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
790 store <4 x i64> %b, ptr %x
794 ; This test used to crash at LMUL=8 when inserting a v16i64 subvector into
795 ; nxv8i64 at index 0: the v16i64 type was used to get the LMUL, the size of
796 ; which exceeded the maximum expected size of 512. The scalable container
797 ; type of nxv8i64 should have been used instead.
798 define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) {
799 ; LMULMAX8-RV32-LABEL: vadd_vx_v16i64:
800 ; LMULMAX8-RV32: # %bb.0:
801 ; LMULMAX8-RV32-NEXT: addi sp, sp, -16
802 ; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16
803 ; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
804 ; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
805 ; LMULMAX8-RV32-NEXT: sw a2, 12(sp)
806 ; LMULMAX8-RV32-NEXT: sw a1, 8(sp)
807 ; LMULMAX8-RV32-NEXT: addi a0, sp, 8
808 ; LMULMAX8-RV32-NEXT: vlse64.v v16, (a0), zero
809 ; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v16
810 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a3)
811 ; LMULMAX8-RV32-NEXT: addi sp, sp, 16
812 ; LMULMAX8-RV32-NEXT: ret
814 ; LMULMAX2-RV32-LABEL: vadd_vx_v16i64:
815 ; LMULMAX2-RV32: # %bb.0:
816 ; LMULMAX2-RV32-NEXT: addi a4, a0, 64
817 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
818 ; LMULMAX2-RV32-NEXT: vle64.v v8, (a4)
819 ; LMULMAX2-RV32-NEXT: addi a4, a0, 96
820 ; LMULMAX2-RV32-NEXT: vle64.v v10, (a4)
821 ; LMULMAX2-RV32-NEXT: vle64.v v12, (a0)
822 ; LMULMAX2-RV32-NEXT: addi a0, a0, 32
823 ; LMULMAX2-RV32-NEXT: vle64.v v14, (a0)
824 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
825 ; LMULMAX2-RV32-NEXT: li a0, 85
826 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a0
827 ; LMULMAX2-RV32-NEXT: vmv.v.x v16, a2
828 ; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v16, a1, v0
829 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
830 ; LMULMAX2-RV32-NEXT: vadd.vv v14, v14, v16
831 ; LMULMAX2-RV32-NEXT: vadd.vv v12, v12, v16
832 ; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v16
833 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v16
834 ; LMULMAX2-RV32-NEXT: addi a0, a3, 64
835 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
836 ; LMULMAX2-RV32-NEXT: addi a0, a3, 96
837 ; LMULMAX2-RV32-NEXT: vse64.v v10, (a0)
838 ; LMULMAX2-RV32-NEXT: vse64.v v12, (a3)
839 ; LMULMAX2-RV32-NEXT: addi a0, a3, 32
840 ; LMULMAX2-RV32-NEXT: vse64.v v14, (a0)
841 ; LMULMAX2-RV32-NEXT: ret
843 ; LMULMAX1-RV32-LABEL: vadd_vx_v16i64:
844 ; LMULMAX1-RV32: # %bb.0:
845 ; LMULMAX1-RV32-NEXT: addi a4, a0, 96
846 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
847 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a4)
848 ; LMULMAX1-RV32-NEXT: addi a4, a0, 112
849 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a4)
850 ; LMULMAX1-RV32-NEXT: addi a4, a0, 64
851 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a4)
852 ; LMULMAX1-RV32-NEXT: addi a4, a0, 80
853 ; LMULMAX1-RV32-NEXT: vle64.v v11, (a4)
854 ; LMULMAX1-RV32-NEXT: addi a4, a0, 32
855 ; LMULMAX1-RV32-NEXT: vle64.v v12, (a4)
856 ; LMULMAX1-RV32-NEXT: addi a4, a0, 48
857 ; LMULMAX1-RV32-NEXT: vle64.v v13, (a4)
858 ; LMULMAX1-RV32-NEXT: vle64.v v14, (a0)
859 ; LMULMAX1-RV32-NEXT: addi a0, a0, 16
860 ; LMULMAX1-RV32-NEXT: vle64.v v15, (a0)
861 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
862 ; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5
863 ; LMULMAX1-RV32-NEXT: vmv.v.x v16, a2
864 ; LMULMAX1-RV32-NEXT: vmerge.vxm v16, v16, a1, v0
865 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
866 ; LMULMAX1-RV32-NEXT: vadd.vv v15, v15, v16
867 ; LMULMAX1-RV32-NEXT: vadd.vv v14, v14, v16
868 ; LMULMAX1-RV32-NEXT: vadd.vv v13, v13, v16
869 ; LMULMAX1-RV32-NEXT: vadd.vv v12, v12, v16
870 ; LMULMAX1-RV32-NEXT: vadd.vv v11, v11, v16
871 ; LMULMAX1-RV32-NEXT: vadd.vv v10, v10, v16
872 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v16
873 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v16
874 ; LMULMAX1-RV32-NEXT: addi a0, a3, 96
875 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
876 ; LMULMAX1-RV32-NEXT: addi a0, a3, 112
877 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a0)
878 ; LMULMAX1-RV32-NEXT: addi a0, a3, 64
879 ; LMULMAX1-RV32-NEXT: vse64.v v10, (a0)
880 ; LMULMAX1-RV32-NEXT: addi a0, a3, 80
881 ; LMULMAX1-RV32-NEXT: vse64.v v11, (a0)
882 ; LMULMAX1-RV32-NEXT: addi a0, a3, 32
883 ; LMULMAX1-RV32-NEXT: vse64.v v12, (a0)
884 ; LMULMAX1-RV32-NEXT: addi a0, a3, 48
885 ; LMULMAX1-RV32-NEXT: vse64.v v13, (a0)
886 ; LMULMAX1-RV32-NEXT: vse64.v v14, (a3)
887 ; LMULMAX1-RV32-NEXT: addi a3, a3, 16
888 ; LMULMAX1-RV32-NEXT: vse64.v v15, (a3)
889 ; LMULMAX1-RV32-NEXT: ret
891 ; LMULMAX8-RV64-LABEL: vadd_vx_v16i64:
892 ; LMULMAX8-RV64: # %bb.0:
893 ; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
894 ; LMULMAX8-RV64-NEXT: vle64.v v8, (a0)
895 ; LMULMAX8-RV64-NEXT: vadd.vx v8, v8, a1
896 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a2)
897 ; LMULMAX8-RV64-NEXT: ret
899 ; LMULMAX2-RV64-LABEL: vadd_vx_v16i64:
900 ; LMULMAX2-RV64: # %bb.0:
901 ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
902 ; LMULMAX2-RV64-NEXT: addi a3, a0, 96
903 ; LMULMAX2-RV64-NEXT: vle64.v v8, (a3)
904 ; LMULMAX2-RV64-NEXT: addi a3, a0, 32
905 ; LMULMAX2-RV64-NEXT: vle64.v v10, (a3)
906 ; LMULMAX2-RV64-NEXT: addi a3, a0, 64
907 ; LMULMAX2-RV64-NEXT: vle64.v v12, (a3)
908 ; LMULMAX2-RV64-NEXT: vle64.v v14, (a0)
909 ; LMULMAX2-RV64-NEXT: vadd.vx v10, v10, a1
910 ; LMULMAX2-RV64-NEXT: vadd.vx v8, v8, a1
911 ; LMULMAX2-RV64-NEXT: vadd.vx v12, v12, a1
912 ; LMULMAX2-RV64-NEXT: vadd.vx v14, v14, a1
913 ; LMULMAX2-RV64-NEXT: vse64.v v14, (a2)
914 ; LMULMAX2-RV64-NEXT: addi a0, a2, 64
915 ; LMULMAX2-RV64-NEXT: vse64.v v12, (a0)
916 ; LMULMAX2-RV64-NEXT: addi a0, a2, 96
917 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
918 ; LMULMAX2-RV64-NEXT: addi a0, a2, 32
919 ; LMULMAX2-RV64-NEXT: vse64.v v10, (a0)
920 ; LMULMAX2-RV64-NEXT: ret
922 ; LMULMAX1-RV64-LABEL: vadd_vx_v16i64:
923 ; LMULMAX1-RV64: # %bb.0:
924 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
925 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
926 ; LMULMAX1-RV64-NEXT: addi a3, a0, 96
927 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a3)
928 ; LMULMAX1-RV64-NEXT: addi a3, a0, 112
929 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a3)
930 ; LMULMAX1-RV64-NEXT: addi a3, a0, 64
931 ; LMULMAX1-RV64-NEXT: vle64.v v11, (a3)
932 ; LMULMAX1-RV64-NEXT: addi a3, a0, 48
933 ; LMULMAX1-RV64-NEXT: vle64.v v12, (a3)
934 ; LMULMAX1-RV64-NEXT: addi a3, a0, 16
935 ; LMULMAX1-RV64-NEXT: vle64.v v13, (a3)
936 ; LMULMAX1-RV64-NEXT: addi a3, a0, 80
937 ; LMULMAX1-RV64-NEXT: addi a0, a0, 32
938 ; LMULMAX1-RV64-NEXT: vle64.v v14, (a0)
939 ; LMULMAX1-RV64-NEXT: vle64.v v15, (a3)
940 ; LMULMAX1-RV64-NEXT: vadd.vx v13, v13, a1
941 ; LMULMAX1-RV64-NEXT: vadd.vx v12, v12, a1
942 ; LMULMAX1-RV64-NEXT: vadd.vx v14, v14, a1
943 ; LMULMAX1-RV64-NEXT: vadd.vx v15, v15, a1
944 ; LMULMAX1-RV64-NEXT: vadd.vx v11, v11, a1
945 ; LMULMAX1-RV64-NEXT: vadd.vx v10, v10, a1
946 ; LMULMAX1-RV64-NEXT: vadd.vx v9, v9, a1
947 ; LMULMAX1-RV64-NEXT: vadd.vx v8, v8, a1
948 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a2)
949 ; LMULMAX1-RV64-NEXT: addi a0, a2, 96
950 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
951 ; LMULMAX1-RV64-NEXT: addi a0, a2, 112
952 ; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
953 ; LMULMAX1-RV64-NEXT: addi a0, a2, 64
954 ; LMULMAX1-RV64-NEXT: vse64.v v11, (a0)
955 ; LMULMAX1-RV64-NEXT: addi a0, a2, 80
956 ; LMULMAX1-RV64-NEXT: vse64.v v15, (a0)
957 ; LMULMAX1-RV64-NEXT: addi a0, a2, 32
958 ; LMULMAX1-RV64-NEXT: vse64.v v14, (a0)
959 ; LMULMAX1-RV64-NEXT: addi a0, a2, 48
960 ; LMULMAX1-RV64-NEXT: vse64.v v12, (a0)
961 ; LMULMAX1-RV64-NEXT: addi a2, a2, 16
962 ; LMULMAX1-RV64-NEXT: vse64.v v13, (a2)
963 ; LMULMAX1-RV64-NEXT: ret
964 %va = load <16 x i64>, ptr %a
965 %head = insertelement <16 x i64> poison, i64 %b, i32 0
966 %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer
967 %vc = add <16 x i64> %va, %splat
968 store <16 x i64> %vc, ptr %c