; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
  %e0 = extractelement <2 x i8> %v, i32 0
  %e1 = extractelement <2 x i8> %v, i32 1
  %add0 = xor i8 %e0, %e1
  ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a2, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a2, a0
; CHECK-NEXT: ret
  %e0 = extractelement <4 x i8> %v, i32 0
  %e1 = extractelement <4 x i8> %v, i32 1
  %e2 = extractelement <4 x i8> %v, i32 2
  %e3 = extractelement <4 x i8> %v, i32 3
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  ret i8 %add2
}

define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 4
; CHECK-NEXT: vmv.x.s a2, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 5
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 6
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 7
; CHECK-NEXT: vmv.x.s a5, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a6, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a6, a0
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a0, a0, a5
; CHECK-NEXT: ret
  %e0 = extractelement <8 x i8> %v, i32 0
  %e1 = extractelement <8 x i8> %v, i32 1
  %e2 = extractelement <8 x i8> %v, i32 2
  %e3 = extractelement <8 x i8> %v, i32 3
  %e4 = extractelement <8 x i8> %v, i32 4
  %e5 = extractelement <8 x i8> %v, i32 5
  %e6 = extractelement <8 x i8> %v, i32 6
  %e7 = extractelement <8 x i8> %v, i32 7
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  ret i8 %add6
}

define i8 @explode_16xi8(<16 x i8> %v) {
; CHECK-LABEL: explode_16xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 4
; CHECK-NEXT: vmv.x.s a2, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 5
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 6
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 7
; CHECK-NEXT: vmv.x.s a5, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vmv.x.s a6, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 9
; CHECK-NEXT: vmv.x.s a7, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 10
; CHECK-NEXT: vmv.x.s t0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 11
; CHECK-NEXT: vmv.x.s t1, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 12
; CHECK-NEXT: vmv.x.s t2, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 13
; CHECK-NEXT: vmv.x.s t3, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 14
; CHECK-NEXT: vmv.x.s t4, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 15
; CHECK-NEXT: vmv.x.s t5, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s t6, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, t6, a0
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a5, a5, a6
; CHECK-NEXT: add a5, a5, a7
; CHECK-NEXT: add a5, a5, t0
; CHECK-NEXT: add a0, a0, a5
; CHECK-NEXT: add t1, t1, t2
; CHECK-NEXT: add t1, t1, t3
; CHECK-NEXT: add t1, t1, t4
; CHECK-NEXT: add t1, t1, t5
; CHECK-NEXT: add a0, a0, t1
; CHECK-NEXT: ret
  %e0 = extractelement <16 x i8> %v, i32 0
  %e1 = extractelement <16 x i8> %v, i32 1
  %e2 = extractelement <16 x i8> %v, i32 2
  %e3 = extractelement <16 x i8> %v, i32 3
  %e4 = extractelement <16 x i8> %v, i32 4
  %e5 = extractelement <16 x i8> %v, i32 5
  %e6 = extractelement <16 x i8> %v, i32 6
  %e7 = extractelement <16 x i8> %v, i32 7
  %e8 = extractelement <16 x i8> %v, i32 8
  %e9 = extractelement <16 x i8> %v, i32 9
  %e10 = extractelement <16 x i8> %v, i32 10
  %e11 = extractelement <16 x i8> %v, i32 11
  %e12 = extractelement <16 x i8> %v, i32 12
  %e13 = extractelement <16 x i8> %v, i32 13
  %e14 = extractelement <16 x i8> %v, i32 14
  %e15 = extractelement <16 x i8> %v, i32 15
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  %add7 = add i8 %add6, %e8
  %add8 = add i8 %add7, %e9
  %add9 = add i8 %add8, %e10
  %add10 = add i8 %add9, %e11
  %add11 = add i8 %add10, %e12
  %add12 = add i8 %add11, %e13
  %add13 = add i8 %add12, %e14
  %add14 = add i8 %add13, %e15
  ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
  %e0 = extractelement <2 x i16> %v, i32 0
  %e1 = extractelement <2 x i16> %v, i32 1
  %add0 = xor i16 %e0, %e1
  ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a2, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a2, a0
; CHECK-NEXT: ret
  %e0 = extractelement <4 x i16> %v, i32 0
  %e1 = extractelement <4 x i16> %v, i32 1
  %e2 = extractelement <4 x i16> %v, i32 2
  %e3 = extractelement <4 x i16> %v, i32 3
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  ret i16 %add2
}

define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 4
; CHECK-NEXT: vmv.x.s a2, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 5
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 6
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 7
; CHECK-NEXT: vmv.x.s a5, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a6, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a6, a0
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a0, a0, a5
; CHECK-NEXT: ret
  %e0 = extractelement <8 x i16> %v, i32 0
  %e1 = extractelement <8 x i16> %v, i32 1
  %e2 = extractelement <8 x i16> %v, i32 2
  %e3 = extractelement <8 x i16> %v, i32 3
  %e4 = extractelement <8 x i16> %v, i32 4
  %e5 = extractelement <8 x i16> %v, i32 5
  %e6 = extractelement <8 x i16> %v, i32 6
  %e7 = extractelement <8 x i16> %v, i32 7
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  ret i16 %add6
}

define i16 @explode_16xi16(<16 x i16> %v) {
; CHECK-LABEL: explode_16xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vmv.x.s a2, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 5
; CHECK-NEXT: vmv.x.s a3, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 6
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 7
; CHECK-NEXT: vmv.x.s a5, v10
; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 8
; CHECK-NEXT: vmv.x.s a6, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 9
; CHECK-NEXT: vmv.x.s a7, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 10
; CHECK-NEXT: vmv.x.s t0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 11
; CHECK-NEXT: vmv.x.s t1, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 12
; CHECK-NEXT: vmv.x.s t2, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 13
; CHECK-NEXT: vmv.x.s t3, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 14
; CHECK-NEXT: vmv.x.s t4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 15
; CHECK-NEXT: vmv.x.s t5, v10
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s t6, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, t6, a0
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a5, a5, a6
; CHECK-NEXT: add a5, a5, a7
; CHECK-NEXT: add a5, a5, t0
; CHECK-NEXT: add a0, a0, a5
; CHECK-NEXT: add t1, t1, t2
; CHECK-NEXT: add t1, t1, t3
; CHECK-NEXT: add t1, t1, t4
; CHECK-NEXT: add t1, t1, t5
; CHECK-NEXT: add a0, a0, t1
; CHECK-NEXT: ret
  %e0 = extractelement <16 x i16> %v, i32 0
  %e1 = extractelement <16 x i16> %v, i32 1
  %e2 = extractelement <16 x i16> %v, i32 2
  %e3 = extractelement <16 x i16> %v, i32 3
  %e4 = extractelement <16 x i16> %v, i32 4
  %e5 = extractelement <16 x i16> %v, i32 5
  %e6 = extractelement <16 x i16> %v, i32 6
  %e7 = extractelement <16 x i16> %v, i32 7
  %e8 = extractelement <16 x i16> %v, i32 8
  %e9 = extractelement <16 x i16> %v, i32 9
  %e10 = extractelement <16 x i16> %v, i32 10
  %e11 = extractelement <16 x i16> %v, i32 11
  %e12 = extractelement <16 x i16> %v, i32 12
  %e13 = extractelement <16 x i16> %v, i32 13
  %e14 = extractelement <16 x i16> %v, i32 14
  %e15 = extractelement <16 x i16> %v, i32 15
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  %add7 = add i16 %add6, %e8
  %add8 = add i16 %add7, %e9
  %add9 = add i16 %add8, %e10
  %add10 = add i16 %add9, %e11
  %add11 = add i16 %add10, %e12
  %add12 = add i16 %add11, %e13
  %add13 = add i16 %add12, %e14
  %add14 = add i16 %add13, %e15
  ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; CHECK-LABEL: explode_2xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
  %e0 = extractelement <2 x i32> %v, i32 0
  %e1 = extractelement <2 x i32> %v, i32 1
  %add0 = xor i32 %e0, %e1
  ret i32 %add0
}

define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v9, v8, 2
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vslidedown.vi v9, v8, 3
; RV32-NEXT: vmv.x.s a1, v9
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a2, v8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: ret
;
; RV64-LABEL: explode_4xi32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 2
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: vslidedown.vi v9, v8, 3
; RV64-NEXT: vmv.x.s a1, v9
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a2, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: addw a0, a2, a0
; RV64-NEXT: ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}

define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vslidedown.vi v10, v8, 3
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 4
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: vslidedown.vi v10, v8, 5
; RV32-NEXT: vmv.x.s a3, v10
; RV32-NEXT: vslidedown.vi v10, v8, 6
; RV32-NEXT: vmv.x.s a4, v10
; RV32-NEXT: vslidedown.vi v10, v8, 7
; RV32-NEXT: vmv.x.s a5, v10
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a6, v8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a6, a0
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, a2, a4
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: ret
;
; RV64-LABEL: explode_8xi32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: vslidedown.vi v10, v8, 3
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 4
; RV64-NEXT: vmv.x.s a2, v10
; RV64-NEXT: vslidedown.vi v10, v8, 5
; RV64-NEXT: vmv.x.s a3, v10
; RV64-NEXT: vslidedown.vi v10, v8, 6
; RV64-NEXT: vmv.x.s a4, v10
; RV64-NEXT: vslidedown.vi v10, v8, 7
; RV64-NEXT: vmv.x.s a5, v10
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, a6, a0
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: addw a0, a0, a5
; RV64-NEXT: ret
  %e0 = extractelement <8 x i32> %v, i32 0
  %e1 = extractelement <8 x i32> %v, i32 1
  %e2 = extractelement <8 x i32> %v, i32 2
  %e3 = extractelement <8 x i32> %v, i32 3
  %e4 = extractelement <8 x i32> %v, i32 4
  %e5 = extractelement <8 x i32> %v, i32 5
  %e6 = extractelement <8 x i32> %v, i32 6
  %e7 = extractelement <8 x i32> %v, i32 7
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  ret i32 %add6
}

define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -128
; RV32-NEXT: .cfi_def_cfa_offset 128
; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 128
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -64
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vmv.x.s a0, v12
; RV32-NEXT: vslidedown.vi v12, v8, 3
; RV32-NEXT: vmv.x.s a1, v12
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 4
; RV32-NEXT: vmv.x.s a2, v12
; RV32-NEXT: vslidedown.vi v12, v8, 5
; RV32-NEXT: vmv.x.s a3, v12
; RV32-NEXT: vslidedown.vi v12, v8, 6
; RV32-NEXT: vmv.x.s a4, v12
; RV32-NEXT: vslidedown.vi v12, v8, 7
; RV32-NEXT: vmv.x.s a5, v12
; RV32-NEXT: mv a6, sp
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vse32.v v8, (a6)
; RV32-NEXT: lw a6, 32(sp)
; RV32-NEXT: lw a7, 36(sp)
; RV32-NEXT: lw t0, 40(sp)
; RV32-NEXT: lw t1, 44(sp)
; RV32-NEXT: lw t2, 48(sp)
; RV32-NEXT: lw t3, 52(sp)
; RV32-NEXT: lw t4, 56(sp)
; RV32-NEXT: lw t5, 60(sp)
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s t6, v8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, t6, a0
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, a2, a4
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a5, a5, a6
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, a7, t1
; RV32-NEXT: add a0, a0, a7
; RV32-NEXT: add t2, t2, t3
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, t2, t5
; RV32-NEXT: add a0, a0, t2
; RV32-NEXT: addi sp, s0, -128
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 128
; RV32-NEXT: ret
;
; RV64-LABEL: explode_16xi32:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -128
; RV64-NEXT: .cfi_def_cfa_offset 128
; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 128
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmv.x.s a0, v12
; RV64-NEXT: vslidedown.vi v12, v8, 3
; RV64-NEXT: vmv.x.s a1, v12
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 4
; RV64-NEXT: vmv.x.s a2, v12
; RV64-NEXT: vslidedown.vi v12, v8, 5
; RV64-NEXT: vmv.x.s a3, v12
; RV64-NEXT: vslidedown.vi v12, v8, 6
; RV64-NEXT: vmv.x.s a4, v12
; RV64-NEXT: vslidedown.vi v12, v8, 7
; RV64-NEXT: vmv.x.s a5, v12
; RV64-NEXT: mv a6, sp
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vse32.v v8, (a6)
; RV64-NEXT: lw a6, 32(sp)
; RV64-NEXT: lw a7, 36(sp)
; RV64-NEXT: lw t0, 40(sp)
; RV64-NEXT: lw t1, 44(sp)
; RV64-NEXT: lw t2, 48(sp)
; RV64-NEXT: lw t3, 52(sp)
; RV64-NEXT: lw t4, 56(sp)
; RV64-NEXT: lw t5, 60(sp)
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s t6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, t6, a0
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a5, a5, a6
; RV64-NEXT: add a0, a0, a5
; RV64-NEXT: add a7, a7, t0
; RV64-NEXT: add a7, a7, t1
; RV64-NEXT: add a0, a0, a7
; RV64-NEXT: add t2, t2, t3
; RV64-NEXT: add t2, t2, t4
; RV64-NEXT: add t2, t2, t5
; RV64-NEXT: addw a0, a0, t2
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 128
; RV64-NEXT: ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}

define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
;
; RV64-LABEL: explode_2xi64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %add0 = xor i64 %e0, %e1
  ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsrl.vx v12, v10, a0
; RV32-NEXT: vmv.x.s a1, v12
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: vslidedown.vi v10, v8, 3
; RV32-NEXT: vsrl.vx v12, v10, a0
; RV32-NEXT: vmv.x.s a3, v12
; RV32-NEXT: vmv.x.s a4, v10
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a0
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vmv.x.s a5, v8
; RV32-NEXT: add a2, a5, a2
; RV32-NEXT: sltu a5, a2, a5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: add a1, a0, a3
; RV32-NEXT: add a0, a2, a4
; RV32-NEXT: sltu a2, a0, a2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: ret
;
; RV64-LABEL: explode_4xi64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: vslidedown.vi v10, v8, 3
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a2, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, a2, a0
; RV64-NEXT: ret
  %e0 = extractelement <4 x i64> %v, i32 0
  %e1 = extractelement <4 x i64> %v, i32 1
  %e2 = extractelement <4 x i64> %v, i32 2
  %e3 = extractelement <4 x i64> %v, i32 3
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  ret i64 %add2
}

define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s a1, v16
; RV32-NEXT: vmv.x.s a2, v12
; RV32-NEXT: vslidedown.vi v12, v8, 3
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s a3, v16
; RV32-NEXT: vmv.x.s a4, v12
; RV32-NEXT: vslidedown.vi v12, v8, 4
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s a5, v16
; RV32-NEXT: vmv.x.s a6, v12
; RV32-NEXT: vslidedown.vi v12, v8, 5
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s a7, v16
; RV32-NEXT: vmv.x.s t0, v12
; RV32-NEXT: vslidedown.vi v12, v8, 6
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s t1, v16
; RV32-NEXT: vmv.x.s t2, v12
; RV32-NEXT: vslidedown.vi v12, v8, 7
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s t3, v16
; RV32-NEXT: vmv.x.s t4, v12
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a0
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vmv.x.s t5, v8
; RV32-NEXT: add a2, t5, a2
; RV32-NEXT: sltu t5, a2, t5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a0, t5
; RV32-NEXT: add a0, a0, a3
; RV32-NEXT: add a4, a2, a4
; RV32-NEXT: sltu a1, a4, a2
; RV32-NEXT: add a1, a1, a5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a6, a4, a6
; RV32-NEXT: sltu a1, a6, a4
; RV32-NEXT: add a1, a1, a7
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t0, a6, t0
; RV32-NEXT: sltu a1, t0, a6
; RV32-NEXT: add a1, a1, t1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t2, t0, t2
; RV32-NEXT: sltu a1, t2, t0
; RV32-NEXT: add a1, a1, t3
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: add a0, t2, t4
; RV32-NEXT: sltu a2, a0, t2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: ret
;
; RV64-LABEL: explode_8xi64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -128
; RV64-NEXT: .cfi_def_cfa_offset 128
; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 128
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmv.x.s a0, v12
; RV64-NEXT: vslidedown.vi v12, v8, 3
; RV64-NEXT: vmv.x.s a1, v12
; RV64-NEXT: mv a2, sp
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vse64.v v8, (a2)
; RV64-NEXT: ld a2, 32(sp)
; RV64-NEXT: ld a3, 40(sp)
; RV64-NEXT: ld a4, 48(sp)
; RV64-NEXT: ld a5, 56(sp)
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, a6, a0
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: add a0, a0, a5
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 128
; RV64-NEXT: ret
  %e0 = extractelement <8 x i64> %v, i32 0
  %e1 = extractelement <8 x i64> %v, i32 1
  %e2 = extractelement <8 x i64> %v, i32 2
  %e3 = extractelement <8 x i64> %v, i32 3
  %e4 = extractelement <8 x i64> %v, i32 4
  %e5 = extractelement <8 x i64> %v, i32 5
  %e6 = extractelement <8 x i64> %v, i32 6
  %e7 = extractelement <8 x i64> %v, i32 7
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  ret i64 %add6
}

define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -64
; RV32-NEXT: .cfi_def_cfa_offset 64
; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 44(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 36(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: .cfi_offset s2, -16
; RV32-NEXT: .cfi_offset s3, -20
; RV32-NEXT: .cfi_offset s4, -24
; RV32-NEXT: .cfi_offset s5, -28
; RV32-NEXT: .cfi_offset s6, -32
; RV32-NEXT: .cfi_offset s7, -36
; RV32-NEXT: .cfi_offset s8, -40
; RV32-NEXT: .cfi_offset s9, -44
; RV32-NEXT: .cfi_offset s10, -48
; RV32-NEXT: .cfi_offset s11, -52
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT: vslidedown.vi v16, v8, 2
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s a0, v24
; RV32-NEXT: vmv.x.s a1, v16
; RV32-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: vslidedown.vi v16, v8, 3
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s a1, v24
; RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: vmv.x.s a4, v16
; RV32-NEXT: vslidedown.vi v16, v8, 4
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s a5, v24
; RV32-NEXT: vmv.x.s a6, v16
; RV32-NEXT: vslidedown.vi v16, v8, 5
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s a7, v24
; RV32-NEXT: vmv.x.s t0, v16
; RV32-NEXT: vslidedown.vi v16, v8, 6
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s t1, v24
; RV32-NEXT: vmv.x.s t2, v16
; RV32-NEXT: vslidedown.vi v16, v8, 7
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s t3, v24
; RV32-NEXT: vmv.x.s t4, v16
; RV32-NEXT: vslidedown.vi v16, v8, 8
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s t5, v24
; RV32-NEXT: vmv.x.s t6, v16
; RV32-NEXT: vslidedown.vi v16, v8, 9
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s s0, v24
; RV32-NEXT: vmv.x.s s1, v16
; RV32-NEXT: vslidedown.vi v16, v8, 10
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s s2, v24
; RV32-NEXT: vmv.x.s s3, v16
; RV32-NEXT: vslidedown.vi v16, v8, 11
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s s4, v24
; RV32-NEXT: vmv.x.s s5, v16
; RV32-NEXT: vslidedown.vi v16, v8, 12
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s s6, v24
; RV32-NEXT: vmv.x.s s7, v16
; RV32-NEXT: vslidedown.vi v16, v8, 13
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s s8, v24
; RV32-NEXT: vmv.x.s s9, v16
; RV32-NEXT: vslidedown.vi v16, v8, 14
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s s10, v24
; RV32-NEXT: vmv.x.s s11, v16
; RV32-NEXT: vslidedown.vi v16, v8, 15
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s ra, v24
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vmv.x.s a2, v16
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a3
; RV32-NEXT: vmv.x.s a3, v9
; RV32-NEXT: add a3, a3, a0
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: sltu a1, a0, a1
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: add a4, a0, a4
; RV32-NEXT: sltu a0, a4, a0
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: add a6, a4, a6
; RV32-NEXT: sltu a1, a6, a4
; RV32-NEXT: add a1, a1, a7
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t0, a6, t0
; RV32-NEXT: sltu a1, t0, a6
; RV32-NEXT: add a1, a1, t1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t2, t0, t2
; RV32-NEXT: sltu a1, t2, t0
; RV32-NEXT: add a1, a1, t3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t4, t2, t4
; RV32-NEXT: sltu a1, t4, t2
; RV32-NEXT: add a1, a1, t5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t6, t4, t6
; RV32-NEXT: sltu a1, t6, t4
; RV32-NEXT: add a1, a1, s0
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s1, t6, s1
; RV32-NEXT: sltu a1, s1, t6
; RV32-NEXT: add a1, a1, s2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s3, s1, s3
; RV32-NEXT: sltu a1, s3, s1
; RV32-NEXT: add a1, a1, s4
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s5, s3, s5
; RV32-NEXT: sltu a1, s5, s3
; RV32-NEXT: add a1, a1, s6
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s7, s5, s7
; RV32-NEXT: sltu a1, s7, s5
; RV32-NEXT: add a1, a1, s8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s9, s7, s9
; RV32-NEXT: sltu a1, s9, s7
; RV32-NEXT: add a1, a1, s10
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s11, s9, s11
; RV32-NEXT: sltu a1, s11, s9
; RV32-NEXT: add a1, a1, ra
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: add a0, s11, a2
; RV32-NEXT: sltu a2, a0, s11
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 44(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 40(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 32(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: ret
;
; RV64-LABEL: explode_16xi64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -256
; RV64-NEXT: .cfi_def_cfa_offset 256
; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 256
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 2
; RV64-NEXT: vmv.x.s a0, v16
; RV64-NEXT: vslidedown.vi v16, v8, 3
; RV64-NEXT: vmv.x.s a1, v16
; RV64-NEXT: mv a2, sp
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a2)
; RV64-NEXT: ld a2, 32(sp)
; RV64-NEXT: ld a3, 40(sp)
; RV64-NEXT: ld a4, 48(sp)
; RV64-NEXT: ld a5, 56(sp)
; RV64-NEXT: ld a6, 64(sp)
; RV64-NEXT: ld a7, 72(sp)
; RV64-NEXT: ld t0, 80(sp)
; RV64-NEXT: ld t1, 88(sp)
; RV64-NEXT: ld t2, 96(sp)
; RV64-NEXT: ld t3, 104(sp)
; RV64-NEXT: ld t4, 112(sp)
; RV64-NEXT: ld t5, 120(sp)
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s t6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, t6, a0
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: add a5, a5, a6
; RV64-NEXT: add a5, a5, a7
; RV64-NEXT: add a0, a0, a5
; RV64-NEXT: add t0, t0, t1
; RV64-NEXT: add t0, t0, t2
; RV64-NEXT: add t0, t0, t3
; RV64-NEXT: add a0, a0, t0
; RV64-NEXT: add t4, t4, t5
; RV64-NEXT: add a0, a0, t4
; RV64-NEXT: addi sp, s0, -256
; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
  %e0 = extractelement <16 x i64> %v, i32 0
  %e1 = extractelement <16 x i64> %v, i32 1
  %e2 = extractelement <16 x i64> %v, i32 2
  %e3 = extractelement <16 x i64> %v, i32 3
  %e4 = extractelement <16 x i64> %v, i32 4
  %e5 = extractelement <16 x i64> %v, i32 5
  %e6 = extractelement <16 x i64> %v, i32 6
  %e7 = extractelement <16 x i64> %v, i32 7
  %e8 = extractelement <16 x i64> %v, i32 8
  %e9 = extractelement <16 x i64> %v, i32 9
  %e10 = extractelement <16 x i64> %v, i32 10
  %e11 = extractelement <16 x i64> %v, i32 11
  %e12 = extractelement <16 x i64> %v, i32 12
  %e13 = extractelement <16 x i64> %v, i32 13
  %e14 = extractelement <16 x i64> %v, i32 14
  %e15 = extractelement <16 x i64> %v, i32 15
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  %add7 = add i64 %add6, %e8
  %add8 = add i64 %add7, %e9
  %add9 = add i64 %add8, %e10
  %add10 = add i64 %add9, %e11
  %add11 = add i64 %add10, %e12
  %add12 = add i64 %add11, %e13
  %add13 = add i64 %add12, %e14
  %add14 = add i64 %add13, %e15
  ret i64 %add14
}

define i32 @explode_16xi32_exact_vlen(<16 x i32> %v) vscale_range(2, 2) {
; RV32-LABEL: explode_16xi32_exact_vlen:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vmv.x.s a0, v12
; RV32-NEXT: vslidedown.vi v12, v8, 3
; RV32-NEXT: vmv.x.s a1, v12
; RV32-NEXT: vmv.x.s a2, v9
; RV32-NEXT: vslidedown.vi v12, v9, 1
; RV32-NEXT: vmv.x.s a3, v12
; RV32-NEXT: vslidedown.vi v12, v9, 2
; RV32-NEXT: vmv.x.s a4, v12
; RV32-NEXT: vslidedown.vi v9, v9, 3
; RV32-NEXT: vmv.x.s a5, v9
; RV32-NEXT: vmv.x.s a6, v10
; RV32-NEXT: vslidedown.vi v9, v10, 1
; RV32-NEXT: vmv.x.s a7, v9
; RV32-NEXT: vslidedown.vi v9, v10, 2
; RV32-NEXT: vmv.x.s t0, v9
; RV32-NEXT: vslidedown.vi v9, v10, 3
; RV32-NEXT: vmv.x.s t1, v9
; RV32-NEXT: vmv.x.s t2, v11
; RV32-NEXT: vslidedown.vi v9, v11, 1
; RV32-NEXT: vmv.x.s t3, v9
; RV32-NEXT: vslidedown.vi v9, v11, 2
; RV32-NEXT: vmv.x.s t4, v9
; RV32-NEXT: vslidedown.vi v9, v11, 3
; RV32-NEXT: vmv.x.s t5, v9
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s t6, v8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, t6, a0
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, a2, a4
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a5, a5, a6
; RV32-NEXT: add a5, a5, a7
; RV32-NEXT: add a5, a5, t0
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: add t1, t1, t2
; RV32-NEXT: add t1, t1, t3
; RV32-NEXT: add t1, t1, t4
; RV32-NEXT: add t1, t1, t5
; RV32-NEXT: add a0, a0, t1
; RV32-NEXT: ret
;
; RV64-LABEL: explode_16xi32_exact_vlen:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmv.x.s a0, v12
; RV64-NEXT: vslidedown.vi v12, v8, 3
; RV64-NEXT: vmv.x.s a1, v12
; RV64-NEXT: vmv.x.s a2, v9
; RV64-NEXT: vslidedown.vi v12, v9, 1
; RV64-NEXT: vmv.x.s a3, v12
; RV64-NEXT: vslidedown.vi v12, v9, 2
; RV64-NEXT: vmv.x.s a4, v12
; RV64-NEXT: vslidedown.vi v9, v9, 3
; RV64-NEXT: vmv.x.s a5, v9
; RV64-NEXT: vmv.x.s a6, v10
; RV64-NEXT: vslidedown.vi v9, v10, 1
; RV64-NEXT: vmv.x.s a7, v9
; RV64-NEXT: vslidedown.vi v9, v10, 2
; RV64-NEXT: vmv.x.s t0, v9
; RV64-NEXT: vslidedown.vi v9, v10, 3
; RV64-NEXT: vmv.x.s t1, v9
; RV64-NEXT: vmv.x.s t2, v11
; RV64-NEXT: vslidedown.vi v9, v11, 1
; RV64-NEXT: vmv.x.s t3, v9
; RV64-NEXT: vslidedown.vi v9, v11, 2
; RV64-NEXT: vmv.x.s t4, v9
; RV64-NEXT: vslidedown.vi v9, v11, 3
; RV64-NEXT: vmv.x.s t5, v9
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s t6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, t6, a0
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a5, a5, a6
; RV64-NEXT: add a5, a5, a7
; RV64-NEXT: add a5, a5, t0
; RV64-NEXT: add a0, a0, a5
; RV64-NEXT: add t1, t1, t2
; RV64-NEXT: add t1, t1, t3
; RV64-NEXT: add t1, t1, t4
; RV64-NEXT: add t1, t1, t5
; RV64-NEXT: addw a0, a0, t1
; RV64-NEXT: ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}