1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-ONLY
3 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32VB
4 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zba,+zbb,+zbkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32VB-PACK
5 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ONLY
6 ; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64
7 ; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64,+zbkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64-PACK
8 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32
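; This file tests BUILD_VECTOR lowering for fixed-length integer vectors:
; vid.v-based arithmetic sequences, dominant-element splats, repeating byte
; patterns, per-element construction, and vectors assembled from scalar loads.
; The run lines cover rv32/rv64 with V alone, with scalar bit-manipulation
; (Zba/Zbb, rva22u64, and Zbkb for pack), and with Zve32x/Zvl128b, which has
; no 64-bit vector elements.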
10 define void @buildvec_vid_v16i8(ptr %x) {
11 ; CHECK-LABEL: buildvec_vid_v16i8:
13 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
14 ; CHECK-NEXT: vid.v v8
15 ; CHECK-NEXT: vse8.v v8, (a0)
17 store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
21 define void @buildvec_vid_undefelts_v16i8(ptr %x) {
22 ; CHECK-LABEL: buildvec_vid_undefelts_v16i8:
24 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
25 ; CHECK-NEXT: vid.v v8
26 ; CHECK-NEXT: vse8.v v8, (a0)
28 store <16 x i8> <i8 0, i8 1, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
32 ; TODO: Could do VID then insertelement to fix the mismatched elements
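; Only element 2 (3 instead of 2) deviates from the vid.v sequence here, yet
; we currently fall back to a constant-pool load.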
33 define void @buildvec_notquite_vid_v16i8(ptr %x) {
34 ; CHECK-LABEL: buildvec_notquite_vid_v16i8:
36 ; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
37 ; CHECK-NEXT: addi a1, a1, %lo(.LCPI2_0)
38 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
39 ; CHECK-NEXT: vle8.v v8, (a1)
40 ; CHECK-NEXT: vse8.v v8, (a0)
42 store <16 x i8> <i8 0, i8 1, i8 3, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
46 define void @buildvec_vid_plus_imm_v16i8(ptr %x) {
47 ; CHECK-LABEL: buildvec_vid_plus_imm_v16i8:
49 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
50 ; CHECK-NEXT: vid.v v8
51 ; CHECK-NEXT: vadd.vi v8, v8, 2
52 ; CHECK-NEXT: vse8.v v8, (a0)
54 store <16 x i8> <i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17>, ptr %x
58 define void @buildvec_vid_plus_nonimm_v16i8(ptr %x) {
59 ; CHECK-LABEL: buildvec_vid_plus_nonimm_v16i8:
61 ; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
62 ; CHECK-NEXT: addi a1, a1, %lo(.LCPI4_0)
63 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
64 ; CHECK-NEXT: vle8.v v8, (a1)
65 ; CHECK-NEXT: vse8.v v8, (a0)
67 store <16 x i8> <i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115>, ptr %x
71 define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
72 ; CHECK-LABEL: buildvec_vid_mpy_imm_v16i8:
74 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
75 ; CHECK-NEXT: vid.v v8
76 ; CHECK-NEXT: li a1, 3
77 ; CHECK-NEXT: vmul.vx v8, v8, a1
78 ; CHECK-NEXT: vse8.v v8, (a0)
80 store <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>, ptr %x
84 define <4 x i8> @buildvec_vid_step2_add0_v4i8() {
85 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
87 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
88 ; CHECK-NEXT: vid.v v8
89 ; CHECK-NEXT: vadd.vv v8, v8, v8
91 ret <4 x i8> <i8 0, i8 2, i8 4, i8 6>
94 define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef0() {
95 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef0:
97 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
98 ; CHECK-NEXT: vid.v v8
99 ; CHECK-NEXT: vadd.vv v8, v8, v8
101 ret <4 x i8> <i8 undef, i8 2, i8 4, i8 6>
104 define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef1() {
105 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef1:
107 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
108 ; CHECK-NEXT: vid.v v8
109 ; CHECK-NEXT: vadd.vv v8, v8, v8
111 ret <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>
114 define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef2() {
115 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef2:
117 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
118 ; CHECK-NEXT: vid.v v8
119 ; CHECK-NEXT: vadd.vv v8, v8, v8
121 ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>
124 define <4 x i8> @buildvec_vid_step2_add1_v4i8() {
125 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
127 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
128 ; CHECK-NEXT: vid.v v8
129 ; CHECK-NEXT: vadd.vv v8, v8, v8
130 ; CHECK-NEXT: vadd.vi v8, v8, 1
132 ret <4 x i8> <i8 1, i8 3, i8 5, i8 7>
135 define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef0() {
136 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef0:
138 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
139 ; CHECK-NEXT: vid.v v8
140 ; CHECK-NEXT: vadd.vv v8, v8, v8
141 ; CHECK-NEXT: vadd.vi v8, v8, 1
143 ret <4 x i8> <i8 undef, i8 3, i8 5, i8 7>
146 define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef1() {
147 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef1:
149 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
150 ; CHECK-NEXT: vid.v v8
151 ; CHECK-NEXT: vadd.vv v8, v8, v8
152 ; CHECK-NEXT: vadd.vi v8, v8, 1
154 ret <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>
157 define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef2() {
158 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef2:
160 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
161 ; CHECK-NEXT: vid.v v8
162 ; CHECK-NEXT: vadd.vv v8, v8, v8
163 ; CHECK-NEXT: vadd.vi v8, v8, 1
165 ret <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>
168 define <4 x i8> @buildvec_vid_stepn1_add0_v4i8() {
169 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
171 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
172 ; CHECK-NEXT: vid.v v8
173 ; CHECK-NEXT: vrsub.vi v8, v8, 0
175 ret <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>
178 define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef0() {
179 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef0:
181 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
182 ; CHECK-NEXT: vid.v v8
183 ; CHECK-NEXT: vrsub.vi v8, v8, 0
185 ret <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>
188 define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef1() {
189 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef1:
191 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
192 ; CHECK-NEXT: vid.v v8
193 ; CHECK-NEXT: vrsub.vi v8, v8, 0
195 ret <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>
198 define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef2() {
199 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef2:
201 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
202 ; CHECK-NEXT: vid.v v8
203 ; CHECK-NEXT: vrsub.vi v8, v8, 0
205 ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>
208 define <4 x i8> @buildvec_vid_stepn2_add0_v4i8() {
209 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
211 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
212 ; CHECK-NEXT: vid.v v8
213 ; CHECK-NEXT: vadd.vv v8, v8, v8
214 ; CHECK-NEXT: vrsub.vi v8, v8, 0
216 ret <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>
219 define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef0() {
220 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef0:
222 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
223 ; CHECK-NEXT: vid.v v8
224 ; CHECK-NEXT: vadd.vv v8, v8, v8
225 ; CHECK-NEXT: vrsub.vi v8, v8, 0
227 ret <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>
230 define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef1() {
231 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef1:
233 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
234 ; CHECK-NEXT: vid.v v8
235 ; CHECK-NEXT: vadd.vv v8, v8, v8
236 ; CHECK-NEXT: vrsub.vi v8, v8, 0
238 ret <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>
241 define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef2() {
242 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef2:
244 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
245 ; CHECK-NEXT: vmv.v.i v8, -6
247 ret <4 x i8> <i8 undef, i8 undef, i8 undef, i8 -6>
250 define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
251 ; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
253 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
254 ; CHECK-NEXT: vid.v v8
255 ; CHECK-NEXT: vadd.vv v8, v8, v8
256 ; CHECK-NEXT: vrsub.vi v8, v8, 3
258 ret <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>
261 define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
262 ; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
264 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
265 ; CHECK-NEXT: vmv.v.i v9, 3
266 ; CHECK-NEXT: vid.v v8
267 ; CHECK-NEXT: li a0, -3
268 ; CHECK-NEXT: vmadd.vx v8, a0, v9
270 ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
273 define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
274 ; CHECK-LABEL: buildvec_vid_stepn3_addn3_v4i32:
276 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
277 ; CHECK-NEXT: vmv.v.i v8, -3
278 ; CHECK-NEXT: vid.v v9
279 ; CHECK-NEXT: li a4, -3
280 ; CHECK-NEXT: vmadd.vx v9, a4, v8
281 ; CHECK-NEXT: vse32.v v9, (a0)
282 ; CHECK-NEXT: vse32.v v9, (a1)
283 ; CHECK-NEXT: vse32.v v9, (a2)
284 ; CHECK-NEXT: vse32.v v9, (a3)
286 store <4 x i32> <i32 -3, i32 -6, i32 -9, i32 -12>, ptr %z0
287 store <4 x i32> <i32 undef, i32 -6, i32 -9, i32 -12>, ptr %z1
288 store <4 x i32> <i32 undef, i32 undef, i32 -9, i32 -12>, ptr %z2
289 store <4 x i32> <i32 -3, i32 undef, i32 undef, i32 -12>, ptr %z3
293 ; FIXME: RV32 doesn't catch this pattern due to BUILD_VECTOR legalization.
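; On RV32 the i64 elements are split into 32-bit halves during legalization,
; so by the time the BUILD_VECTOR is lowered the step-1 i64 pattern is no
; longer visible and the constant is loaded from the pool instead.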
294 define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
295 ; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
297 ; RV32-NEXT: lui a0, %hi(.LCPI25_0)
298 ; RV32-NEXT: addi a0, a0, %lo(.LCPI25_0)
299 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
300 ; RV32-NEXT: vle8.v v10, (a0)
301 ; RV32-NEXT: vsext.vf4 v8, v10
304 ; RV64V-LABEL: buildvec_vid_step1_add0_v4i64:
306 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
307 ; RV64V-NEXT: vid.v v8
310 ; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64:
311 ; RV64ZVE32: # %bb.0:
312 ; RV64ZVE32-NEXT: li a1, 3
313 ; RV64ZVE32-NEXT: sd a1, 24(a0)
314 ; RV64ZVE32-NEXT: li a1, 2
315 ; RV64ZVE32-NEXT: sd a1, 16(a0)
316 ; RV64ZVE32-NEXT: li a1, 1
317 ; RV64ZVE32-NEXT: sd a1, 8(a0)
318 ; RV64ZVE32-NEXT: sd zero, 0(a0)
319 ; RV64ZVE32-NEXT: ret
320 ret <4 x i64> <i64 0, i64 1, i64 2, i64 3>
323 define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
324 ; RV32-LABEL: buildvec_vid_step2_add0_v4i64:
326 ; RV32-NEXT: lui a0, %hi(.LCPI26_0)
327 ; RV32-NEXT: addi a0, a0, %lo(.LCPI26_0)
328 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
329 ; RV32-NEXT: vle8.v v10, (a0)
330 ; RV32-NEXT: vsext.vf4 v8, v10
333 ; RV64V-LABEL: buildvec_vid_step2_add0_v4i64:
335 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
336 ; RV64V-NEXT: vid.v v8
337 ; RV64V-NEXT: vadd.vv v8, v8, v8
340 ; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64:
341 ; RV64ZVE32: # %bb.0:
342 ; RV64ZVE32-NEXT: li a1, 6
343 ; RV64ZVE32-NEXT: sd a1, 24(a0)
344 ; RV64ZVE32-NEXT: li a1, 4
345 ; RV64ZVE32-NEXT: sd a1, 16(a0)
346 ; RV64ZVE32-NEXT: li a1, 2
347 ; RV64ZVE32-NEXT: sd a1, 8(a0)
348 ; RV64ZVE32-NEXT: sd zero, 0(a0)
349 ; RV64ZVE32-NEXT: ret
350 ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
353 define <4 x i8> @buildvec_no_vid_v4i8_0() {
354 ; CHECK-LABEL: buildvec_no_vid_v4i8_0:
356 ; CHECK-NEXT: lui a0, 28768
357 ; CHECK-NEXT: addi a0, a0, 769
358 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
359 ; CHECK-NEXT: vmv.s.x v8, a0
361 ret <4 x i8> <i8 1, i8 3, i8 6, i8 7>
364 define <4 x i8> @buildvec_no_vid_v4i8_1() {
365 ; CHECK-LABEL: buildvec_no_vid_v4i8_1:
367 ; CHECK-NEXT: lui a0, 28752
368 ; CHECK-NEXT: addi a0, a0, 512
369 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
370 ; CHECK-NEXT: vmv.s.x v8, a0
372 ret <4 x i8> <i8 undef, i8 2, i8 5, i8 7>
375 define <4 x i8> @buildvec_no_vid_v4i8_2() {
376 ; CHECK-LABEL: buildvec_no_vid_v4i8_2:
378 ; CHECK-NEXT: lui a0, 32768
379 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
380 ; CHECK-NEXT: vmv.s.x v8, a0
382 ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>
385 define <4 x i8> @buildvec_no_vid_v4i8_3() {
386 ; CHECK-LABEL: buildvec_no_vid_v4i8_3:
388 ; CHECK-NEXT: lui a0, 28672
389 ; CHECK-NEXT: addi a0, a0, 255
390 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
391 ; CHECK-NEXT: vmv.s.x v8, a0
393 ret <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>
396 define <4 x i8> @buildvec_no_vid_v4i8_4() {
397 ; CHECK-LABEL: buildvec_no_vid_v4i8_4:
399 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
400 ; CHECK-NEXT: vmv.v.i v8, -2
402 ret <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>
405 define <4 x i8> @buildvec_no_vid_v4i8_5() {
406 ; CHECK-LABEL: buildvec_no_vid_v4i8_5:
408 ; CHECK-NEXT: lui a0, 1032144
409 ; CHECK-NEXT: addi a0, a0, -257
410 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
411 ; CHECK-NEXT: vmv.s.x v8, a0
413 ret <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>
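; The "dominant" tests build vectors where a single value fills most lanes:
; the expected lowering splats the dominant value and then patches the
; remaining elements, e.g. by sliding a single scalar into place.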
416 define void @buildvec_dominant0_v8i16(ptr %x) {
417 ; CHECK-LABEL: buildvec_dominant0_v8i16:
419 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
420 ; CHECK-NEXT: vmv.s.x v8, zero
421 ; CHECK-NEXT: vmv.v.i v9, 8
422 ; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
423 ; CHECK-NEXT: vslideup.vi v9, v8, 3
424 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
425 ; CHECK-NEXT: vse16.v v9, (a0)
427 store <8 x i16> <i16 8, i16 8, i16 undef, i16 0, i16 8, i16 undef, i16 8, i16 8>, ptr %x
431 define void @buildvec_dominant1_v8i16(ptr %x) {
432 ; CHECK-LABEL: buildvec_dominant1_v8i16:
434 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
435 ; CHECK-NEXT: vmv.v.i v8, 8
436 ; CHECK-NEXT: vse16.v v8, (a0)
438 store <8 x i16> <i16 undef, i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, ptr %x
442 define <2 x i8> @buildvec_dominant0_v2i8() {
443 ; CHECK-LABEL: buildvec_dominant0_v2i8:
446 ret <2 x i8> <i8 undef, i8 undef>
449 define <2 x i8> @buildvec_dominant1_v2i8() {
450 ; RV32-LABEL: buildvec_dominant1_v2i8:
452 ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
453 ; RV32-NEXT: vmv.v.i v8, -1
456 ; RV64V-LABEL: buildvec_dominant1_v2i8:
458 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
459 ; RV64V-NEXT: vmv.v.i v8, -1
462 ; RV64ZVE32-LABEL: buildvec_dominant1_v2i8:
463 ; RV64ZVE32: # %bb.0:
464 ; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
465 ; RV64ZVE32-NEXT: vmv.v.i v8, -1
466 ; RV64ZVE32-NEXT: ret
467 ret <2 x i8> <i8 undef, i8 -1>
470 define <2 x i8> @buildvec_dominant2_v2i8() {
471 ; RV32-LABEL: buildvec_dominant2_v2i8:
473 ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
474 ; RV32-NEXT: vid.v v8
475 ; RV32-NEXT: vrsub.vi v8, v8, 0
478 ; RV64V-LABEL: buildvec_dominant2_v2i8:
480 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
481 ; RV64V-NEXT: vid.v v8
482 ; RV64V-NEXT: vrsub.vi v8, v8, 0
485 ; RV64ZVE32-LABEL: buildvec_dominant2_v2i8:
486 ; RV64ZVE32: # %bb.0:
487 ; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
488 ; RV64ZVE32-NEXT: vid.v v8
489 ; RV64ZVE32-NEXT: vrsub.vi v8, v8, 0
490 ; RV64ZVE32-NEXT: ret
491 ret <2 x i8> <i8 0, i8 -1>
494 define void @buildvec_dominant0_v2i32(ptr %x) {
495 ; RV32-LABEL: buildvec_dominant0_v2i32:
497 ; RV32-NEXT: lui a1, %hi(.LCPI38_0)
498 ; RV32-NEXT: addi a1, a1, %lo(.LCPI38_0)
499 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
500 ; RV32-NEXT: vle32.v v8, (a1)
501 ; RV32-NEXT: vse32.v v8, (a0)
504 ; RV64V-LABEL: buildvec_dominant0_v2i32:
506 ; RV64V-NEXT: lui a1, %hi(.LCPI38_0)
507 ; RV64V-NEXT: ld a1, %lo(.LCPI38_0)(a1)
508 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
509 ; RV64V-NEXT: vmv.v.i v8, -1
510 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma
511 ; RV64V-NEXT: vmv.s.x v8, a1
512 ; RV64V-NEXT: vse64.v v8, (a0)
515 ; RV64ZVE32-LABEL: buildvec_dominant0_v2i32:
516 ; RV64ZVE32: # %bb.0:
517 ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0)
518 ; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1)
519 ; RV64ZVE32-NEXT: li a2, -1
520 ; RV64ZVE32-NEXT: sd a2, 8(a0)
521 ; RV64ZVE32-NEXT: sd a1, 0(a0)
522 ; RV64ZVE32-NEXT: ret
523 store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
527 define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
528 ; RV32-LABEL: buildvec_dominant1_optsize_v2i32:
530 ; RV32-NEXT: lui a1, %hi(.LCPI39_0)
531 ; RV32-NEXT: addi a1, a1, %lo(.LCPI39_0)
532 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
533 ; RV32-NEXT: vle32.v v8, (a1)
534 ; RV32-NEXT: vse32.v v8, (a0)
537 ; RV64V-LABEL: buildvec_dominant1_optsize_v2i32:
539 ; RV64V-NEXT: lui a1, %hi(.LCPI39_0)
540 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI39_0)
541 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
542 ; RV64V-NEXT: vle64.v v8, (a1)
543 ; RV64V-NEXT: vse64.v v8, (a0)
546 ; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32:
547 ; RV64ZVE32: # %bb.0:
548 ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0)
549 ; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1)
550 ; RV64ZVE32-NEXT: li a2, -1
551 ; RV64ZVE32-NEXT: sd a2, 8(a0)
552 ; RV64ZVE32-NEXT: sd a1, 0(a0)
553 ; RV64ZVE32-NEXT: ret
554 store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
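; The "seq" tests cover short repeating patterns: when a byte sequence
; repeats every 2, 4 or 8 elements, one period can be materialized as a
; scalar and splat at a wider element width (e16/e32/e64).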
558 define void @buildvec_seq_v8i8_v4i16(ptr %x) {
559 ; CHECK-LABEL: buildvec_seq_v8i8_v4i16:
561 ; CHECK-NEXT: li a1, 513
562 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
563 ; CHECK-NEXT: vmv.v.x v8, a1
564 ; CHECK-NEXT: vse8.v v8, (a0)
566 store <8 x i8> <i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 undef, i8 2>, ptr %x
570 define void @buildvec_seq_v8i8_v2i32(ptr %x) {
571 ; RV32-LABEL: buildvec_seq_v8i8_v2i32:
573 ; RV32-NEXT: lui a1, 48
574 ; RV32-NEXT: addi a1, a1, 513
575 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
576 ; RV32-NEXT: vmv.v.x v8, a1
577 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
578 ; RV32-NEXT: vse8.v v8, (a0)
581 ; RV64V-LABEL: buildvec_seq_v8i8_v2i32:
583 ; RV64V-NEXT: lui a1, 48
584 ; RV64V-NEXT: addi a1, a1, 513
585 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
586 ; RV64V-NEXT: vmv.v.x v8, a1
587 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
588 ; RV64V-NEXT: vse8.v v8, (a0)
591 ; RV64ZVE32-LABEL: buildvec_seq_v8i8_v2i32:
592 ; RV64ZVE32: # %bb.0:
593 ; RV64ZVE32-NEXT: lui a1, 48
594 ; RV64ZVE32-NEXT: addi a1, a1, 513
595 ; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
596 ; RV64ZVE32-NEXT: vmv.v.x v8, a1
597 ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
598 ; RV64ZVE32-NEXT: vse8.v v8, (a0)
599 ; RV64ZVE32-NEXT: ret
600 store <8 x i8> <i8 1, i8 2, i8 3, i8 undef, i8 1, i8 2, i8 3, i8 undef>, ptr %x
604 define void @buildvec_seq_v16i8_v2i64(ptr %x) {
605 ; RV32-LABEL: buildvec_seq_v16i8_v2i64:
607 ; RV32-NEXT: lui a1, %hi(.LCPI42_0)
608 ; RV32-NEXT: addi a1, a1, %lo(.LCPI42_0)
609 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
610 ; RV32-NEXT: vle8.v v8, (a1)
611 ; RV32-NEXT: vse8.v v8, (a0)
614 ; RV64V-LABEL: buildvec_seq_v16i8_v2i64:
616 ; RV64V-NEXT: lui a1, %hi(.LCPI42_0)
617 ; RV64V-NEXT: ld a1, %lo(.LCPI42_0)(a1)
618 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
619 ; RV64V-NEXT: vmv.v.x v8, a1
620 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
621 ; RV64V-NEXT: vse8.v v8, (a0)
624 ; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64:
625 ; RV64ZVE32: # %bb.0:
626 ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI42_0)
627 ; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI42_0)
628 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
629 ; RV64ZVE32-NEXT: vle8.v v8, (a1)
630 ; RV64ZVE32-NEXT: vse8.v v8, (a0)
631 ; RV64ZVE32-NEXT: ret
632 store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %x
636 define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
637 ; RV32-LABEL: buildvec_seq2_v16i8_v2i64:
639 ; RV32-NEXT: lui a1, 528432
640 ; RV32-NEXT: addi a1, a1, 513
641 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
642 ; RV32-NEXT: vmv.v.x v8, a1
643 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
644 ; RV32-NEXT: vse8.v v8, (a0)
647 ; RV64V-LABEL: buildvec_seq2_v16i8_v2i64:
649 ; RV64V-NEXT: lui a1, 528432
650 ; RV64V-NEXT: addiw a1, a1, 513
651 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
652 ; RV64V-NEXT: vmv.v.x v8, a1
653 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
654 ; RV64V-NEXT: vse8.v v8, (a0)
657 ; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64:
658 ; RV64ZVE32: # %bb.0:
659 ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI43_0)
660 ; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI43_0)
661 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
662 ; RV64ZVE32-NEXT: vle8.v v8, (a1)
663 ; RV64ZVE32-NEXT: vse8.v v8, (a0)
664 ; RV64ZVE32-NEXT: ret
665 store <16 x i8> <i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1, i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1>, ptr %x
669 define void @buildvec_seq_v9i8(ptr %x) {
670 ; CHECK-LABEL: buildvec_seq_v9i8:
672 ; CHECK-NEXT: li a1, 73
673 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
674 ; CHECK-NEXT: vmv.s.x v0, a1
675 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
676 ; CHECK-NEXT: vmv.v.i v9, 3
677 ; CHECK-NEXT: li a1, 146
678 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
679 ; CHECK-NEXT: vmv.s.x v8, a1
680 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
681 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
682 ; CHECK-NEXT: vmv1r.v v0, v8
683 ; CHECK-NEXT: vmerge.vim v8, v9, 2, v0
684 ; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
685 ; CHECK-NEXT: vse8.v v8, (a0)
687 store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, ptr %x
691 define void @buildvec_seq_v4i16_v2i32(ptr %x) {
692 ; CHECK-LABEL: buildvec_seq_v4i16_v2i32:
694 ; CHECK-NEXT: li a1, -127
695 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
696 ; CHECK-NEXT: vmv.v.x v8, a1
697 ; CHECK-NEXT: vse16.v v8, (a0)
699 store <4 x i16> <i16 -127, i16 -1, i16 -127, i16 -1>, ptr %x
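; Fractional vid.v steps: a step of 1/2 is formed as vid.v followed by a
; right shift, so <0,0,1,1> is (vid >> 1) and <3,3,4,4> is (vid >> 1) + 3.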
703 define void @buildvec_vid_step1o2_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) {
704 ; RV32-LABEL: buildvec_vid_step1o2_v4i32:
706 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
707 ; RV32-NEXT: vid.v v8
708 ; RV32-NEXT: vsrl.vi v8, v8, 1
709 ; RV32-NEXT: vse32.v v8, (a0)
710 ; RV32-NEXT: vse32.v v8, (a1)
711 ; RV32-NEXT: vmv.v.i v9, 1
712 ; RV32-NEXT: vse32.v v8, (a2)
713 ; RV32-NEXT: vse32.v v8, (a3)
714 ; RV32-NEXT: vse32.v v8, (a4)
715 ; RV32-NEXT: vmv.s.x v8, zero
716 ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
717 ; RV32-NEXT: vslideup.vi v9, v8, 1
718 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
719 ; RV32-NEXT: vse32.v v9, (a5)
720 ; RV32-NEXT: vmv.v.i v8, 0
721 ; RV32-NEXT: li a0, 1
722 ; RV32-NEXT: vslide1down.vx v8, v8, a0
723 ; RV32-NEXT: vse32.v v8, (a6)
726 ; RV64-LABEL: buildvec_vid_step1o2_v4i32:
728 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
729 ; RV64-NEXT: vid.v v8
730 ; RV64-NEXT: vsrl.vi v8, v8, 1
731 ; RV64-NEXT: vse32.v v8, (a0)
732 ; RV64-NEXT: vmv.v.i v9, 1
733 ; RV64-NEXT: vse32.v v8, (a1)
734 ; RV64-NEXT: vse32.v v8, (a2)
735 ; RV64-NEXT: vse32.v v8, (a3)
736 ; RV64-NEXT: vse32.v v8, (a4)
737 ; RV64-NEXT: vmv.s.x v8, zero
738 ; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma
739 ; RV64-NEXT: vslideup.vi v9, v8, 1
740 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
741 ; RV64-NEXT: vse32.v v9, (a5)
742 ; RV64-NEXT: vmv.v.i v8, 0
743 ; RV64-NEXT: li a0, 1
744 ; RV64-NEXT: vslide1down.vx v8, v8, a0
745 ; RV64-NEXT: vse32.v v8, (a6)
747 store <4 x i32> <i32 0, i32 0, i32 1, i32 1>, ptr %z0
748 store <4 x i32> <i32 0, i32 0, i32 1, i32 undef>, ptr %z1
749 store <4 x i32> <i32 0, i32 undef, i32 1, i32 1>, ptr %z2
750 store <4 x i32> <i32 undef, i32 0, i32 undef, i32 1>, ptr %z3
751 store <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>, ptr %z4
752 ; We don't catch this one
753 store <4 x i32> <i32 undef, i32 0, i32 1, i32 1>, ptr %z5
754 ; We catch this one but as VID/3 rather than VID/2
755 store <4 x i32> <i32 0, i32 0, i32 undef, i32 1>, ptr %z6
759 define void @buildvec_vid_step1o2_add3_v4i16(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) {
760 ; CHECK-LABEL: buildvec_vid_step1o2_add3_v4i16:
762 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
763 ; CHECK-NEXT: vid.v v8
764 ; CHECK-NEXT: vsrl.vi v8, v8, 1
765 ; CHECK-NEXT: vadd.vi v8, v8, 3
766 ; CHECK-NEXT: vse16.v v8, (a0)
767 ; CHECK-NEXT: vmv.v.i v9, 3
768 ; CHECK-NEXT: vse16.v v8, (a1)
769 ; CHECK-NEXT: vse16.v v8, (a2)
770 ; CHECK-NEXT: vse16.v v8, (a3)
771 ; CHECK-NEXT: vse16.v v8, (a4)
772 ; CHECK-NEXT: vmv.v.i v8, 4
773 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
774 ; CHECK-NEXT: vslideup.vi v8, v9, 1
775 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
776 ; CHECK-NEXT: vse16.v v8, (a5)
777 ; CHECK-NEXT: li a0, 4
778 ; CHECK-NEXT: vslide1down.vx v8, v9, a0
779 ; CHECK-NEXT: vse16.v v8, (a6)
781 store <4 x i16> <i16 3, i16 3, i16 4, i16 4>, ptr %z0
782 store <4 x i16> <i16 3, i16 3, i16 4, i16 undef>, ptr %z1
783 store <4 x i16> <i16 3, i16 undef, i16 4, i16 4>, ptr %z2
784 store <4 x i16> <i16 undef, i16 3, i16 undef, i16 4>, ptr %z3
785 store <4 x i16> <i16 3, i16 undef, i16 4, i16 undef>, ptr %z4
786 ; We don't catch this one
787 store <4 x i16> <i16 undef, i16 3, i16 4, i16 4>, ptr %z5
788 ; We catch this one but as VID/3 rather than VID/2
789 store <4 x i16> <i16 3, i16 3, i16 undef, i16 4>, ptr %z6
793 define void @buildvec_vid_stepn1o4_addn5_v8i8(ptr %z0) {
794 ; CHECK-LABEL: buildvec_vid_stepn1o4_addn5_v8i8:
796 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
797 ; CHECK-NEXT: vid.v v8
798 ; CHECK-NEXT: vsrl.vi v8, v8, 2
799 ; CHECK-NEXT: vrsub.vi v8, v8, -5
800 ; CHECK-NEXT: vse8.v v8, (a0)
802 store <8 x i8> <i8 -5, i8 -5, i8 -5, i8 -5, i8 -6, i8 -6, i8 -6, i8 -6>, ptr %z0
806 define void @buildvec_vid_mpy_imm_v8i16(ptr %x) {
807 ; CHECK-LABEL: buildvec_vid_mpy_imm_v8i16:
809 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
810 ; CHECK-NEXT: vid.v v8
811 ; CHECK-NEXT: li a1, 17
812 ; CHECK-NEXT: vmul.vx v8, v8, a1
813 ; CHECK-NEXT: vse16.v v8, (a0)
815 store <8 x i16> <i16 0, i16 17, i16 34, i16 51, i16 68, i16 85, i16 102, i16 119>, ptr %x
819 define void @buildvec_vid_shl_imm_v8i16(ptr %x) {
820 ; CHECK-LABEL: buildvec_vid_shl_imm_v8i16:
822 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
823 ; CHECK-NEXT: vid.v v8
824 ; CHECK-NEXT: vsll.vi v8, v8, 9
825 ; CHECK-NEXT: vse16.v v8, (a0)
827 store <8 x i16> <i16 0, i16 512, i16 1024, i16 1536, i16 2048, i16 2560, i16 3072, i16 3584>, ptr %x
831 define <4 x i32> @splat_c3_v4i32(<4 x i32> %v) {
832 ; CHECK-LABEL: splat_c3_v4i32:
834 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
835 ; CHECK-NEXT: vrgather.vi v9, v8, 3
836 ; CHECK-NEXT: vmv.v.v v8, v9
838 %x = extractelement <4 x i32> %v, i32 3
839 %ins = insertelement <4 x i32> poison, i32 %x, i32 0
840 %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
844 define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) {
845 ; CHECK-LABEL: splat_idx_v4i32:
847 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
848 ; CHECK-NEXT: vrgather.vx v9, v8, a0
849 ; CHECK-NEXT: vmv.v.v v8, v9
851 %x = extractelement <4 x i32> %v, i64 %idx
852 %ins = insertelement <4 x i32> poison, i32 %x, i32 0
853 %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
857 define <8 x i16> @splat_c4_v8i16(<8 x i16> %v) {
858 ; CHECK-LABEL: splat_c4_v8i16:
860 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
861 ; CHECK-NEXT: vrgather.vi v9, v8, 4
862 ; CHECK-NEXT: vmv.v.v v8, v9
864 %x = extractelement <8 x i16> %v, i32 4
865 %ins = insertelement <8 x i16> poison, i16 %x, i32 0
866 %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
870 define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) {
871 ; CHECK-LABEL: splat_idx_v8i16:
873 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
874 ; CHECK-NEXT: vrgather.vx v9, v8, a0
875 ; CHECK-NEXT: vmv.v.v v8, v9
877 %x = extractelement <8 x i16> %v, i64 %idx
878 %ins = insertelement <8 x i16> poison, i16 %x, i32 0
879 %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
883 define <4 x i8> @buildvec_not_vid_v4i8_1() {
884 ; CHECK-LABEL: buildvec_not_vid_v4i8_1:
886 ; CHECK-NEXT: lui a0, 12320
887 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
888 ; CHECK-NEXT: vmv.s.x v8, a0
890 ret <4 x i8> <i8 0, i8 0, i8 2, i8 3>
893 define <4 x i8> @buildvec_not_vid_v4i8_2() {
894 ; CHECK-LABEL: buildvec_not_vid_v4i8_2:
896 ; CHECK-NEXT: lui a0, 16
897 ; CHECK-NEXT: addi a0, a0, 771
898 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
899 ; CHECK-NEXT: vmv.s.x v8, a0
901 ret <4 x i8> <i8 3, i8 3, i8 1, i8 0>
904 ; We match this as a VID sequence (-3 / 8) + 5 but choose not to introduce
905 ; division to compute it.
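; For the defined elements this checks out with division truncating towards
; zero: element 6 is (6 * -3) / 8 + 5 = 3, and elements 14 and 15 are
; (-42 / 8) + 5 = 0 and (-45 / 8) + 5 = 0.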
906 define <16 x i8> @buildvec_not_vid_v16i8() {
907 ; CHECK-LABEL: buildvec_not_vid_v16i8:
909 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
910 ; CHECK-NEXT: vmv.v.i v9, 3
911 ; CHECK-NEXT: vmv.v.i v8, 0
912 ; CHECK-NEXT: vsetivli zero, 7, e8, m1, tu, ma
913 ; CHECK-NEXT: vslideup.vi v8, v9, 6
915 ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 3, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0>
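; Overwriting a prefix or suffix of an existing vector: each scalar goes
; through vmv.s.x and a vslideup at increasing VL, with a tail-undisturbed
; policy whenever elements of %vin remain past VL, so the untouched lanes
; are preserved.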
918 define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
919 ; CHECK-LABEL: prefix_overwrite:
921 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
922 ; CHECK-NEXT: vmv.s.x v8, a0
923 ; CHECK-NEXT: vmv.s.x v10, a1
924 ; CHECK-NEXT: vslideup.vi v8, v10, 1
925 ; CHECK-NEXT: vmv.s.x v10, a2
926 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
927 ; CHECK-NEXT: vslideup.vi v8, v10, 2
928 ; CHECK-NEXT: vmv.s.x v10, a3
929 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
930 ; CHECK-NEXT: vslideup.vi v8, v10, 3
932 %v0 = insertelement <8 x i32> %vin, i32 %a, i32 0
933 %v1 = insertelement <8 x i32> %v0, i32 %b, i32 1
934 %v2 = insertelement <8 x i32> %v1, i32 %c, i32 2
935 %v3 = insertelement <8 x i32> %v2, i32 %d, i32 3
939 define <8 x i32> @suffix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
940 ; CHECK-LABEL: suffix_overwrite:
942 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
943 ; CHECK-NEXT: vmv.s.x v10, a0
944 ; CHECK-NEXT: vslideup.vi v8, v10, 4
945 ; CHECK-NEXT: vmv.s.x v10, a1
946 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
947 ; CHECK-NEXT: vslideup.vi v8, v10, 5
948 ; CHECK-NEXT: vmv.s.x v10, a2
949 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
950 ; CHECK-NEXT: vslideup.vi v8, v10, 6
951 ; CHECK-NEXT: vmv.s.x v10, a3
952 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
953 ; CHECK-NEXT: vslideup.vi v8, v10, 7
955 %v0 = insertelement <8 x i32> %vin, i32 %a, i32 4
956 %v1 = insertelement <8 x i32> %v0, i32 %b, i32 5
957 %v2 = insertelement <8 x i32> %v1, i32 %c, i32 6
958 %v3 = insertelement <8 x i32> %v2, i32 %d, i32 7
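; With vscale_range(2,2) the exact VLEN (128) is known, so a register group
; holds a known number of 128-bit registers and each half of the result can
; be built independently in its own register with vmv.v.x + vslide1down.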
962 define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
963 ; RV32-LABEL: v4xi64_exact:
965 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
966 ; RV32-NEXT: vmv.v.x v8, a4
967 ; RV32-NEXT: vslide1down.vx v8, v8, a5
968 ; RV32-NEXT: vslide1down.vx v8, v8, a6
969 ; RV32-NEXT: vslide1down.vx v9, v8, a7
970 ; RV32-NEXT: vmv.v.x v8, a0
971 ; RV32-NEXT: vslide1down.vx v8, v8, a1
972 ; RV32-NEXT: vslide1down.vx v8, v8, a2
973 ; RV32-NEXT: vslide1down.vx v8, v8, a3
976 ; RV64V-LABEL: v4xi64_exact:
978 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
979 ; RV64V-NEXT: vmv.v.x v8, a2
980 ; RV64V-NEXT: vslide1down.vx v9, v8, a3
981 ; RV64V-NEXT: vmv.v.x v8, a0
982 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
985 ; RV64ZVE32-LABEL: v4xi64_exact:
986 ; RV64ZVE32: # %bb.0:
987 ; RV64ZVE32-NEXT: sd a4, 24(a0)
988 ; RV64ZVE32-NEXT: sd a3, 16(a0)
989 ; RV64ZVE32-NEXT: sd a2, 8(a0)
990 ; RV64ZVE32-NEXT: sd a1, 0(a0)
991 ; RV64ZVE32-NEXT: ret
992 %v1 = insertelement <4 x i64> poison, i64 %a, i32 0
993 %v2 = insertelement <4 x i64> %v1, i64 %b, i32 1
994 %v3 = insertelement <4 x i64> %v2, i64 %c, i32 2
995 %v4 = insertelement <4 x i64> %v3, i64 %d, i32 3
999 define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) vscale_range(2,2) {
1000 ; RV32-LABEL: v8xi64_exact:
1002 ; RV32-NEXT: addi sp, sp, -16
1003 ; RV32-NEXT: .cfi_def_cfa_offset 16
1004 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
1005 ; RV32-NEXT: .cfi_offset s0, -4
1006 ; RV32-NEXT: lw t0, 44(sp)
1007 ; RV32-NEXT: lw t1, 40(sp)
1008 ; RV32-NEXT: lw t2, 36(sp)
1009 ; RV32-NEXT: lw t3, 32(sp)
1010 ; RV32-NEXT: lw t4, 28(sp)
1011 ; RV32-NEXT: lw t5, 24(sp)
1012 ; RV32-NEXT: lw t6, 20(sp)
1013 ; RV32-NEXT: lw s0, 16(sp)
1014 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1015 ; RV32-NEXT: vmv.v.x v8, a4
1016 ; RV32-NEXT: vslide1down.vx v8, v8, a5
1017 ; RV32-NEXT: vslide1down.vx v8, v8, a6
1018 ; RV32-NEXT: vslide1down.vx v9, v8, a7
1019 ; RV32-NEXT: vmv.v.x v8, a0
1020 ; RV32-NEXT: vslide1down.vx v8, v8, a1
1021 ; RV32-NEXT: vslide1down.vx v8, v8, a2
1022 ; RV32-NEXT: vslide1down.vx v8, v8, a3
1023 ; RV32-NEXT: vmv.v.x v10, s0
1024 ; RV32-NEXT: vslide1down.vx v10, v10, t6
1025 ; RV32-NEXT: vslide1down.vx v10, v10, t5
1026 ; RV32-NEXT: vslide1down.vx v10, v10, t4
1027 ; RV32-NEXT: vmv.v.x v11, t3
1028 ; RV32-NEXT: vslide1down.vx v11, v11, t2
1029 ; RV32-NEXT: vslide1down.vx v11, v11, t1
1030 ; RV32-NEXT: vslide1down.vx v11, v11, t0
1031 ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
1032 ; RV32-NEXT: addi sp, sp, 16
1035 ; RV64V-LABEL: v8xi64_exact:
1037 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1038 ; RV64V-NEXT: vmv.v.x v8, a2
1039 ; RV64V-NEXT: vslide1down.vx v9, v8, a3
1040 ; RV64V-NEXT: vmv.v.x v8, a0
1041 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
1042 ; RV64V-NEXT: vmv.v.x v10, a4
1043 ; RV64V-NEXT: vslide1down.vx v10, v10, a5
1044 ; RV64V-NEXT: vmv.v.x v11, a6
1045 ; RV64V-NEXT: vslide1down.vx v11, v11, a7
1048 ; RV64ZVE32-LABEL: v8xi64_exact:
1049 ; RV64ZVE32: # %bb.0:
1050 ; RV64ZVE32-NEXT: ld t0, 0(sp)
1051 ; RV64ZVE32-NEXT: sd t0, 56(a0)
1052 ; RV64ZVE32-NEXT: sd a7, 48(a0)
1053 ; RV64ZVE32-NEXT: sd a6, 40(a0)
1054 ; RV64ZVE32-NEXT: sd a5, 32(a0)
1055 ; RV64ZVE32-NEXT: sd a4, 24(a0)
1056 ; RV64ZVE32-NEXT: sd a3, 16(a0)
1057 ; RV64ZVE32-NEXT: sd a2, 8(a0)
1058 ; RV64ZVE32-NEXT: sd a1, 0(a0)
1059 ; RV64ZVE32-NEXT: ret
1060 %v1 = insertelement <8 x i64> poison, i64 %a, i32 0
1061 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
1062 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
1063 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
1064 %v5 = insertelement <8 x i64> %v4, i64 %e, i32 4
1065 %v6 = insertelement <8 x i64> %v5, i64 %f, i32 5
1066 %v7 = insertelement <8 x i64> %v6, i64 %g, i32 6
1067 %v8 = insertelement <8 x i64> %v7, i64 %h, i32 7
1071 define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
1072 ; RV32-LABEL: v8xi64_exact_equal_halves:
1074 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1075 ; RV32-NEXT: vmv.v.x v8, a4
1076 ; RV32-NEXT: vslide1down.vx v8, v8, a5
1077 ; RV32-NEXT: vslide1down.vx v8, v8, a6
1078 ; RV32-NEXT: vslide1down.vx v9, v8, a7
1079 ; RV32-NEXT: vmv.v.x v8, a0
1080 ; RV32-NEXT: vslide1down.vx v8, v8, a1
1081 ; RV32-NEXT: vslide1down.vx v8, v8, a2
1082 ; RV32-NEXT: vslide1down.vx v8, v8, a3
1083 ; RV32-NEXT: vmv.v.v v10, v8
1084 ; RV32-NEXT: vmv.v.v v11, v9
1087 ; RV64V-LABEL: v8xi64_exact_equal_halves:
1089 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1090 ; RV64V-NEXT: vmv.v.x v8, a2
1091 ; RV64V-NEXT: vslide1down.vx v9, v8, a3
1092 ; RV64V-NEXT: vmv.v.x v8, a0
1093 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
1094 ; RV64V-NEXT: vmv.v.v v10, v8
1095 ; RV64V-NEXT: vmv.v.v v11, v9
1098 ; RV64ZVE32-LABEL: v8xi64_exact_equal_halves:
1099 ; RV64ZVE32: # %bb.0:
1100 ; RV64ZVE32-NEXT: sd a4, 56(a0)
1101 ; RV64ZVE32-NEXT: sd a3, 48(a0)
1102 ; RV64ZVE32-NEXT: sd a2, 40(a0)
1103 ; RV64ZVE32-NEXT: sd a1, 32(a0)
1104 ; RV64ZVE32-NEXT: sd a4, 24(a0)
1105 ; RV64ZVE32-NEXT: sd a3, 16(a0)
1106 ; RV64ZVE32-NEXT: sd a2, 8(a0)
1107 ; RV64ZVE32-NEXT: sd a1, 0(a0)
1108 ; RV64ZVE32-NEXT: ret
1109 %v1 = insertelement <8 x i64> poison, i64 %a, i32 0
1110 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
1111 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
1112 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
1113 %v5 = insertelement <8 x i64> %v4, i64 %a, i32 4
1114 %v6 = insertelement <8 x i64> %v5, i64 %b, i32 5
1115 %v7 = insertelement <8 x i64> %v6, i64 %c, i32 6
1116 %v8 = insertelement <8 x i64> %v7, i64 %d, i32 7
1120 define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
1121 ; RV32-LABEL: v8xi64_exact_undef_suffix:
1123 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1124 ; RV32-NEXT: vmv.v.x v8, a4
1125 ; RV32-NEXT: vslide1down.vx v8, v8, a5
1126 ; RV32-NEXT: vslide1down.vx v8, v8, a6
1127 ; RV32-NEXT: vslide1down.vx v9, v8, a7
1128 ; RV32-NEXT: vmv.v.x v8, a0
1129 ; RV32-NEXT: vslide1down.vx v8, v8, a1
1130 ; RV32-NEXT: vslide1down.vx v8, v8, a2
1131 ; RV32-NEXT: vslide1down.vx v8, v8, a3
1134 ; RV64V-LABEL: v8xi64_exact_undef_suffix:
1136 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1137 ; RV64V-NEXT: vmv.v.x v8, a2
1138 ; RV64V-NEXT: vslide1down.vx v9, v8, a3
1139 ; RV64V-NEXT: vmv.v.x v8, a0
1140 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
1143 ; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix:
1144 ; RV64ZVE32: # %bb.0:
1145 ; RV64ZVE32-NEXT: sd a4, 24(a0)
1146 ; RV64ZVE32-NEXT: sd a3, 16(a0)
1147 ; RV64ZVE32-NEXT: sd a2, 8(a0)
1148 ; RV64ZVE32-NEXT: sd a1, 0(a0)
1149 ; RV64ZVE32-NEXT: ret
1150 %v1 = insertelement <8 x i64> poison, i64 %a, i32 0
1151 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
1152 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
1153 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
1157 define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
1158 ; RV32-LABEL: v8xi64_exact_undef_prefix:
1160 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1161 ; RV32-NEXT: vmv.v.x v8, a4
1162 ; RV32-NEXT: vslide1down.vx v8, v8, a5
1163 ; RV32-NEXT: vslide1down.vx v8, v8, a6
1164 ; RV32-NEXT: vslide1down.vx v11, v8, a7
1165 ; RV32-NEXT: vmv.v.x v8, a0
1166 ; RV32-NEXT: vslide1down.vx v8, v8, a1
1167 ; RV32-NEXT: vslide1down.vx v8, v8, a2
1168 ; RV32-NEXT: vslide1down.vx v10, v8, a3
1171 ; RV64V-LABEL: v8xi64_exact_undef_prefix:
1173 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1174 ; RV64V-NEXT: vmv.v.x v8, a2
1175 ; RV64V-NEXT: vslide1down.vx v11, v8, a3
1176 ; RV64V-NEXT: vmv.v.x v8, a0
1177 ; RV64V-NEXT: vslide1down.vx v10, v8, a1
1180 ; RV64ZVE32-LABEL: v8xi64_exact_undef_prefix:
1181 ; RV64ZVE32: # %bb.0:
1182 ; RV64ZVE32-NEXT: sd a4, 56(a0)
1183 ; RV64ZVE32-NEXT: sd a3, 48(a0)
1184 ; RV64ZVE32-NEXT: sd a2, 40(a0)
1185 ; RV64ZVE32-NEXT: sd a1, 32(a0)
1186 ; RV64ZVE32-NEXT: ret
1187 %v1 = insertelement <8 x i64> poison, i64 %a, i32 4
1188 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 5
1189 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 6
1190 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 7
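; Building a v16i8 from 16 scalar byte loads (here from consecutive
; addresses; the _gather variant below uses scattered offsets). Without
; scalar bit-manipulation the bytes are fed through vslide1down; with
; Zba/Zbb or rva22u64 they are first combined into 32- or 64-bit words
; using shifts and ors, and the Zbkb variants use packh/packw/pack instead.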
1195 define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
1196 ; RV32-ONLY-LABEL: buildvec_v16i8_loads_contigous:
1197 ; RV32-ONLY: # %bb.0:
1198 ; RV32-ONLY-NEXT: addi sp, sp, -16
1199 ; RV32-ONLY-NEXT: .cfi_def_cfa_offset 16
1200 ; RV32-ONLY-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
1201 ; RV32-ONLY-NEXT: .cfi_offset s0, -4
1202 ; RV32-ONLY-NEXT: lbu a1, 0(a0)
1203 ; RV32-ONLY-NEXT: lbu a2, 1(a0)
1204 ; RV32-ONLY-NEXT: lbu a3, 2(a0)
1205 ; RV32-ONLY-NEXT: lbu a4, 3(a0)
1206 ; RV32-ONLY-NEXT: lbu a5, 4(a0)
1207 ; RV32-ONLY-NEXT: lbu a6, 5(a0)
1208 ; RV32-ONLY-NEXT: lbu a7, 6(a0)
1209 ; RV32-ONLY-NEXT: lbu t0, 7(a0)
1210 ; RV32-ONLY-NEXT: lbu t1, 8(a0)
1211 ; RV32-ONLY-NEXT: lbu t2, 9(a0)
1212 ; RV32-ONLY-NEXT: lbu t3, 10(a0)
1213 ; RV32-ONLY-NEXT: lbu t4, 11(a0)
1214 ; RV32-ONLY-NEXT: lbu t5, 12(a0)
1215 ; RV32-ONLY-NEXT: lbu t6, 13(a0)
1216 ; RV32-ONLY-NEXT: lbu s0, 14(a0)
1217 ; RV32-ONLY-NEXT: lbu a0, 15(a0)
1218 ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1219 ; RV32-ONLY-NEXT: vmv.v.x v8, a1
1220 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
1221 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
1222 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
1223 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
1224 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
1225 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
1226 ; RV32-ONLY-NEXT: vslide1down.vx v9, v8, t0
1227 ; RV32-ONLY-NEXT: vmv.v.x v8, t1
1228 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
1229 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
1230 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
1231 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
1232 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
1233 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
1234 ; RV32-ONLY-NEXT: li a1, 255
1235 ; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1236 ; RV32-ONLY-NEXT: vmv.s.x v0, a1
1237 ; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
1238 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
1239 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
1240 ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
1241 ; RV32-ONLY-NEXT: addi sp, sp, 16
1242 ; RV32-ONLY-NEXT: ret
1244 ; RV32VB-LABEL: buildvec_v16i8_loads_contigous:
1246 ; RV32VB-NEXT: lbu a1, 1(a0)
1247 ; RV32VB-NEXT: lbu a2, 0(a0)
1248 ; RV32VB-NEXT: lbu a3, 2(a0)
1249 ; RV32VB-NEXT: lbu a4, 3(a0)
1250 ; RV32VB-NEXT: slli a1, a1, 8
1251 ; RV32VB-NEXT: or a1, a2, a1
1252 ; RV32VB-NEXT: slli a3, a3, 16
1253 ; RV32VB-NEXT: slli a4, a4, 24
1254 ; RV32VB-NEXT: or a3, a4, a3
1255 ; RV32VB-NEXT: or a1, a1, a3
1256 ; RV32VB-NEXT: lbu a2, 5(a0)
1257 ; RV32VB-NEXT: lbu a3, 4(a0)
1258 ; RV32VB-NEXT: lbu a4, 6(a0)
1259 ; RV32VB-NEXT: lbu a5, 7(a0)
1260 ; RV32VB-NEXT: slli a2, a2, 8
1261 ; RV32VB-NEXT: or a2, a3, a2
1262 ; RV32VB-NEXT: slli a4, a4, 16
1263 ; RV32VB-NEXT: slli a5, a5, 24
1264 ; RV32VB-NEXT: or a4, a5, a4
1265 ; RV32VB-NEXT: or a2, a2, a4
1266 ; RV32VB-NEXT: lbu a3, 9(a0)
1267 ; RV32VB-NEXT: lbu a4, 8(a0)
1268 ; RV32VB-NEXT: lbu a5, 10(a0)
1269 ; RV32VB-NEXT: lbu a6, 11(a0)
1270 ; RV32VB-NEXT: slli a3, a3, 8
1271 ; RV32VB-NEXT: or a3, a4, a3
1272 ; RV32VB-NEXT: slli a5, a5, 16
1273 ; RV32VB-NEXT: slli a6, a6, 24
1274 ; RV32VB-NEXT: or a4, a6, a5
1275 ; RV32VB-NEXT: or a3, a3, a4
1276 ; RV32VB-NEXT: lbu a4, 13(a0)
1277 ; RV32VB-NEXT: lbu a5, 12(a0)
1278 ; RV32VB-NEXT: lbu a6, 14(a0)
1279 ; RV32VB-NEXT: lbu a0, 15(a0)
1280 ; RV32VB-NEXT: slli a4, a4, 8
1281 ; RV32VB-NEXT: or a4, a5, a4
1282 ; RV32VB-NEXT: slli a6, a6, 16
1283 ; RV32VB-NEXT: slli a0, a0, 24
1284 ; RV32VB-NEXT: or a0, a0, a6
1285 ; RV32VB-NEXT: or a0, a4, a0
1286 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1287 ; RV32VB-NEXT: vmv.v.x v8, a1
1288 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2
1289 ; RV32VB-NEXT: vslide1down.vx v8, v8, a3
1290 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0
1293 ; RV32VB-PACK-LABEL: buildvec_v16i8_loads_contigous:
1294 ; RV32VB-PACK: # %bb.0:
1295 ; RV32VB-PACK-NEXT: lbu a1, 0(a0)
1296 ; RV32VB-PACK-NEXT: lbu a2, 1(a0)
1297 ; RV32VB-PACK-NEXT: lbu a3, 2(a0)
1298 ; RV32VB-PACK-NEXT: lbu a4, 3(a0)
1299 ; RV32VB-PACK-NEXT: packh a1, a1, a2
1300 ; RV32VB-PACK-NEXT: packh a2, a3, a4
1301 ; RV32VB-PACK-NEXT: pack a1, a1, a2
1302 ; RV32VB-PACK-NEXT: lbu a2, 4(a0)
1303 ; RV32VB-PACK-NEXT: lbu a3, 5(a0)
1304 ; RV32VB-PACK-NEXT: lbu a4, 6(a0)
1305 ; RV32VB-PACK-NEXT: lbu a5, 7(a0)
1306 ; RV32VB-PACK-NEXT: lbu a6, 8(a0)
1307 ; RV32VB-PACK-NEXT: lbu a7, 9(a0)
1308 ; RV32VB-PACK-NEXT: packh a2, a2, a3
1309 ; RV32VB-PACK-NEXT: packh a3, a4, a5
1310 ; RV32VB-PACK-NEXT: pack a2, a2, a3
1311 ; RV32VB-PACK-NEXT: packh a3, a6, a7
1312 ; RV32VB-PACK-NEXT: lbu a4, 10(a0)
1313 ; RV32VB-PACK-NEXT: lbu a5, 11(a0)
1314 ; RV32VB-PACK-NEXT: lbu a6, 12(a0)
1315 ; RV32VB-PACK-NEXT: lbu a7, 13(a0)
1316 ; RV32VB-PACK-NEXT: lbu t0, 14(a0)
1317 ; RV32VB-PACK-NEXT: lbu a0, 15(a0)
1318 ; RV32VB-PACK-NEXT: packh a4, a4, a5
1319 ; RV32VB-PACK-NEXT: pack a3, a3, a4
1320 ; RV32VB-PACK-NEXT: packh a4, a6, a7
1321 ; RV32VB-PACK-NEXT: packh a0, t0, a0
1322 ; RV32VB-PACK-NEXT: pack a0, a4, a0
1323 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1324 ; RV32VB-PACK-NEXT: vmv.v.x v8, a1
1325 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
1326 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
1327 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
1328 ; RV32VB-PACK-NEXT: ret
1330 ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_contigous:
1331 ; RV64V-ONLY: # %bb.0:
1332 ; RV64V-ONLY-NEXT: addi sp, sp, -16
1333 ; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 16
1334 ; RV64V-ONLY-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
1335 ; RV64V-ONLY-NEXT: .cfi_offset s0, -8
1336 ; RV64V-ONLY-NEXT: lbu a1, 0(a0)
1337 ; RV64V-ONLY-NEXT: lbu a2, 1(a0)
1338 ; RV64V-ONLY-NEXT: lbu a3, 2(a0)
1339 ; RV64V-ONLY-NEXT: lbu a4, 3(a0)
1340 ; RV64V-ONLY-NEXT: lbu a5, 4(a0)
1341 ; RV64V-ONLY-NEXT: lbu a6, 5(a0)
1342 ; RV64V-ONLY-NEXT: lbu a7, 6(a0)
1343 ; RV64V-ONLY-NEXT: lbu t0, 7(a0)
1344 ; RV64V-ONLY-NEXT: lbu t1, 8(a0)
1345 ; RV64V-ONLY-NEXT: lbu t2, 9(a0)
1346 ; RV64V-ONLY-NEXT: lbu t3, 10(a0)
1347 ; RV64V-ONLY-NEXT: lbu t4, 11(a0)
1348 ; RV64V-ONLY-NEXT: lbu t5, 12(a0)
1349 ; RV64V-ONLY-NEXT: lbu t6, 13(a0)
1350 ; RV64V-ONLY-NEXT: lbu s0, 14(a0)
1351 ; RV64V-ONLY-NEXT: lbu a0, 15(a0)
1352 ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1353 ; RV64V-ONLY-NEXT: vmv.v.x v8, a1
1354 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
1355 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
1356 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
1357 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
1358 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
1359 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
1360 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, t0
1361 ; RV64V-ONLY-NEXT: vmv.v.x v8, t1
1362 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
1363 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
1364 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
1365 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
1366 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
1367 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
1368 ; RV64V-ONLY-NEXT: li a1, 255
1369 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1370 ; RV64V-ONLY-NEXT: vmv.s.x v0, a1
1371 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
1372 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
1373 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
1374 ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
1375 ; RV64V-ONLY-NEXT: addi sp, sp, 16
1376 ; RV64V-ONLY-NEXT: ret
1378 ; RVA22U64-LABEL: buildvec_v16i8_loads_contigous:
1379 ; RVA22U64: # %bb.0:
1380 ; RVA22U64-NEXT: lbu a1, 1(a0)
1381 ; RVA22U64-NEXT: lbu a2, 0(a0)
1382 ; RVA22U64-NEXT: lbu a3, 2(a0)
1383 ; RVA22U64-NEXT: lbu a4, 3(a0)
1384 ; RVA22U64-NEXT: slli a1, a1, 8
1385 ; RVA22U64-NEXT: or a1, a1, a2
1386 ; RVA22U64-NEXT: slli a3, a3, 16
1387 ; RVA22U64-NEXT: slli a4, a4, 24
1388 ; RVA22U64-NEXT: or a3, a3, a4
1389 ; RVA22U64-NEXT: lbu a2, 4(a0)
1390 ; RVA22U64-NEXT: or a1, a1, a3
1391 ; RVA22U64-NEXT: lbu a3, 5(a0)
1392 ; RVA22U64-NEXT: lbu a4, 6(a0)
1393 ; RVA22U64-NEXT: slli a2, a2, 32
1394 ; RVA22U64-NEXT: lbu a5, 7(a0)
1395 ; RVA22U64-NEXT: slli a3, a3, 40
1396 ; RVA22U64-NEXT: or a2, a2, a3
1397 ; RVA22U64-NEXT: slli a4, a4, 48
1398 ; RVA22U64-NEXT: slli a5, a5, 56
1399 ; RVA22U64-NEXT: or a4, a4, a5
1400 ; RVA22U64-NEXT: or a2, a2, a4
1401 ; RVA22U64-NEXT: or a1, a1, a2
1402 ; RVA22U64-NEXT: lbu a2, 9(a0)
1403 ; RVA22U64-NEXT: lbu a3, 8(a0)
1404 ; RVA22U64-NEXT: lbu a4, 10(a0)
1405 ; RVA22U64-NEXT: lbu a5, 11(a0)
1406 ; RVA22U64-NEXT: slli a2, a2, 8
1407 ; RVA22U64-NEXT: or a2, a2, a3
1408 ; RVA22U64-NEXT: slli a4, a4, 16
1409 ; RVA22U64-NEXT: slli a5, a5, 24
1410 ; RVA22U64-NEXT: or a4, a4, a5
1411 ; RVA22U64-NEXT: lbu a3, 12(a0)
1412 ; RVA22U64-NEXT: or a2, a2, a4
1413 ; RVA22U64-NEXT: lbu a4, 13(a0)
1414 ; RVA22U64-NEXT: lbu a5, 14(a0)
1415 ; RVA22U64-NEXT: slli a3, a3, 32
1416 ; RVA22U64-NEXT: lbu a0, 15(a0)
1417 ; RVA22U64-NEXT: slli a4, a4, 40
1418 ; RVA22U64-NEXT: or a3, a3, a4
1419 ; RVA22U64-NEXT: slli a5, a5, 48
1420 ; RVA22U64-NEXT: slli a0, a0, 56
1421 ; RVA22U64-NEXT: or a0, a0, a5
1422 ; RVA22U64-NEXT: or a0, a0, a3
1423 ; RVA22U64-NEXT: or a0, a0, a2
1424 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1425 ; RVA22U64-NEXT: vmv.v.x v8, a1
1426 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
1427 ; RVA22U64-NEXT: ret
1429 ; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_contigous:
1430 ; RVA22U64-PACK: # %bb.0:
1431 ; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
1432 ; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
1433 ; RVA22U64-PACK-NEXT: lbu a3, 2(a0)
1434 ; RVA22U64-PACK-NEXT: lbu a4, 3(a0)
1435 ; RVA22U64-PACK-NEXT: packh a1, a1, a2
1436 ; RVA22U64-PACK-NEXT: packh a2, a3, a4
1437 ; RVA22U64-PACK-NEXT: lbu a3, 4(a0)
1438 ; RVA22U64-PACK-NEXT: lbu a4, 5(a0)
1439 ; RVA22U64-PACK-NEXT: packw a6, a1, a2
1440 ; RVA22U64-PACK-NEXT: lbu a2, 6(a0)
1441 ; RVA22U64-PACK-NEXT: lbu a5, 7(a0)
1442 ; RVA22U64-PACK-NEXT: packh a3, a3, a4
1443 ; RVA22U64-PACK-NEXT: lbu a4, 8(a0)
1444 ; RVA22U64-PACK-NEXT: lbu a1, 9(a0)
1445 ; RVA22U64-PACK-NEXT: packh a2, a2, a5
1446 ; RVA22U64-PACK-NEXT: packw a2, a3, a2
1447 ; RVA22U64-PACK-NEXT: pack a6, a6, a2
1448 ; RVA22U64-PACK-NEXT: packh a7, a4, a1
1449 ; RVA22U64-PACK-NEXT: lbu a3, 10(a0)
1450 ; RVA22U64-PACK-NEXT: lbu a4, 11(a0)
1451 ; RVA22U64-PACK-NEXT: lbu a5, 12(a0)
1452 ; RVA22U64-PACK-NEXT: lbu a2, 13(a0)
1453 ; RVA22U64-PACK-NEXT: lbu a1, 14(a0)
1454 ; RVA22U64-PACK-NEXT: lbu a0, 15(a0)
1455 ; RVA22U64-PACK-NEXT: packh a3, a3, a4
1456 ; RVA22U64-PACK-NEXT: packw a3, a7, a3
1457 ; RVA22U64-PACK-NEXT: packh a2, a5, a2
1458 ; RVA22U64-PACK-NEXT: packh a0, a1, a0
1459 ; RVA22U64-PACK-NEXT: packw a0, a2, a0
1460 ; RVA22U64-PACK-NEXT: pack a0, a3, a0
1461 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1462 ; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
1463 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
1464 ; RVA22U64-PACK-NEXT: ret
1466 ; RV64ZVE32-LABEL: buildvec_v16i8_loads_contigous:
1467 ; RV64ZVE32: # %bb.0:
1468 ; RV64ZVE32-NEXT: addi sp, sp, -16
1469 ; RV64ZVE32-NEXT: .cfi_def_cfa_offset 16
1470 ; RV64ZVE32-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
1471 ; RV64ZVE32-NEXT: .cfi_offset s0, -8
1472 ; RV64ZVE32-NEXT: lbu a1, 0(a0)
1473 ; RV64ZVE32-NEXT: lbu a2, 1(a0)
1474 ; RV64ZVE32-NEXT: lbu a3, 2(a0)
1475 ; RV64ZVE32-NEXT: lbu a4, 3(a0)
1476 ; RV64ZVE32-NEXT: lbu a5, 4(a0)
1477 ; RV64ZVE32-NEXT: lbu a6, 5(a0)
1478 ; RV64ZVE32-NEXT: lbu a7, 6(a0)
1479 ; RV64ZVE32-NEXT: lbu t0, 7(a0)
1480 ; RV64ZVE32-NEXT: lbu t1, 8(a0)
1481 ; RV64ZVE32-NEXT: lbu t2, 9(a0)
1482 ; RV64ZVE32-NEXT: lbu t3, 10(a0)
1483 ; RV64ZVE32-NEXT: lbu t4, 11(a0)
1484 ; RV64ZVE32-NEXT: lbu t5, 12(a0)
1485 ; RV64ZVE32-NEXT: lbu t6, 13(a0)
1486 ; RV64ZVE32-NEXT: lbu s0, 14(a0)
1487 ; RV64ZVE32-NEXT: lbu a0, 15(a0)
1488 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1489 ; RV64ZVE32-NEXT: vmv.v.x v8, a1
1490 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
1491 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
1492 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
1493 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
1494 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
1495 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
1496 ; RV64ZVE32-NEXT: vslide1down.vx v9, v8, t0
1497 ; RV64ZVE32-NEXT: vmv.v.x v8, t1
1498 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
1499 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
1500 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
1501 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
1502 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
1503 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
1504 ; RV64ZVE32-NEXT: li a1, 255
1505 ; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1506 ; RV64ZVE32-NEXT: vmv.s.x v0, a1
1507 ; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
1508 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
1509 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
1510 ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
1511 ; RV64ZVE32-NEXT: addi sp, sp, 16
1512 ; RV64ZVE32-NEXT: ret
1513 %p2 = getelementptr i8, ptr %p, i32 1
1514 %p3 = getelementptr i8, ptr %p, i32 2
1515 %p4 = getelementptr i8, ptr %p, i32 3
1516 %p5 = getelementptr i8, ptr %p, i32 4
1517 %p6 = getelementptr i8, ptr %p, i32 5
1518 %p7 = getelementptr i8, ptr %p, i32 6
1519 %p8 = getelementptr i8, ptr %p, i32 7
1520 %p9 = getelementptr i8, ptr %p, i32 8
1521 %p10 = getelementptr i8, ptr %p, i32 9
1522 %p11 = getelementptr i8, ptr %p, i32 10
1523 %p12 = getelementptr i8, ptr %p, i32 11
1524 %p13 = getelementptr i8, ptr %p, i32 12
1525 %p14 = getelementptr i8, ptr %p, i32 13
1526 %p15 = getelementptr i8, ptr %p, i32 14
1527 %p16 = getelementptr i8, ptr %p, i32 15
1529 %ld1 = load i8, ptr %p
1530 %ld2 = load i8, ptr %p2
1531 %ld3 = load i8, ptr %p3
1532 %ld4 = load i8, ptr %p4
1533 %ld5 = load i8, ptr %p5
1534 %ld6 = load i8, ptr %p6
1535 %ld7 = load i8, ptr %p7
1536 %ld8 = load i8, ptr %p8
1537 %ld9 = load i8, ptr %p9
1538 %ld10 = load i8, ptr %p10
1539 %ld11 = load i8, ptr %p11
1540 %ld12 = load i8, ptr %p12
1541 %ld13 = load i8, ptr %p13
1542 %ld14 = load i8, ptr %p14
1543 %ld15 = load i8, ptr %p15
1544 %ld16 = load i8, ptr %p16
1546 %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
1547 %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
1548 %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
1549 %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
1550 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
1551 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
1552 %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
1553 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
1554 %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
1555 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
1556 %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
1557 %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
1558 %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
1559 %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
1560 %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
1561 %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
1566 define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
1567 ; RV32-ONLY-LABEL: buildvec_v16i8_loads_gather:
1568 ; RV32-ONLY: # %bb.0:
1569 ; RV32-ONLY-NEXT: addi sp, sp, -16
1570 ; RV32-ONLY-NEXT: .cfi_def_cfa_offset 16
1571 ; RV32-ONLY-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
1572 ; RV32-ONLY-NEXT: .cfi_offset s0, -4
1573 ; RV32-ONLY-NEXT: lbu a1, 0(a0)
1574 ; RV32-ONLY-NEXT: lbu a2, 1(a0)
1575 ; RV32-ONLY-NEXT: lbu a3, 22(a0)
1576 ; RV32-ONLY-NEXT: lbu a4, 31(a0)
1577 ; RV32-ONLY-NEXT: lbu a5, 44(a0)
1578 ; RV32-ONLY-NEXT: lbu a6, 55(a0)
1579 ; RV32-ONLY-NEXT: lbu a7, 623(a0)
1580 ; RV32-ONLY-NEXT: lbu t0, 75(a0)
1581 ; RV32-ONLY-NEXT: lbu t1, 82(a0)
1582 ; RV32-ONLY-NEXT: lbu t2, 93(a0)
1583 ; RV32-ONLY-NEXT: lbu t3, 105(a0)
1584 ; RV32-ONLY-NEXT: lbu t4, 161(a0)
1585 ; RV32-ONLY-NEXT: lbu t5, 124(a0)
1586 ; RV32-ONLY-NEXT: lbu t6, 163(a0)
1587 ; RV32-ONLY-NEXT: lbu s0, 144(a0)
1588 ; RV32-ONLY-NEXT: lbu a0, 154(a0)
1589 ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1590 ; RV32-ONLY-NEXT: vmv.v.x v8, a1
1591 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
1592 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
1593 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
1594 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
1595 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
1596 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
1597 ; RV32-ONLY-NEXT: vslide1down.vx v9, v8, t0
1598 ; RV32-ONLY-NEXT: vmv.v.x v8, t1
1599 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
1600 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
1601 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
1602 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
1603 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
1604 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
1605 ; RV32-ONLY-NEXT: li a1, 255
1606 ; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1607 ; RV32-ONLY-NEXT: vmv.s.x v0, a1
1608 ; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
1609 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
1610 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
1611 ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
1612 ; RV32-ONLY-NEXT: addi sp, sp, 16
1613 ; RV32-ONLY-NEXT: ret
1615 ; RV32VB-LABEL: buildvec_v16i8_loads_gather:
1617 ; RV32VB-NEXT: lbu a1, 1(a0)
1618 ; RV32VB-NEXT: lbu a2, 0(a0)
1619 ; RV32VB-NEXT: lbu a3, 22(a0)
1620 ; RV32VB-NEXT: lbu a4, 31(a0)
1621 ; RV32VB-NEXT: slli a1, a1, 8
1622 ; RV32VB-NEXT: or a1, a2, a1
1623 ; RV32VB-NEXT: slli a3, a3, 16
1624 ; RV32VB-NEXT: slli a4, a4, 24
1625 ; RV32VB-NEXT: or a3, a4, a3
1626 ; RV32VB-NEXT: or a1, a1, a3
1627 ; RV32VB-NEXT: lbu a2, 55(a0)
1628 ; RV32VB-NEXT: lbu a3, 44(a0)
1629 ; RV32VB-NEXT: lbu a4, 623(a0)
1630 ; RV32VB-NEXT: lbu a5, 75(a0)
1631 ; RV32VB-NEXT: slli a2, a2, 8
1632 ; RV32VB-NEXT: or a2, a3, a2
1633 ; RV32VB-NEXT: slli a4, a4, 16
1634 ; RV32VB-NEXT: slli a5, a5, 24
1635 ; RV32VB-NEXT: or a4, a5, a4
1636 ; RV32VB-NEXT: or a2, a2, a4
1637 ; RV32VB-NEXT: lbu a3, 93(a0)
1638 ; RV32VB-NEXT: lbu a4, 82(a0)
1639 ; RV32VB-NEXT: lbu a5, 105(a0)
1640 ; RV32VB-NEXT: lbu a6, 161(a0)
1641 ; RV32VB-NEXT: slli a3, a3, 8
1642 ; RV32VB-NEXT: or a3, a4, a3
1643 ; RV32VB-NEXT: slli a5, a5, 16
1644 ; RV32VB-NEXT: slli a6, a6, 24
1645 ; RV32VB-NEXT: or a4, a6, a5
1646 ; RV32VB-NEXT: or a3, a3, a4
1647 ; RV32VB-NEXT: lbu a4, 163(a0)
1648 ; RV32VB-NEXT: lbu a5, 124(a0)
1649 ; RV32VB-NEXT: lbu a6, 144(a0)
1650 ; RV32VB-NEXT: lbu a0, 154(a0)
1651 ; RV32VB-NEXT: slli a4, a4, 8
1652 ; RV32VB-NEXT: or a4, a5, a4
1653 ; RV32VB-NEXT: slli a6, a6, 16
1654 ; RV32VB-NEXT: slli a0, a0, 24
1655 ; RV32VB-NEXT: or a0, a0, a6
1656 ; RV32VB-NEXT: or a0, a4, a0
1657 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1658 ; RV32VB-NEXT: vmv.v.x v8, a1
1659 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2
1660 ; RV32VB-NEXT: vslide1down.vx v8, v8, a3
1661 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0
1662 ; RV32VB-NEXT: ret
1664 ; RV32VB-PACK-LABEL: buildvec_v16i8_loads_gather:
1665 ; RV32VB-PACK: # %bb.0:
1666 ; RV32VB-PACK-NEXT: lbu a1, 0(a0)
1667 ; RV32VB-PACK-NEXT: lbu a2, 1(a0)
1668 ; RV32VB-PACK-NEXT: lbu a3, 22(a0)
1669 ; RV32VB-PACK-NEXT: lbu a4, 31(a0)
1670 ; RV32VB-PACK-NEXT: packh a1, a1, a2
1671 ; RV32VB-PACK-NEXT: packh a2, a3, a4
1672 ; RV32VB-PACK-NEXT: pack a1, a1, a2
1673 ; RV32VB-PACK-NEXT: lbu a2, 44(a0)
1674 ; RV32VB-PACK-NEXT: lbu a3, 55(a0)
1675 ; RV32VB-PACK-NEXT: lbu a4, 623(a0)
1676 ; RV32VB-PACK-NEXT: lbu a5, 75(a0)
1677 ; RV32VB-PACK-NEXT: lbu a6, 82(a0)
1678 ; RV32VB-PACK-NEXT: lbu a7, 93(a0)
1679 ; RV32VB-PACK-NEXT: packh a2, a2, a3
1680 ; RV32VB-PACK-NEXT: packh a3, a4, a5
1681 ; RV32VB-PACK-NEXT: pack a2, a2, a3
1682 ; RV32VB-PACK-NEXT: packh a3, a6, a7
1683 ; RV32VB-PACK-NEXT: lbu a4, 105(a0)
1684 ; RV32VB-PACK-NEXT: lbu a5, 161(a0)
1685 ; RV32VB-PACK-NEXT: lbu a6, 124(a0)
1686 ; RV32VB-PACK-NEXT: lbu a7, 163(a0)
1687 ; RV32VB-PACK-NEXT: lbu t0, 144(a0)
1688 ; RV32VB-PACK-NEXT: lbu a0, 154(a0)
1689 ; RV32VB-PACK-NEXT: packh a4, a4, a5
1690 ; RV32VB-PACK-NEXT: pack a3, a3, a4
1691 ; RV32VB-PACK-NEXT: packh a4, a6, a7
1692 ; RV32VB-PACK-NEXT: packh a0, t0, a0
1693 ; RV32VB-PACK-NEXT: pack a0, a4, a0
1694 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1695 ; RV32VB-PACK-NEXT: vmv.v.x v8, a1
1696 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
1697 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
1698 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
1699 ; RV32VB-PACK-NEXT: ret
1701 ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_gather:
1702 ; RV64V-ONLY: # %bb.0:
1703 ; RV64V-ONLY-NEXT: addi sp, sp, -16
1704 ; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 16
1705 ; RV64V-ONLY-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
1706 ; RV64V-ONLY-NEXT: .cfi_offset s0, -8
1707 ; RV64V-ONLY-NEXT: lbu a1, 0(a0)
1708 ; RV64V-ONLY-NEXT: lbu a2, 1(a0)
1709 ; RV64V-ONLY-NEXT: lbu a3, 22(a0)
1710 ; RV64V-ONLY-NEXT: lbu a4, 31(a0)
1711 ; RV64V-ONLY-NEXT: lbu a5, 44(a0)
1712 ; RV64V-ONLY-NEXT: lbu a6, 55(a0)
1713 ; RV64V-ONLY-NEXT: lbu a7, 623(a0)
1714 ; RV64V-ONLY-NEXT: lbu t0, 75(a0)
1715 ; RV64V-ONLY-NEXT: lbu t1, 82(a0)
1716 ; RV64V-ONLY-NEXT: lbu t2, 93(a0)
1717 ; RV64V-ONLY-NEXT: lbu t3, 105(a0)
1718 ; RV64V-ONLY-NEXT: lbu t4, 161(a0)
1719 ; RV64V-ONLY-NEXT: lbu t5, 124(a0)
1720 ; RV64V-ONLY-NEXT: lbu t6, 163(a0)
1721 ; RV64V-ONLY-NEXT: lbu s0, 144(a0)
1722 ; RV64V-ONLY-NEXT: lbu a0, 154(a0)
1723 ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1724 ; RV64V-ONLY-NEXT: vmv.v.x v8, a1
1725 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
1726 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
1727 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
1728 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
1729 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
1730 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
1731 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, t0
1732 ; RV64V-ONLY-NEXT: vmv.v.x v8, t1
1733 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
1734 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
1735 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
1736 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
1737 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
1738 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
1739 ; RV64V-ONLY-NEXT: li a1, 255
1740 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1741 ; RV64V-ONLY-NEXT: vmv.s.x v0, a1
1742 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
1743 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
1744 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
1745 ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
1746 ; RV64V-ONLY-NEXT: addi sp, sp, 16
1747 ; RV64V-ONLY-NEXT: ret
1749 ; RVA22U64-LABEL: buildvec_v16i8_loads_gather:
1750 ; RVA22U64: # %bb.0:
1751 ; RVA22U64-NEXT: lbu a1, 1(a0)
1752 ; RVA22U64-NEXT: lbu a2, 0(a0)
1753 ; RVA22U64-NEXT: lbu a3, 22(a0)
1754 ; RVA22U64-NEXT: lbu a4, 31(a0)
1755 ; RVA22U64-NEXT: slli a1, a1, 8
1756 ; RVA22U64-NEXT: or a1, a1, a2
1757 ; RVA22U64-NEXT: slli a3, a3, 16
1758 ; RVA22U64-NEXT: slli a4, a4, 24
1759 ; RVA22U64-NEXT: or a3, a3, a4
1760 ; RVA22U64-NEXT: lbu a2, 44(a0)
1761 ; RVA22U64-NEXT: or a1, a1, a3
1762 ; RVA22U64-NEXT: lbu a3, 55(a0)
1763 ; RVA22U64-NEXT: lbu a4, 623(a0)
1764 ; RVA22U64-NEXT: slli a2, a2, 32
1765 ; RVA22U64-NEXT: lbu a5, 75(a0)
1766 ; RVA22U64-NEXT: slli a3, a3, 40
1767 ; RVA22U64-NEXT: or a2, a2, a3
1768 ; RVA22U64-NEXT: slli a4, a4, 48
1769 ; RVA22U64-NEXT: slli a5, a5, 56
1770 ; RVA22U64-NEXT: or a4, a4, a5
1771 ; RVA22U64-NEXT: or a2, a2, a4
1772 ; RVA22U64-NEXT: or a1, a1, a2
1773 ; RVA22U64-NEXT: lbu a2, 93(a0)
1774 ; RVA22U64-NEXT: lbu a3, 82(a0)
1775 ; RVA22U64-NEXT: lbu a4, 105(a0)
1776 ; RVA22U64-NEXT: lbu a5, 161(a0)
1777 ; RVA22U64-NEXT: slli a2, a2, 8
1778 ; RVA22U64-NEXT: or a2, a2, a3
1779 ; RVA22U64-NEXT: slli a4, a4, 16
1780 ; RVA22U64-NEXT: slli a5, a5, 24
1781 ; RVA22U64-NEXT: or a4, a4, a5
1782 ; RVA22U64-NEXT: lbu a3, 124(a0)
1783 ; RVA22U64-NEXT: or a2, a2, a4
1784 ; RVA22U64-NEXT: lbu a4, 163(a0)
1785 ; RVA22U64-NEXT: lbu a5, 144(a0)
1786 ; RVA22U64-NEXT: slli a3, a3, 32
1787 ; RVA22U64-NEXT: lbu a0, 154(a0)
1788 ; RVA22U64-NEXT: slli a4, a4, 40
1789 ; RVA22U64-NEXT: or a3, a3, a4
1790 ; RVA22U64-NEXT: slli a5, a5, 48
1791 ; RVA22U64-NEXT: slli a0, a0, 56
1792 ; RVA22U64-NEXT: or a0, a0, a5
1793 ; RVA22U64-NEXT: or a0, a0, a3
1794 ; RVA22U64-NEXT: or a0, a0, a2
1795 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1796 ; RVA22U64-NEXT: vmv.v.x v8, a1
1797 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
1798 ; RVA22U64-NEXT: ret
1800 ; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_gather:
1801 ; RVA22U64-PACK: # %bb.0:
1802 ; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
1803 ; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
1804 ; RVA22U64-PACK-NEXT: lbu a3, 22(a0)
1805 ; RVA22U64-PACK-NEXT: lbu a4, 31(a0)
1806 ; RVA22U64-PACK-NEXT: packh a1, a1, a2
1807 ; RVA22U64-PACK-NEXT: packh a2, a3, a4
1808 ; RVA22U64-PACK-NEXT: lbu a3, 44(a0)
1809 ; RVA22U64-PACK-NEXT: lbu a4, 55(a0)
1810 ; RVA22U64-PACK-NEXT: packw a6, a1, a2
1811 ; RVA22U64-PACK-NEXT: lbu a2, 623(a0)
1812 ; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
1813 ; RVA22U64-PACK-NEXT: packh a3, a3, a4
1814 ; RVA22U64-PACK-NEXT: lbu a4, 82(a0)
1815 ; RVA22U64-PACK-NEXT: lbu a1, 93(a0)
1816 ; RVA22U64-PACK-NEXT: packh a2, a2, a5
1817 ; RVA22U64-PACK-NEXT: packw a2, a3, a2
1818 ; RVA22U64-PACK-NEXT: pack a6, a6, a2
1819 ; RVA22U64-PACK-NEXT: packh a7, a4, a1
1820 ; RVA22U64-PACK-NEXT: lbu a3, 105(a0)
1821 ; RVA22U64-PACK-NEXT: lbu a4, 161(a0)
1822 ; RVA22U64-PACK-NEXT: lbu a5, 124(a0)
1823 ; RVA22U64-PACK-NEXT: lbu a2, 163(a0)
1824 ; RVA22U64-PACK-NEXT: lbu a1, 144(a0)
1825 ; RVA22U64-PACK-NEXT: lbu a0, 154(a0)
1826 ; RVA22U64-PACK-NEXT: packh a3, a3, a4
1827 ; RVA22U64-PACK-NEXT: packw a3, a7, a3
1828 ; RVA22U64-PACK-NEXT: packh a2, a5, a2
1829 ; RVA22U64-PACK-NEXT: packh a0, a1, a0
1830 ; RVA22U64-PACK-NEXT: packw a0, a2, a0
1831 ; RVA22U64-PACK-NEXT: pack a0, a3, a0
1832 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1833 ; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
1834 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
1835 ; RVA22U64-PACK-NEXT: ret
1837 ; RV64ZVE32-LABEL: buildvec_v16i8_loads_gather:
1838 ; RV64ZVE32: # %bb.0:
1839 ; RV64ZVE32-NEXT: addi sp, sp, -16
1840 ; RV64ZVE32-NEXT: .cfi_def_cfa_offset 16
1841 ; RV64ZVE32-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
1842 ; RV64ZVE32-NEXT: .cfi_offset s0, -8
1843 ; RV64ZVE32-NEXT: lbu a1, 0(a0)
1844 ; RV64ZVE32-NEXT: lbu a2, 1(a0)
1845 ; RV64ZVE32-NEXT: lbu a3, 22(a0)
1846 ; RV64ZVE32-NEXT: lbu a4, 31(a0)
1847 ; RV64ZVE32-NEXT: lbu a5, 44(a0)
1848 ; RV64ZVE32-NEXT: lbu a6, 55(a0)
1849 ; RV64ZVE32-NEXT: lbu a7, 623(a0)
1850 ; RV64ZVE32-NEXT: lbu t0, 75(a0)
1851 ; RV64ZVE32-NEXT: lbu t1, 82(a0)
1852 ; RV64ZVE32-NEXT: lbu t2, 93(a0)
1853 ; RV64ZVE32-NEXT: lbu t3, 105(a0)
1854 ; RV64ZVE32-NEXT: lbu t4, 161(a0)
1855 ; RV64ZVE32-NEXT: lbu t5, 124(a0)
1856 ; RV64ZVE32-NEXT: lbu t6, 163(a0)
1857 ; RV64ZVE32-NEXT: lbu s0, 144(a0)
1858 ; RV64ZVE32-NEXT: lbu a0, 154(a0)
1859 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1860 ; RV64ZVE32-NEXT: vmv.v.x v8, a1
1861 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
1862 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
1863 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
1864 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
1865 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
1866 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
1867 ; RV64ZVE32-NEXT: vslide1down.vx v9, v8, t0
1868 ; RV64ZVE32-NEXT: vmv.v.x v8, t1
1869 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
1870 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
1871 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
1872 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
1873 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
1874 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
1875 ; RV64ZVE32-NEXT: li a1, 255
1876 ; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1877 ; RV64ZVE32-NEXT: vmv.s.x v0, a1
1878 ; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
1879 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
1880 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
1881 ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
1882 ; RV64ZVE32-NEXT: addi sp, sp, 16
1883 ; RV64ZVE32-NEXT: ret
1884 %p2 = getelementptr i8, ptr %p, i32 1
1885 %p3 = getelementptr i8, ptr %p, i32 22
1886 %p4 = getelementptr i8, ptr %p, i32 31
1887 %p5 = getelementptr i8, ptr %p, i32 44
1888 %p6 = getelementptr i8, ptr %p, i32 55
1889 %p7 = getelementptr i8, ptr %p, i32 623
1890 %p8 = getelementptr i8, ptr %p, i32 75
1891 %p9 = getelementptr i8, ptr %p, i32 82
1892 %p10 = getelementptr i8, ptr %p, i32 93
1893 %p11 = getelementptr i8, ptr %p, i32 105
1894 %p12 = getelementptr i8, ptr %p, i32 161
1895 %p13 = getelementptr i8, ptr %p, i32 124
1896 %p14 = getelementptr i8, ptr %p, i32 163
1897 %p15 = getelementptr i8, ptr %p, i32 144
1898 %p16 = getelementptr i8, ptr %p, i32 154
1900 %ld1 = load i8, ptr %p
1901 %ld2 = load i8, ptr %p2
1902 %ld3 = load i8, ptr %p3
1903 %ld4 = load i8, ptr %p4
1904 %ld5 = load i8, ptr %p5
1905 %ld6 = load i8, ptr %p6
1906 %ld7 = load i8, ptr %p7
1907 %ld8 = load i8, ptr %p8
1908 %ld9 = load i8, ptr %p9
1909 %ld10 = load i8, ptr %p10
1910 %ld11 = load i8, ptr %p11
1911 %ld12 = load i8, ptr %p12
1912 %ld13 = load i8, ptr %p13
1913 %ld14 = load i8, ptr %p14
1914 %ld15 = load i8, ptr %p15
1915 %ld16 = load i8, ptr %p16
1917 %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
1918 %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
1919 %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
1920 %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
1921 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
1922 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
1923 %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
1924 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
1925 %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
1926 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
1927 %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
1928 %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
1929 %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
1930 %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
1931 %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
1932 %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
1933 ret <16 x i8> %v16
1934 }
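; Only elements 8-15 are built from loads; the low half of the vector is left undefined.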
1936 define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
1937 ; RV32-ONLY-LABEL: buildvec_v16i8_undef_low_half:
1938 ; RV32-ONLY: # %bb.0:
1939 ; RV32-ONLY-NEXT: lbu a1, 82(a0)
1940 ; RV32-ONLY-NEXT: lbu a2, 93(a0)
1941 ; RV32-ONLY-NEXT: lbu a3, 105(a0)
1942 ; RV32-ONLY-NEXT: lbu a4, 161(a0)
1943 ; RV32-ONLY-NEXT: lbu a5, 124(a0)
1944 ; RV32-ONLY-NEXT: lbu a6, 163(a0)
1945 ; RV32-ONLY-NEXT: lbu a7, 144(a0)
1946 ; RV32-ONLY-NEXT: lbu a0, 154(a0)
1947 ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1948 ; RV32-ONLY-NEXT: vmv.v.x v8, a1
1949 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
1950 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
1951 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
1952 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
1953 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
1954 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
1955 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
1956 ; RV32-ONLY-NEXT: ret
1958 ; RV32VB-LABEL: buildvec_v16i8_undef_low_half:
1959 ; RV32VB: # %bb.0:
1960 ; RV32VB-NEXT: lbu a1, 93(a0)
1961 ; RV32VB-NEXT: lbu a2, 82(a0)
1962 ; RV32VB-NEXT: lbu a3, 105(a0)
1963 ; RV32VB-NEXT: lbu a4, 161(a0)
1964 ; RV32VB-NEXT: slli a1, a1, 8
1965 ; RV32VB-NEXT: or a1, a2, a1
1966 ; RV32VB-NEXT: slli a3, a3, 16
1967 ; RV32VB-NEXT: slli a4, a4, 24
1968 ; RV32VB-NEXT: or a3, a4, a3
1969 ; RV32VB-NEXT: or a1, a1, a3
1970 ; RV32VB-NEXT: lbu a2, 163(a0)
1971 ; RV32VB-NEXT: lbu a3, 124(a0)
1972 ; RV32VB-NEXT: lbu a4, 144(a0)
1973 ; RV32VB-NEXT: lbu a0, 154(a0)
1974 ; RV32VB-NEXT: slli a2, a2, 8
1975 ; RV32VB-NEXT: or a2, a3, a2
1976 ; RV32VB-NEXT: slli a4, a4, 16
1977 ; RV32VB-NEXT: slli a0, a0, 24
1978 ; RV32VB-NEXT: or a0, a0, a4
1979 ; RV32VB-NEXT: or a0, a2, a0
1980 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1981 ; RV32VB-NEXT: vmv.v.i v8, 0
1982 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero
1983 ; RV32VB-NEXT: vslide1down.vx v8, v8, a1
1984 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0
1985 ; RV32VB-NEXT: ret
1987 ; RV32VB-PACK-LABEL: buildvec_v16i8_undef_low_half:
1988 ; RV32VB-PACK: # %bb.0:
1989 ; RV32VB-PACK-NEXT: lbu a1, 82(a0)
1990 ; RV32VB-PACK-NEXT: lbu a2, 93(a0)
1991 ; RV32VB-PACK-NEXT: packh a1, a1, a2
1992 ; RV32VB-PACK-NEXT: lbu a2, 105(a0)
1993 ; RV32VB-PACK-NEXT: lbu a3, 161(a0)
1994 ; RV32VB-PACK-NEXT: lbu a4, 124(a0)
1995 ; RV32VB-PACK-NEXT: lbu a5, 163(a0)
1996 ; RV32VB-PACK-NEXT: lbu a6, 144(a0)
1997 ; RV32VB-PACK-NEXT: lbu a0, 154(a0)
1998 ; RV32VB-PACK-NEXT: packh a2, a2, a3
1999 ; RV32VB-PACK-NEXT: pack a1, a1, a2
2000 ; RV32VB-PACK-NEXT: packh a2, a4, a5
2001 ; RV32VB-PACK-NEXT: packh a0, a6, a0
2002 ; RV32VB-PACK-NEXT: pack a0, a2, a0
2003 ; RV32VB-PACK-NEXT: packh a2, a0, a0
2004 ; RV32VB-PACK-NEXT: pack a2, a2, a2
2005 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2006 ; RV32VB-PACK-NEXT: vmv.v.x v8, a2
2007 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
2008 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
2009 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
2010 ; RV32VB-PACK-NEXT: ret
2012 ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_low_half:
2013 ; RV64V-ONLY: # %bb.0:
2014 ; RV64V-ONLY-NEXT: lbu a1, 82(a0)
2015 ; RV64V-ONLY-NEXT: lbu a2, 93(a0)
2016 ; RV64V-ONLY-NEXT: lbu a3, 105(a0)
2017 ; RV64V-ONLY-NEXT: lbu a4, 161(a0)
2018 ; RV64V-ONLY-NEXT: lbu a5, 124(a0)
2019 ; RV64V-ONLY-NEXT: lbu a6, 163(a0)
2020 ; RV64V-ONLY-NEXT: lbu a7, 144(a0)
2021 ; RV64V-ONLY-NEXT: lbu a0, 154(a0)
2022 ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2023 ; RV64V-ONLY-NEXT: vmv.v.x v8, a1
2024 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
2025 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
2026 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
2027 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
2028 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
2029 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
2030 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
2031 ; RV64V-ONLY-NEXT: ret
2033 ; RVA22U64-LABEL: buildvec_v16i8_undef_low_half:
2034 ; RVA22U64: # %bb.0:
2035 ; RVA22U64-NEXT: lbu a1, 93(a0)
2036 ; RVA22U64-NEXT: lbu a2, 82(a0)
2037 ; RVA22U64-NEXT: lbu a3, 105(a0)
2038 ; RVA22U64-NEXT: lbu a4, 161(a0)
2039 ; RVA22U64-NEXT: slli a1, a1, 8
2040 ; RVA22U64-NEXT: or a1, a1, a2
2041 ; RVA22U64-NEXT: slli a3, a3, 16
2042 ; RVA22U64-NEXT: slli a4, a4, 24
2043 ; RVA22U64-NEXT: or a3, a3, a4
2044 ; RVA22U64-NEXT: lbu a2, 124(a0)
2045 ; RVA22U64-NEXT: or a1, a1, a3
2046 ; RVA22U64-NEXT: lbu a3, 163(a0)
2047 ; RVA22U64-NEXT: lbu a4, 144(a0)
2048 ; RVA22U64-NEXT: slli a2, a2, 32
2049 ; RVA22U64-NEXT: lbu a0, 154(a0)
2050 ; RVA22U64-NEXT: slli a3, a3, 40
2051 ; RVA22U64-NEXT: or a2, a2, a3
2052 ; RVA22U64-NEXT: slli a4, a4, 48
2053 ; RVA22U64-NEXT: slli a0, a0, 56
2054 ; RVA22U64-NEXT: or a0, a0, a4
2055 ; RVA22U64-NEXT: or a0, a0, a2
2056 ; RVA22U64-NEXT: or a0, a0, a1
2057 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2058 ; RVA22U64-NEXT: vmv.v.i v8, 0
2059 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
2060 ; RVA22U64-NEXT: ret
2062 ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_low_half:
2063 ; RVA22U64-PACK: # %bb.0:
2064 ; RVA22U64-PACK-NEXT: lbu a1, 82(a0)
2065 ; RVA22U64-PACK-NEXT: lbu a2, 93(a0)
2066 ; RVA22U64-PACK-NEXT: packh a6, a1, a2
2067 ; RVA22U64-PACK-NEXT: lbu a2, 105(a0)
2068 ; RVA22U64-PACK-NEXT: lbu a3, 161(a0)
2069 ; RVA22U64-PACK-NEXT: lbu a4, 124(a0)
2070 ; RVA22U64-PACK-NEXT: lbu a5, 163(a0)
2071 ; RVA22U64-PACK-NEXT: lbu a1, 144(a0)
2072 ; RVA22U64-PACK-NEXT: lbu a0, 154(a0)
2073 ; RVA22U64-PACK-NEXT: packh a2, a2, a3
2074 ; RVA22U64-PACK-NEXT: packw a2, a6, a2
2075 ; RVA22U64-PACK-NEXT: packh a3, a4, a5
2076 ; RVA22U64-PACK-NEXT: packh a0, a1, a0
2077 ; RVA22U64-PACK-NEXT: packw a0, a3, a0
2078 ; RVA22U64-PACK-NEXT: pack a0, a2, a0
2079 ; RVA22U64-PACK-NEXT: packh a1, a0, a0
2080 ; RVA22U64-PACK-NEXT: packw a1, a1, a1
2081 ; RVA22U64-PACK-NEXT: pack a1, a1, a1
2082 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2083 ; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
2084 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
2085 ; RVA22U64-PACK-NEXT: ret
2087 ; RV64ZVE32-LABEL: buildvec_v16i8_undef_low_half:
2088 ; RV64ZVE32: # %bb.0:
2089 ; RV64ZVE32-NEXT: lbu a1, 82(a0)
2090 ; RV64ZVE32-NEXT: lbu a2, 93(a0)
2091 ; RV64ZVE32-NEXT: lbu a3, 105(a0)
2092 ; RV64ZVE32-NEXT: lbu a4, 161(a0)
2093 ; RV64ZVE32-NEXT: lbu a5, 124(a0)
2094 ; RV64ZVE32-NEXT: lbu a6, 163(a0)
2095 ; RV64ZVE32-NEXT: lbu a7, 144(a0)
2096 ; RV64ZVE32-NEXT: lbu a0, 154(a0)
2097 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2098 ; RV64ZVE32-NEXT: vmv.v.x v8, a1
2099 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
2100 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
2101 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
2102 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
2103 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
2104 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
2105 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
2106 ; RV64ZVE32-NEXT: ret
2107 %p9 = getelementptr i8, ptr %p, i32 82
2108 %p10 = getelementptr i8, ptr %p, i32 93
2109 %p11 = getelementptr i8, ptr %p, i32 105
2110 %p12 = getelementptr i8, ptr %p, i32 161
2111 %p13 = getelementptr i8, ptr %p, i32 124
2112 %p14 = getelementptr i8, ptr %p, i32 163
2113 %p15 = getelementptr i8, ptr %p, i32 144
2114 %p16 = getelementptr i8, ptr %p, i32 154
2116 %ld9 = load i8, ptr %p9
2117 %ld10 = load i8, ptr %p10
2118 %ld11 = load i8, ptr %p11
2119 %ld12 = load i8, ptr %p12
2120 %ld13 = load i8, ptr %p13
2121 %ld14 = load i8, ptr %p14
2122 %ld15 = load i8, ptr %p15
2123 %ld16 = load i8, ptr %p16
2125 %v9 = insertelement <16 x i8> poison, i8 %ld9, i32 8
2126 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
2127 %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
2128 %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
2129 %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
2130 %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
2131 %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
2132 %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
2133 ret <16 x i8> %v16
2134 }
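; Only elements 0-7 are built from loads; the high half of the vector is left undefined.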
2136 define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
2137 ; RV32-ONLY-LABEL: buildvec_v16i8_undef_high_half:
2138 ; RV32-ONLY: # %bb.0:
2139 ; RV32-ONLY-NEXT: lbu a1, 0(a0)
2140 ; RV32-ONLY-NEXT: lbu a2, 1(a0)
2141 ; RV32-ONLY-NEXT: lbu a3, 22(a0)
2142 ; RV32-ONLY-NEXT: lbu a4, 31(a0)
2143 ; RV32-ONLY-NEXT: lbu a5, 44(a0)
2144 ; RV32-ONLY-NEXT: lbu a6, 55(a0)
2145 ; RV32-ONLY-NEXT: lbu a7, 623(a0)
2146 ; RV32-ONLY-NEXT: lbu a0, 75(a0)
2147 ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2148 ; RV32-ONLY-NEXT: vmv.v.x v8, a1
2149 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
2150 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
2151 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
2152 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
2153 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
2154 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
2155 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
2156 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 8
2157 ; RV32-ONLY-NEXT: ret
2159 ; RV32VB-LABEL: buildvec_v16i8_undef_high_half:
2160 ; RV32VB: # %bb.0:
2161 ; RV32VB-NEXT: lbu a1, 1(a0)
2162 ; RV32VB-NEXT: lbu a2, 0(a0)
2163 ; RV32VB-NEXT: lbu a3, 22(a0)
2164 ; RV32VB-NEXT: lbu a4, 31(a0)
2165 ; RV32VB-NEXT: slli a1, a1, 8
2166 ; RV32VB-NEXT: or a1, a2, a1
2167 ; RV32VB-NEXT: slli a3, a3, 16
2168 ; RV32VB-NEXT: slli a4, a4, 24
2169 ; RV32VB-NEXT: or a3, a4, a3
2170 ; RV32VB-NEXT: or a1, a1, a3
2171 ; RV32VB-NEXT: lbu a2, 55(a0)
2172 ; RV32VB-NEXT: lbu a3, 44(a0)
2173 ; RV32VB-NEXT: lbu a4, 623(a0)
2174 ; RV32VB-NEXT: lbu a0, 75(a0)
2175 ; RV32VB-NEXT: slli a2, a2, 8
2176 ; RV32VB-NEXT: or a2, a3, a2
2177 ; RV32VB-NEXT: slli a4, a4, 16
2178 ; RV32VB-NEXT: slli a0, a0, 24
2179 ; RV32VB-NEXT: or a0, a0, a4
2180 ; RV32VB-NEXT: or a0, a2, a0
2181 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2182 ; RV32VB-NEXT: vmv.v.x v8, a1
2183 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0
2184 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero
2185 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero
2186 ; RV32VB-NEXT: ret
2188 ; RV32VB-PACK-LABEL: buildvec_v16i8_undef_high_half:
2189 ; RV32VB-PACK: # %bb.0:
2190 ; RV32VB-PACK-NEXT: lbu a1, 0(a0)
2191 ; RV32VB-PACK-NEXT: lbu a2, 1(a0)
2192 ; RV32VB-PACK-NEXT: packh a1, a1, a2
2193 ; RV32VB-PACK-NEXT: lbu a2, 22(a0)
2194 ; RV32VB-PACK-NEXT: lbu a3, 31(a0)
2195 ; RV32VB-PACK-NEXT: lbu a4, 44(a0)
2196 ; RV32VB-PACK-NEXT: lbu a5, 55(a0)
2197 ; RV32VB-PACK-NEXT: lbu a6, 623(a0)
2198 ; RV32VB-PACK-NEXT: lbu a0, 75(a0)
2199 ; RV32VB-PACK-NEXT: packh a2, a2, a3
2200 ; RV32VB-PACK-NEXT: pack a1, a1, a2
2201 ; RV32VB-PACK-NEXT: packh a2, a4, a5
2202 ; RV32VB-PACK-NEXT: packh a0, a6, a0
2203 ; RV32VB-PACK-NEXT: pack a0, a2, a0
2204 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2205 ; RV32VB-PACK-NEXT: vmv.v.x v8, a1
2206 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
2207 ; RV32VB-PACK-NEXT: packh a0, a0, a0
2208 ; RV32VB-PACK-NEXT: pack a0, a0, a0
2209 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
2210 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
2211 ; RV32VB-PACK-NEXT: ret
2213 ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_high_half:
2214 ; RV64V-ONLY: # %bb.0:
2215 ; RV64V-ONLY-NEXT: lbu a1, 0(a0)
2216 ; RV64V-ONLY-NEXT: lbu a2, 1(a0)
2217 ; RV64V-ONLY-NEXT: lbu a3, 22(a0)
2218 ; RV64V-ONLY-NEXT: lbu a4, 31(a0)
2219 ; RV64V-ONLY-NEXT: lbu a5, 44(a0)
2220 ; RV64V-ONLY-NEXT: lbu a6, 55(a0)
2221 ; RV64V-ONLY-NEXT: lbu a7, 623(a0)
2222 ; RV64V-ONLY-NEXT: lbu a0, 75(a0)
2223 ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2224 ; RV64V-ONLY-NEXT: vmv.v.x v8, a1
2225 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
2226 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
2227 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
2228 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
2229 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
2230 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
2231 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
2232 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 8
2233 ; RV64V-ONLY-NEXT: ret
2235 ; RVA22U64-LABEL: buildvec_v16i8_undef_high_half:
2236 ; RVA22U64: # %bb.0:
2237 ; RVA22U64-NEXT: lbu a1, 1(a0)
2238 ; RVA22U64-NEXT: lbu a2, 0(a0)
2239 ; RVA22U64-NEXT: lbu a3, 22(a0)
2240 ; RVA22U64-NEXT: lbu a4, 31(a0)
2241 ; RVA22U64-NEXT: slli a1, a1, 8
2242 ; RVA22U64-NEXT: or a1, a1, a2
2243 ; RVA22U64-NEXT: slli a3, a3, 16
2244 ; RVA22U64-NEXT: slli a4, a4, 24
2245 ; RVA22U64-NEXT: or a3, a3, a4
2246 ; RVA22U64-NEXT: lbu a2, 44(a0)
2247 ; RVA22U64-NEXT: or a1, a1, a3
2248 ; RVA22U64-NEXT: lbu a3, 55(a0)
2249 ; RVA22U64-NEXT: lbu a4, 623(a0)
2250 ; RVA22U64-NEXT: slli a2, a2, 32
2251 ; RVA22U64-NEXT: lbu a0, 75(a0)
2252 ; RVA22U64-NEXT: slli a3, a3, 40
2253 ; RVA22U64-NEXT: or a2, a2, a3
2254 ; RVA22U64-NEXT: slli a4, a4, 48
2255 ; RVA22U64-NEXT: slli a0, a0, 56
2256 ; RVA22U64-NEXT: or a0, a0, a4
2257 ; RVA22U64-NEXT: or a0, a0, a2
2258 ; RVA22U64-NEXT: or a0, a0, a1
2259 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2260 ; RVA22U64-NEXT: vmv.v.x v8, a0
2261 ; RVA22U64-NEXT: vslide1down.vx v8, v8, zero
2262 ; RVA22U64-NEXT: ret
2264 ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_high_half:
2265 ; RVA22U64-PACK: # %bb.0:
2266 ; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
2267 ; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
2268 ; RVA22U64-PACK-NEXT: packh a6, a1, a2
2269 ; RVA22U64-PACK-NEXT: lbu a2, 22(a0)
2270 ; RVA22U64-PACK-NEXT: lbu a3, 31(a0)
2271 ; RVA22U64-PACK-NEXT: lbu a4, 44(a0)
2272 ; RVA22U64-PACK-NEXT: lbu a5, 55(a0)
2273 ; RVA22U64-PACK-NEXT: lbu a1, 623(a0)
2274 ; RVA22U64-PACK-NEXT: lbu a0, 75(a0)
2275 ; RVA22U64-PACK-NEXT: packh a2, a2, a3
2276 ; RVA22U64-PACK-NEXT: packw a2, a6, a2
2277 ; RVA22U64-PACK-NEXT: packh a3, a4, a5
2278 ; RVA22U64-PACK-NEXT: packh a0, a1, a0
2279 ; RVA22U64-PACK-NEXT: packw a0, a3, a0
2280 ; RVA22U64-PACK-NEXT: pack a0, a2, a0
2281 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2282 ; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
2283 ; RVA22U64-PACK-NEXT: packh a0, a0, a0
2284 ; RVA22U64-PACK-NEXT: packw a0, a0, a0
2285 ; RVA22U64-PACK-NEXT: pack a0, a0, a0
2286 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
2287 ; RVA22U64-PACK-NEXT: ret
2289 ; RV64ZVE32-LABEL: buildvec_v16i8_undef_high_half:
2290 ; RV64ZVE32: # %bb.0:
2291 ; RV64ZVE32-NEXT: lbu a1, 0(a0)
2292 ; RV64ZVE32-NEXT: lbu a2, 1(a0)
2293 ; RV64ZVE32-NEXT: lbu a3, 22(a0)
2294 ; RV64ZVE32-NEXT: lbu a4, 31(a0)
2295 ; RV64ZVE32-NEXT: lbu a5, 44(a0)
2296 ; RV64ZVE32-NEXT: lbu a6, 55(a0)
2297 ; RV64ZVE32-NEXT: lbu a7, 623(a0)
2298 ; RV64ZVE32-NEXT: lbu a0, 75(a0)
2299 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2300 ; RV64ZVE32-NEXT: vmv.v.x v8, a1
2301 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
2302 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
2303 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
2304 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
2305 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
2306 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
2307 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
2308 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 8
2309 ; RV64ZVE32-NEXT: ret
2310 %p2 = getelementptr i8, ptr %p, i32 1
2311 %p3 = getelementptr i8, ptr %p, i32 22
2312 %p4 = getelementptr i8, ptr %p, i32 31
2313 %p5 = getelementptr i8, ptr %p, i32 44
2314 %p6 = getelementptr i8, ptr %p, i32 55
2315 %p7 = getelementptr i8, ptr %p, i32 623
2316 %p8 = getelementptr i8, ptr %p, i32 75
2318 %ld1 = load i8, ptr %p
2319 %ld2 = load i8, ptr %p2
2320 %ld3 = load i8, ptr %p3
2321 %ld4 = load i8, ptr %p4
2322 %ld5 = load i8, ptr %p5
2323 %ld6 = load i8, ptr %p6
2324 %ld7 = load i8, ptr %p7
2325 %ld8 = load i8, ptr %p8
2327 %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
2328 %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
2329 %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
2330 %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
2331 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
2332 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
2333 %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
2334 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
2335 ret <16 x i8> %v8
2336 }
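; Elements 3-11 are built from loads; the leading and trailing elements are left undefined.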
2338 define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
2339 ; RV32-ONLY-LABEL: buildvec_v16i8_undef_edges:
2340 ; RV32-ONLY: # %bb.0:
2341 ; RV32-ONLY-NEXT: lbu a1, 31(a0)
2342 ; RV32-ONLY-NEXT: lbu a2, 44(a0)
2343 ; RV32-ONLY-NEXT: lbu a3, 55(a0)
2344 ; RV32-ONLY-NEXT: lbu a4, 623(a0)
2345 ; RV32-ONLY-NEXT: lbu a5, 75(a0)
2346 ; RV32-ONLY-NEXT: lbu a6, 82(a0)
2347 ; RV32-ONLY-NEXT: lbu a7, 93(a0)
2348 ; RV32-ONLY-NEXT: lbu t0, 105(a0)
2349 ; RV32-ONLY-NEXT: lbu a0, 161(a0)
2350 ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2351 ; RV32-ONLY-NEXT: vmv.v.x v8, a1
2352 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
2353 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
2354 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
2355 ; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a5
2356 ; RV32-ONLY-NEXT: vmv.v.x v8, a6
2357 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
2358 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0
2359 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
2360 ; RV32-ONLY-NEXT: li a0, 255
2361 ; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2362 ; RV32-ONLY-NEXT: vmv.s.x v0, a0
2363 ; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
2364 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 4
2365 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
2366 ; RV32-ONLY-NEXT: ret
2368 ; RV32VB-LABEL: buildvec_v16i8_undef_edges:
2369 ; RV32VB: # %bb.0:
2370 ; RV32VB-NEXT: lbu a1, 31(a0)
2371 ; RV32VB-NEXT: lbu a2, 55(a0)
2372 ; RV32VB-NEXT: lbu a3, 44(a0)
2373 ; RV32VB-NEXT: lbu a4, 623(a0)
2374 ; RV32VB-NEXT: lbu a5, 75(a0)
2375 ; RV32VB-NEXT: slli a2, a2, 8
2376 ; RV32VB-NEXT: or a2, a3, a2
2377 ; RV32VB-NEXT: slli a4, a4, 16
2378 ; RV32VB-NEXT: slli a5, a5, 24
2379 ; RV32VB-NEXT: lbu a3, 93(a0)
2380 ; RV32VB-NEXT: or a4, a5, a4
2381 ; RV32VB-NEXT: or a2, a2, a4
2382 ; RV32VB-NEXT: lbu a4, 82(a0)
2383 ; RV32VB-NEXT: slli a3, a3, 8
2384 ; RV32VB-NEXT: lbu a5, 105(a0)
2385 ; RV32VB-NEXT: lbu a0, 161(a0)
2386 ; RV32VB-NEXT: or a3, a4, a3
2387 ; RV32VB-NEXT: slli a1, a1, 24
2388 ; RV32VB-NEXT: slli a5, a5, 16
2389 ; RV32VB-NEXT: slli a0, a0, 24
2390 ; RV32VB-NEXT: or a0, a0, a5
2391 ; RV32VB-NEXT: or a0, a3, a0
2392 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2393 ; RV32VB-NEXT: vmv.v.x v8, a1
2394 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2
2395 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0
2396 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero
2397 ; RV32VB-NEXT: ret
2399 ; RV32VB-PACK-LABEL: buildvec_v16i8_undef_edges:
2400 ; RV32VB-PACK: # %bb.0:
2401 ; RV32VB-PACK-NEXT: lbu a1, 31(a0)
2402 ; RV32VB-PACK-NEXT: lbu a2, 44(a0)
2403 ; RV32VB-PACK-NEXT: lbu a3, 55(a0)
2404 ; RV32VB-PACK-NEXT: lbu a4, 623(a0)
2405 ; RV32VB-PACK-NEXT: lbu a5, 75(a0)
2406 ; RV32VB-PACK-NEXT: packh a1, a0, a1
2407 ; RV32VB-PACK-NEXT: packh a2, a2, a3
2408 ; RV32VB-PACK-NEXT: packh a3, a4, a5
2409 ; RV32VB-PACK-NEXT: lbu a4, 82(a0)
2410 ; RV32VB-PACK-NEXT: lbu a5, 93(a0)
2411 ; RV32VB-PACK-NEXT: pack a2, a2, a3
2412 ; RV32VB-PACK-NEXT: lbu a3, 105(a0)
2413 ; RV32VB-PACK-NEXT: lbu a0, 161(a0)
2414 ; RV32VB-PACK-NEXT: packh a4, a4, a5
2415 ; RV32VB-PACK-NEXT: packh a5, a0, a0
2416 ; RV32VB-PACK-NEXT: pack a1, a5, a1
2417 ; RV32VB-PACK-NEXT: packh a0, a3, a0
2418 ; RV32VB-PACK-NEXT: pack a0, a4, a0
2419 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2420 ; RV32VB-PACK-NEXT: vmv.v.x v8, a1
2421 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
2422 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
2423 ; RV32VB-PACK-NEXT: pack a0, a5, a5
2424 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
2425 ; RV32VB-PACK-NEXT: ret
2427 ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_edges:
2428 ; RV64V-ONLY: # %bb.0:
2429 ; RV64V-ONLY-NEXT: lbu a1, 31(a0)
2430 ; RV64V-ONLY-NEXT: lbu a2, 44(a0)
2431 ; RV64V-ONLY-NEXT: lbu a3, 55(a0)
2432 ; RV64V-ONLY-NEXT: lbu a4, 623(a0)
2433 ; RV64V-ONLY-NEXT: lbu a5, 75(a0)
2434 ; RV64V-ONLY-NEXT: lbu a6, 82(a0)
2435 ; RV64V-ONLY-NEXT: lbu a7, 93(a0)
2436 ; RV64V-ONLY-NEXT: lbu t0, 105(a0)
2437 ; RV64V-ONLY-NEXT: lbu a0, 161(a0)
2438 ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2439 ; RV64V-ONLY-NEXT: vmv.v.x v8, a1
2440 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
2441 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
2442 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
2443 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a5
2444 ; RV64V-ONLY-NEXT: vmv.v.x v8, a6
2445 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
2446 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
2447 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
2448 ; RV64V-ONLY-NEXT: li a0, 255
2449 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2450 ; RV64V-ONLY-NEXT: vmv.s.x v0, a0
2451 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
2452 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 4
2453 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
2454 ; RV64V-ONLY-NEXT: ret
2456 ; RVA22U64-LABEL: buildvec_v16i8_undef_edges:
2457 ; RVA22U64: # %bb.0:
2458 ; RVA22U64-NEXT: lbu a1, 44(a0)
2459 ; RVA22U64-NEXT: lbu a2, 55(a0)
2460 ; RVA22U64-NEXT: lbu a3, 31(a0)
2461 ; RVA22U64-NEXT: lbu a4, 623(a0)
2462 ; RVA22U64-NEXT: slli a1, a1, 32
2463 ; RVA22U64-NEXT: slli a2, a2, 40
2464 ; RVA22U64-NEXT: lbu a5, 75(a0)
2465 ; RVA22U64-NEXT: or a1, a1, a2
2466 ; RVA22U64-NEXT: slli a3, a3, 24
2467 ; RVA22U64-NEXT: slli a4, a4, 48
2468 ; RVA22U64-NEXT: slli a5, a5, 56
2469 ; RVA22U64-NEXT: or a4, a4, a5
2470 ; RVA22U64-NEXT: or a1, a1, a4
2471 ; RVA22U64-NEXT: add.uw a1, a3, a1
2472 ; RVA22U64-NEXT: lbu a2, 93(a0)
2473 ; RVA22U64-NEXT: lbu a3, 82(a0)
2474 ; RVA22U64-NEXT: lbu a4, 105(a0)
2475 ; RVA22U64-NEXT: lbu a0, 161(a0)
2476 ; RVA22U64-NEXT: slli a2, a2, 8
2477 ; RVA22U64-NEXT: or a2, a2, a3
2478 ; RVA22U64-NEXT: slli a4, a4, 16
2479 ; RVA22U64-NEXT: slli a0, a0, 24
2480 ; RVA22U64-NEXT: or a0, a0, a4
2481 ; RVA22U64-NEXT: or a0, a0, a2
2482 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2483 ; RVA22U64-NEXT: vmv.v.x v8, a1
2484 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
2485 ; RVA22U64-NEXT: ret
2487 ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_edges:
2488 ; RVA22U64-PACK: # %bb.0:
2489 ; RVA22U64-PACK-NEXT: lbu a1, 31(a0)
2490 ; RVA22U64-PACK-NEXT: lbu a2, 44(a0)
2491 ; RVA22U64-PACK-NEXT: lbu a3, 55(a0)
2492 ; RVA22U64-PACK-NEXT: lbu a4, 623(a0)
2493 ; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
2494 ; RVA22U64-PACK-NEXT: packh a6, a0, a1
2495 ; RVA22U64-PACK-NEXT: packh a1, a0, a0
2496 ; RVA22U64-PACK-NEXT: packh a2, a2, a3
2497 ; RVA22U64-PACK-NEXT: packh a3, a4, a5
2498 ; RVA22U64-PACK-NEXT: packw a7, a2, a3
2499 ; RVA22U64-PACK-NEXT: lbu a3, 82(a0)
2500 ; RVA22U64-PACK-NEXT: lbu a4, 93(a0)
2501 ; RVA22U64-PACK-NEXT: lbu a5, 105(a0)
2502 ; RVA22U64-PACK-NEXT: lbu a0, 161(a0)
2503 ; RVA22U64-PACK-NEXT: packw a2, a1, a6
2504 ; RVA22U64-PACK-NEXT: pack a2, a2, a7
2505 ; RVA22U64-PACK-NEXT: packh a3, a3, a4
2506 ; RVA22U64-PACK-NEXT: packh a0, a5, a0
2507 ; RVA22U64-PACK-NEXT: packw a0, a3, a0
2508 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2509 ; RVA22U64-PACK-NEXT: vmv.v.x v8, a2
2510 ; RVA22U64-PACK-NEXT: packw a1, a1, a1
2511 ; RVA22U64-PACK-NEXT: pack a0, a0, a1
2512 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
2513 ; RVA22U64-PACK-NEXT: ret
2515 ; RV64ZVE32-LABEL: buildvec_v16i8_undef_edges:
2516 ; RV64ZVE32: # %bb.0:
2517 ; RV64ZVE32-NEXT: lbu a1, 31(a0)
2518 ; RV64ZVE32-NEXT: lbu a2, 44(a0)
2519 ; RV64ZVE32-NEXT: lbu a3, 55(a0)
2520 ; RV64ZVE32-NEXT: lbu a4, 623(a0)
2521 ; RV64ZVE32-NEXT: lbu a5, 75(a0)
2522 ; RV64ZVE32-NEXT: lbu a6, 82(a0)
2523 ; RV64ZVE32-NEXT: lbu a7, 93(a0)
2524 ; RV64ZVE32-NEXT: lbu t0, 105(a0)
2525 ; RV64ZVE32-NEXT: lbu a0, 161(a0)
2526 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2527 ; RV64ZVE32-NEXT: vmv.v.x v8, a1
2528 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
2529 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
2530 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
2531 ; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a5
2532 ; RV64ZVE32-NEXT: vmv.v.x v8, a6
2533 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
2534 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
2535 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
2536 ; RV64ZVE32-NEXT: li a0, 255
2537 ; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2538 ; RV64ZVE32-NEXT: vmv.s.x v0, a0
2539 ; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
2540 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 4
2541 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
2542 ; RV64ZVE32-NEXT: ret
2543 %p4 = getelementptr i8, ptr %p, i32 31
2544 %p5 = getelementptr i8, ptr %p, i32 44
2545 %p6 = getelementptr i8, ptr %p, i32 55
2546 %p7 = getelementptr i8, ptr %p, i32 623
2547 %p8 = getelementptr i8, ptr %p, i32 75
2548 %p9 = getelementptr i8, ptr %p, i32 82
2549 %p10 = getelementptr i8, ptr %p, i32 93
2550 %p11 = getelementptr i8, ptr %p, i32 105
2551 %p12 = getelementptr i8, ptr %p, i32 161
2553 %ld4 = load i8, ptr %p4
2554 %ld5 = load i8, ptr %p5
2555 %ld6 = load i8, ptr %p6
2556 %ld7 = load i8, ptr %p7
2557 %ld8 = load i8, ptr %p8
2558 %ld9 = load i8, ptr %p9
2559 %ld10 = load i8, ptr %p10
2560 %ld11 = load i8, ptr %p11
2561 %ld12 = load i8, ptr %p12
2563 %v4 = insertelement <16 x i8> poison, i8 %ld4, i32 3
2564 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
2565 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
2566 %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
2567 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
2568 %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
2569 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
2570 %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
2571 %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
2572 ret <16 x i8> %v12
2573 }
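; Loaded elements are interleaved with undef elements throughout the vector.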
2575 define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
2576 ; RV32-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered:
2577 ; RV32-ONLY: # %bb.0:
2578 ; RV32-ONLY-NEXT: lbu a1, 0(a0)
2579 ; RV32-ONLY-NEXT: lbu a2, 1(a0)
2580 ; RV32-ONLY-NEXT: lbu a3, 44(a0)
2581 ; RV32-ONLY-NEXT: lbu a4, 55(a0)
2582 ; RV32-ONLY-NEXT: lbu a5, 75(a0)
2583 ; RV32-ONLY-NEXT: lbu a6, 82(a0)
2584 ; RV32-ONLY-NEXT: lbu a7, 93(a0)
2585 ; RV32-ONLY-NEXT: lbu t0, 124(a0)
2586 ; RV32-ONLY-NEXT: lbu t1, 144(a0)
2587 ; RV32-ONLY-NEXT: lbu a0, 154(a0)
2588 ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2589 ; RV32-ONLY-NEXT: vmv.v.x v8, a1
2590 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
2591 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
2592 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
2593 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
2594 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1
2595 ; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a5
2596 ; RV32-ONLY-NEXT: vmv.v.x v8, a6
2597 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
2598 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
2599 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0
2600 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1
2601 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t1
2602 ; RV32-ONLY-NEXT: li a1, 255
2603 ; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2604 ; RV32-ONLY-NEXT: vmv.s.x v0, a1
2605 ; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
2606 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
2607 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
2608 ; RV32-ONLY-NEXT: ret
2610 ; RV32VB-LABEL: buildvec_v16i8_loads_undef_scattered:
2611 ; RV32VB: # %bb.0:
2612 ; RV32VB-NEXT: lbu a1, 1(a0)
2613 ; RV32VB-NEXT: lbu a2, 0(a0)
2614 ; RV32VB-NEXT: slli a1, a1, 8
2615 ; RV32VB-NEXT: lbu a3, 55(a0)
2616 ; RV32VB-NEXT: lbu a4, 44(a0)
2617 ; RV32VB-NEXT: or a1, a2, a1
2618 ; RV32VB-NEXT: lbu a2, 75(a0)
2619 ; RV32VB-NEXT: slli a3, a3, 8
2620 ; RV32VB-NEXT: or a3, a4, a3
2621 ; RV32VB-NEXT: lbu a4, 93(a0)
2622 ; RV32VB-NEXT: slli a2, a2, 24
2623 ; RV32VB-NEXT: or a2, a3, a2
2624 ; RV32VB-NEXT: lbu a3, 82(a0)
2625 ; RV32VB-NEXT: slli a4, a4, 8
2626 ; RV32VB-NEXT: lbu a5, 144(a0)
2627 ; RV32VB-NEXT: lbu a6, 154(a0)
2628 ; RV32VB-NEXT: or a3, a3, a4
2629 ; RV32VB-NEXT: lbu a0, 124(a0)
2630 ; RV32VB-NEXT: slli a5, a5, 16
2631 ; RV32VB-NEXT: slli a6, a6, 24
2632 ; RV32VB-NEXT: or a4, a6, a5
2633 ; RV32VB-NEXT: or a0, a0, a4
2634 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2635 ; RV32VB-NEXT: vmv.v.x v8, a1
2636 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2
2637 ; RV32VB-NEXT: vslide1down.vx v8, v8, a3
2638 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0
2639 ; RV32VB-NEXT: ret
2641 ; RV32VB-PACK-LABEL: buildvec_v16i8_loads_undef_scattered:
2642 ; RV32VB-PACK: # %bb.0:
2643 ; RV32VB-PACK-NEXT: lbu a1, 0(a0)
2644 ; RV32VB-PACK-NEXT: lbu a2, 1(a0)
2645 ; RV32VB-PACK-NEXT: lbu a3, 44(a0)
2646 ; RV32VB-PACK-NEXT: lbu a4, 55(a0)
2647 ; RV32VB-PACK-NEXT: lbu a5, 75(a0)
2648 ; RV32VB-PACK-NEXT: packh a1, a1, a2
2649 ; RV32VB-PACK-NEXT: packh a2, a3, a4
2650 ; RV32VB-PACK-NEXT: packh a3, a0, a5
2651 ; RV32VB-PACK-NEXT: lbu a4, 82(a0)
2652 ; RV32VB-PACK-NEXT: lbu a5, 93(a0)
2653 ; RV32VB-PACK-NEXT: lbu a6, 144(a0)
2654 ; RV32VB-PACK-NEXT: lbu a7, 154(a0)
2655 ; RV32VB-PACK-NEXT: lbu a0, 124(a0)
2656 ; RV32VB-PACK-NEXT: pack a2, a2, a3
2657 ; RV32VB-PACK-NEXT: packh a3, a4, a5
2658 ; RV32VB-PACK-NEXT: packh a4, a6, a7
2659 ; RV32VB-PACK-NEXT: packh a0, a0, a0
2660 ; RV32VB-PACK-NEXT: pack a0, a0, a4
2661 ; RV32VB-PACK-NEXT: packh a4, a0, a0
2662 ; RV32VB-PACK-NEXT: pack a1, a1, a4
2663 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2664 ; RV32VB-PACK-NEXT: vmv.v.x v8, a1
2665 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
2666 ; RV32VB-PACK-NEXT: pack a1, a3, a4
2667 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
2668 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
2669 ; RV32VB-PACK-NEXT: ret
2671 ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered:
2672 ; RV64V-ONLY: # %bb.0:
2673 ; RV64V-ONLY-NEXT: lbu a1, 0(a0)
2674 ; RV64V-ONLY-NEXT: lbu a2, 1(a0)
2675 ; RV64V-ONLY-NEXT: lbu a3, 44(a0)
2676 ; RV64V-ONLY-NEXT: lbu a4, 55(a0)
2677 ; RV64V-ONLY-NEXT: lbu a5, 75(a0)
2678 ; RV64V-ONLY-NEXT: lbu a6, 82(a0)
2679 ; RV64V-ONLY-NEXT: lbu a7, 93(a0)
2680 ; RV64V-ONLY-NEXT: lbu t0, 124(a0)
2681 ; RV64V-ONLY-NEXT: lbu t1, 144(a0)
2682 ; RV64V-ONLY-NEXT: lbu a0, 154(a0)
2683 ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2684 ; RV64V-ONLY-NEXT: vmv.v.x v8, a1
2685 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
2686 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
2687 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
2688 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
2689 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1
2690 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a5
2691 ; RV64V-ONLY-NEXT: vmv.v.x v8, a6
2692 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
2693 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
2694 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
2695 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1
2696 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t1
2697 ; RV64V-ONLY-NEXT: li a1, 255
2698 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2699 ; RV64V-ONLY-NEXT: vmv.s.x v0, a1
2700 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
2701 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
2702 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
2703 ; RV64V-ONLY-NEXT: ret
2705 ; RVA22U64-LABEL: buildvec_v16i8_loads_undef_scattered:
2706 ; RVA22U64: # %bb.0:
2707 ; RVA22U64-NEXT: lbu a1, 1(a0)
2708 ; RVA22U64-NEXT: lbu a2, 0(a0)
2709 ; RVA22U64-NEXT: slli a1, a1, 8
2710 ; RVA22U64-NEXT: lbu a3, 44(a0)
2711 ; RVA22U64-NEXT: lbu a4, 55(a0)
2712 ; RVA22U64-NEXT: or a1, a1, a2
2713 ; RVA22U64-NEXT: lbu a2, 75(a0)
2714 ; RVA22U64-NEXT: slli a3, a3, 32
2715 ; RVA22U64-NEXT: slli a4, a4, 40
2716 ; RVA22U64-NEXT: or a3, a3, a4
2717 ; RVA22U64-NEXT: slli a2, a2, 56
2718 ; RVA22U64-NEXT: lbu a4, 93(a0)
2719 ; RVA22U64-NEXT: or a2, a2, a3
2720 ; RVA22U64-NEXT: or a1, a1, a2
2721 ; RVA22U64-NEXT: lbu a2, 82(a0)
2722 ; RVA22U64-NEXT: slli a4, a4, 8
2723 ; RVA22U64-NEXT: lbu a3, 144(a0)
2724 ; RVA22U64-NEXT: lbu a5, 154(a0)
2725 ; RVA22U64-NEXT: or a2, a2, a4
2726 ; RVA22U64-NEXT: lbu a0, 124(a0)
2727 ; RVA22U64-NEXT: slli a3, a3, 48
2728 ; RVA22U64-NEXT: slli a5, a5, 56
2729 ; RVA22U64-NEXT: or a3, a3, a5
2730 ; RVA22U64-NEXT: slli a0, a0, 32
2731 ; RVA22U64-NEXT: or a0, a0, a3
2732 ; RVA22U64-NEXT: or a0, a0, a2
2733 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2734 ; RVA22U64-NEXT: vmv.v.x v8, a1
2735 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
2736 ; RVA22U64-NEXT: ret
2738 ; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_undef_scattered:
2739 ; RVA22U64-PACK: # %bb.0:
2740 ; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
2741 ; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
2742 ; RVA22U64-PACK-NEXT: lbu a3, 44(a0)
2743 ; RVA22U64-PACK-NEXT: lbu a4, 55(a0)
2744 ; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
2745 ; RVA22U64-PACK-NEXT: packh a1, a1, a2
2746 ; RVA22U64-PACK-NEXT: packh a2, a3, a4
2747 ; RVA22U64-PACK-NEXT: packh a3, a0, a5
2748 ; RVA22U64-PACK-NEXT: packw a6, a2, a3
2749 ; RVA22U64-PACK-NEXT: packh a3, a0, a0
2750 ; RVA22U64-PACK-NEXT: packw a7, a1, a3
2751 ; RVA22U64-PACK-NEXT: lbu a4, 82(a0)
2752 ; RVA22U64-PACK-NEXT: lbu a5, 93(a0)
2753 ; RVA22U64-PACK-NEXT: lbu a2, 144(a0)
2754 ; RVA22U64-PACK-NEXT: lbu a1, 154(a0)
2755 ; RVA22U64-PACK-NEXT: lbu a0, 124(a0)
2756 ; RVA22U64-PACK-NEXT: pack a6, a7, a6
2757 ; RVA22U64-PACK-NEXT: packh a4, a4, a5
2758 ; RVA22U64-PACK-NEXT: packh a1, a2, a1
2759 ; RVA22U64-PACK-NEXT: packh a0, a0, a0
2760 ; RVA22U64-PACK-NEXT: packw a0, a0, a1
2761 ; RVA22U64-PACK-NEXT: packw a1, a4, a3
2762 ; RVA22U64-PACK-NEXT: pack a0, a1, a0
2763 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2764 ; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
2765 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
2766 ; RVA22U64-PACK-NEXT: ret
2768 ; RV64ZVE32-LABEL: buildvec_v16i8_loads_undef_scattered:
2769 ; RV64ZVE32: # %bb.0:
2770 ; RV64ZVE32-NEXT: lbu a1, 0(a0)
2771 ; RV64ZVE32-NEXT: lbu a2, 1(a0)
2772 ; RV64ZVE32-NEXT: lbu a3, 44(a0)
2773 ; RV64ZVE32-NEXT: lbu a4, 55(a0)
2774 ; RV64ZVE32-NEXT: lbu a5, 75(a0)
2775 ; RV64ZVE32-NEXT: lbu a6, 82(a0)
2776 ; RV64ZVE32-NEXT: lbu a7, 93(a0)
2777 ; RV64ZVE32-NEXT: lbu t0, 124(a0)
2778 ; RV64ZVE32-NEXT: lbu t1, 144(a0)
2779 ; RV64ZVE32-NEXT: lbu a0, 154(a0)
2780 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2781 ; RV64ZVE32-NEXT: vmv.v.x v8, a1
2782 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
2783 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
2784 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
2785 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
2786 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1
2787 ; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a5
2788 ; RV64ZVE32-NEXT: vmv.v.x v8, a6
2789 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
2790 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
2791 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
2792 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1
2793 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t1
2794 ; RV64ZVE32-NEXT: li a1, 255
2795 ; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2796 ; RV64ZVE32-NEXT: vmv.s.x v0, a1
2797 ; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
2798 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
2799 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
2800 ; RV64ZVE32-NEXT: ret
2801 %p2 = getelementptr i8, ptr %p, i32 1
2802 %p3 = getelementptr i8, ptr %p, i32 22
2803 %p4 = getelementptr i8, ptr %p, i32 31
2804 %p5 = getelementptr i8, ptr %p, i32 44
2805 %p6 = getelementptr i8, ptr %p, i32 55
2806 %p7 = getelementptr i8, ptr %p, i32 623
2807 %p8 = getelementptr i8, ptr %p, i32 75
2808 %p9 = getelementptr i8, ptr %p, i32 82
2809 %p10 = getelementptr i8, ptr %p, i32 93
2810 %p11 = getelementptr i8, ptr %p, i32 105
2811 %p12 = getelementptr i8, ptr %p, i32 161
2812 %p13 = getelementptr i8, ptr %p, i32 124
2813 %p14 = getelementptr i8, ptr %p, i32 163
2814 %p15 = getelementptr i8, ptr %p, i32 144
2815 %p16 = getelementptr i8, ptr %p, i32 154
2817 %ld1 = load i8, ptr %p
2818 %ld2 = load i8, ptr %p2
2819 %ld3 = load i8, ptr %p3
2820 %ld4 = load i8, ptr %p4
2821 %ld5 = load i8, ptr %p5
2822 %ld6 = load i8, ptr %p6
2823 %ld7 = load i8, ptr %p7
2824 %ld8 = load i8, ptr %p8
2825 %ld9 = load i8, ptr %p9
2826 %ld10 = load i8, ptr %p10
2827 %ld11 = load i8, ptr %p11
2828 %ld12 = load i8, ptr %p12
2829 %ld13 = load i8, ptr %p13
2830 %ld14 = load i8, ptr %p14
2831 %ld15 = load i8, ptr %p15
2832 %ld16 = load i8, ptr %p16
2834 %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
2835 %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
2836 %v3 = insertelement <16 x i8> %v2, i8 undef, i32 2
2837 %v4 = insertelement <16 x i8> %v3, i8 undef, i32 3
2838 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
2839 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
2840 %v7 = insertelement <16 x i8> %v6, i8 undef, i32 6
2841 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
2842 %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
2843 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
2844 %v11 = insertelement <16 x i8> %v10, i8 undef, i32 10
2845 %v12 = insertelement <16 x i8> %v11, i8 undef, i32 11
2846 %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
2847 %v14 = insertelement <16 x i8> %v13, i8 undef, i32 13
2848 %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
2849 %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
2850 ret <16 x i8> %v16
2851 }
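; Build a <8 x i8> from eight scalar i8 arguments; the +zbkb runs select packh/packw/pack.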
2853 define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6, i8 %e7, i8 %e8) {
2854 ; RV32-ONLY-LABEL: buildvec_v8i8_pack:
2855 ; RV32-ONLY: # %bb.0:
2856 ; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
2857 ; RV32-ONLY-NEXT: vmv.v.x v8, a0
2858 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
2859 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
2860 ; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a3
2861 ; RV32-ONLY-NEXT: vmv.v.x v8, a4
2862 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
2863 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
2864 ; RV32-ONLY-NEXT: vmv.v.i v0, 15
2865 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
2866 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
2867 ; RV32-ONLY-NEXT: ret
2869 ; RV32VB-LABEL: buildvec_v8i8_pack:
2870 ; RV32VB: # %bb.0:
2871 ; RV32VB-NEXT: slli a7, a7, 24
2872 ; RV32VB-NEXT: andi a6, a6, 255
2873 ; RV32VB-NEXT: slli a6, a6, 16
2874 ; RV32VB-NEXT: or a6, a7, a6
2875 ; RV32VB-NEXT: andi a4, a4, 255
2876 ; RV32VB-NEXT: andi a5, a5, 255
2877 ; RV32VB-NEXT: slli a5, a5, 8
2878 ; RV32VB-NEXT: or a4, a4, a5
2879 ; RV32VB-NEXT: or a4, a4, a6
2880 ; RV32VB-NEXT: slli a3, a3, 24
2881 ; RV32VB-NEXT: andi a2, a2, 255
2882 ; RV32VB-NEXT: slli a2, a2, 16
2883 ; RV32VB-NEXT: or a2, a3, a2
2884 ; RV32VB-NEXT: andi a0, a0, 255
2885 ; RV32VB-NEXT: andi a1, a1, 255
2886 ; RV32VB-NEXT: slli a1, a1, 8
2887 ; RV32VB-NEXT: or a0, a0, a1
2888 ; RV32VB-NEXT: or a0, a0, a2
2889 ; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
2890 ; RV32VB-NEXT: vmv.v.x v8, a0
2891 ; RV32VB-NEXT: vslide1down.vx v8, v8, a4
2892 ; RV32VB-NEXT: ret
2894 ; RV32VB-PACK-LABEL: buildvec_v8i8_pack:
2895 ; RV32VB-PACK: # %bb.0:
2896 ; RV32VB-PACK-NEXT: packh a6, a6, a7
2897 ; RV32VB-PACK-NEXT: packh a4, a4, a5
2898 ; RV32VB-PACK-NEXT: pack a4, a4, a6
2899 ; RV32VB-PACK-NEXT: packh a2, a2, a3
2900 ; RV32VB-PACK-NEXT: packh a0, a0, a1
2901 ; RV32VB-PACK-NEXT: pack a0, a0, a2
2902 ; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
2903 ; RV32VB-PACK-NEXT: vmv.v.x v8, a0
2904 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a4
2905 ; RV32VB-PACK-NEXT: ret
2907 ; RV64V-ONLY-LABEL: buildvec_v8i8_pack:
2908 ; RV64V-ONLY: # %bb.0:
2909 ; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
2910 ; RV64V-ONLY-NEXT: vmv.v.x v8, a0
2911 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
2912 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
2913 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a3
2914 ; RV64V-ONLY-NEXT: vmv.v.x v8, a4
2915 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
2916 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
2917 ; RV64V-ONLY-NEXT: vmv.v.i v0, 15
2918 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
2919 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
2920 ; RV64V-ONLY-NEXT: ret
2922 ; RVA22U64-LABEL: buildvec_v8i8_pack:
2923 ; RVA22U64: # %bb.0:
2924 ; RVA22U64-NEXT: andi a4, a4, 255
2925 ; RVA22U64-NEXT: slli a4, a4, 32
2926 ; RVA22U64-NEXT: andi a5, a5, 255
2927 ; RVA22U64-NEXT: slli a5, a5, 40
2928 ; RVA22U64-NEXT: or a4, a4, a5
2929 ; RVA22U64-NEXT: slli a7, a7, 56
2930 ; RVA22U64-NEXT: andi a5, a6, 255
2931 ; RVA22U64-NEXT: slli a5, a5, 48
2932 ; RVA22U64-NEXT: or a5, a7, a5
2933 ; RVA22U64-NEXT: or a4, a4, a5
2934 ; RVA22U64-NEXT: andi a2, a2, 255
2935 ; RVA22U64-NEXT: slli a2, a2, 16
2936 ; RVA22U64-NEXT: andi a3, a3, 255
2937 ; RVA22U64-NEXT: slli a3, a3, 24
2938 ; RVA22U64-NEXT: or a2, a2, a3
2939 ; RVA22U64-NEXT: andi a0, a0, 255
2940 ; RVA22U64-NEXT: andi a1, a1, 255
2941 ; RVA22U64-NEXT: slli a1, a1, 8
2942 ; RVA22U64-NEXT: or a0, a0, a1
2943 ; RVA22U64-NEXT: or a0, a0, a2
2944 ; RVA22U64-NEXT: or a0, a0, a4
2945 ; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2946 ; RVA22U64-NEXT: vmv.s.x v8, a0
2947 ; RVA22U64-NEXT: ret
2949 ; RVA22U64-PACK-LABEL: buildvec_v8i8_pack:
2950 ; RVA22U64-PACK: # %bb.0:
2951 ; RVA22U64-PACK-NEXT: packh a6, a6, a7
2952 ; RVA22U64-PACK-NEXT: packh a4, a4, a5
2953 ; RVA22U64-PACK-NEXT: packw a4, a4, a6
2954 ; RVA22U64-PACK-NEXT: packh a2, a2, a3
2955 ; RVA22U64-PACK-NEXT: packh a0, a0, a1
2956 ; RVA22U64-PACK-NEXT: packw a0, a0, a2
2957 ; RVA22U64-PACK-NEXT: pack a0, a0, a4
2958 ; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2959 ; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
2960 ; RVA22U64-PACK-NEXT: ret
2962 ; RV64ZVE32-LABEL: buildvec_v8i8_pack:
2963 ; RV64ZVE32: # %bb.0:
2964 ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
2965 ; RV64ZVE32-NEXT: vmv.v.x v8, a0
2966 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
2967 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
2968 ; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a3
2969 ; RV64ZVE32-NEXT: vmv.v.x v8, a4
2970 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
2971 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
2972 ; RV64ZVE32-NEXT: vmv.v.i v0, 15
2973 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
2974 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4, v0.t
2975 ; RV64ZVE32-NEXT: ret
2976 %v1 = insertelement <8 x i8> poison, i8 %e1, i32 0
2977 %v2 = insertelement <8 x i8> %v1, i8 %e2, i32 1
2978 %v3 = insertelement <8 x i8> %v2, i8 %e3, i32 2
2979 %v4 = insertelement <8 x i8> %v3, i8 %e4, i32 3
2980 %v5 = insertelement <8 x i8> %v4, i8 %e5, i32 4
2981 %v6 = insertelement <8 x i8> %v5, i8 %e6, i32 5
2982 %v7 = insertelement <8 x i8> %v6, i8 %e7, i32 6
2983 %v8 = insertelement <8 x i8> %v7, i8 %e8, i32 7
2984 ret <8 x i8> %v8
2985 }
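; Same as above with a non-power-of-two element count (<6 x i8>).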
2987 define <6 x i8> @buildvec_v6i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6) {
2988 ; RV32-ONLY-LABEL: buildvec_v6i8_pack:
2989 ; RV32-ONLY: # %bb.0:
2990 ; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
2991 ; RV32-ONLY-NEXT: vmv.v.x v8, a0
2992 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
2993 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
2994 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
2995 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
2996 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
2997 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
2998 ; RV32-ONLY-NEXT: ret
3000 ; RV32VB-LABEL: buildvec_v6i8_pack:
3001 ; RV32VB: # %bb.0:
3002 ; RV32VB-NEXT: slli a3, a3, 24
3003 ; RV32VB-NEXT: andi a2, a2, 255
3004 ; RV32VB-NEXT: slli a2, a2, 16
3005 ; RV32VB-NEXT: or a2, a3, a2
3006 ; RV32VB-NEXT: andi a0, a0, 255
3007 ; RV32VB-NEXT: andi a1, a1, 255
3008 ; RV32VB-NEXT: slli a1, a1, 8
3009 ; RV32VB-NEXT: or a0, a0, a1
3010 ; RV32VB-NEXT: or a0, a0, a2
3011 ; RV32VB-NEXT: andi a1, a4, 255
3012 ; RV32VB-NEXT: andi a2, a5, 255
3013 ; RV32VB-NEXT: slli a2, a2, 8
3014 ; RV32VB-NEXT: or a1, a1, a2
3015 ; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
3016 ; RV32VB-NEXT: vmv.v.x v8, a0
3017 ; RV32VB-NEXT: vslide1down.vx v8, v8, a1
3018 ; RV32VB-NEXT: ret
3020 ; RV32VB-PACK-LABEL: buildvec_v6i8_pack:
3021 ; RV32VB-PACK: # %bb.0:
3022 ; RV32VB-PACK-NEXT: packh a2, a2, a3
3023 ; RV32VB-PACK-NEXT: packh a0, a0, a1
3024 ; RV32VB-PACK-NEXT: pack a0, a0, a2
3025 ; RV32VB-PACK-NEXT: packh a1, a4, a5
3026 ; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
3027 ; RV32VB-PACK-NEXT: vmv.v.x v8, a0
3028 ; RV32VB-PACK-NEXT: packh a0, a0, a0
3029 ; RV32VB-PACK-NEXT: pack a0, a1, a0
3030 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
3031 ; RV32VB-PACK-NEXT: ret
3033 ; RV64V-ONLY-LABEL: buildvec_v6i8_pack:
3034 ; RV64V-ONLY: # %bb.0:
3035 ; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
3036 ; RV64V-ONLY-NEXT: vmv.v.x v8, a0
3037 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
3038 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
3039 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
3040 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
3041 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
3042 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
3043 ; RV64V-ONLY-NEXT: ret
3045 ; RVA22U64-LABEL: buildvec_v6i8_pack:
3046 ; RVA22U64: # %bb.0:
3047 ; RVA22U64-NEXT: andi a2, a2, 255
3048 ; RVA22U64-NEXT: slli a2, a2, 16
3049 ; RVA22U64-NEXT: andi a3, a3, 255
3050 ; RVA22U64-NEXT: slli a3, a3, 24
3051 ; RVA22U64-NEXT: or a2, a2, a3
3052 ; RVA22U64-NEXT: andi a0, a0, 255
3053 ; RVA22U64-NEXT: andi a1, a1, 255
3054 ; RVA22U64-NEXT: slli a1, a1, 8
3055 ; RVA22U64-NEXT: or a0, a0, a1
3056 ; RVA22U64-NEXT: or a0, a0, a2
3057 ; RVA22U64-NEXT: andi a1, a4, 255
3058 ; RVA22U64-NEXT: slli a1, a1, 32
3059 ; RVA22U64-NEXT: andi a2, a5, 255
3060 ; RVA22U64-NEXT: slli a2, a2, 40
3061 ; RVA22U64-NEXT: or a1, a1, a2
3062 ; RVA22U64-NEXT: or a0, a0, a1
3063 ; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
3064 ; RVA22U64-NEXT: vmv.s.x v8, a0
3065 ; RVA22U64-NEXT: ret
3067 ; RVA22U64-PACK-LABEL: buildvec_v6i8_pack:
3068 ; RVA22U64-PACK: # %bb.0:
3069 ; RVA22U64-PACK-NEXT: packh a2, a2, a3
3070 ; RVA22U64-PACK-NEXT: packh a0, a0, a1
3071 ; RVA22U64-PACK-NEXT: packw a0, a0, a2
3072 ; RVA22U64-PACK-NEXT: packh a1, a4, a5
3073 ; RVA22U64-PACK-NEXT: packh a2, a0, a0
3074 ; RVA22U64-PACK-NEXT: packw a1, a1, a2
3075 ; RVA22U64-PACK-NEXT: pack a0, a0, a1
3076 ; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
3077 ; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
3078 ; RVA22U64-PACK-NEXT: ret
3080 ; RV64ZVE32-LABEL: buildvec_v6i8_pack:
3081 ; RV64ZVE32: # %bb.0:
3082 ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
3083 ; RV64ZVE32-NEXT: vmv.v.x v8, a0
3084 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
3085 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
3086 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
3087 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
3088 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
3089 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
3090 ; RV64ZVE32-NEXT: ret
3091 %v1 = insertelement <6 x i8> poison, i8 %e1, i32 0
3092 %v2 = insertelement <6 x i8> %v1, i8 %e2, i32 1
3093 %v3 = insertelement <6 x i8> %v2, i8 %e3, i32 2
3094 %v4 = insertelement <6 x i8> %v3, i8 %e4, i32 3
3095 %v5 = insertelement <6 x i8> %v4, i8 %e5, i32 4
3096 %v6 = insertelement <6 x i8> %v5, i8 %e6, i32 5
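; Four i16 elements passed in GPRs. With Zba/Zbb the halves are combined into
; wider GPRs via zext.h/slli/or (or a single pack/packw with Zbkb), so the
; vector is built with one vmv plus at most one vslide1down instead of one
; slide per element; the baseline configurations slide each element in at e16.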
define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) {
; RV32-ONLY-LABEL: buildvec_v4i16_pack:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v4i16_pack:
; RV32VB: # %bb.0:
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: zext.h a2, a2
; RV32VB-NEXT: or a2, a2, a3
; RV32VB-NEXT: slli a1, a1, 16
; RV32VB-NEXT: zext.h a0, a0
; RV32VB-NEXT: or a0, a0, a1
; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v4i16_pack:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: pack a2, a2, a3
; RV32VB-PACK-NEXT: pack a0, a0, a1
; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v4i16_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v4i16_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: slli a3, a3, 48
; RVA22U64-NEXT: zext.h a2, a2
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: zext.h a0, a0
; RVA22U64-NEXT: zext.h a1, a1
; RVA22U64-NEXT: slli a1, a1, 16
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v4i16_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: packw a2, a2, a3
; RVA22U64-PACK-NEXT: packw a0, a0, a1
; RVA22U64-PACK-NEXT: pack a0, a0, a2
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v4i16_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: ret
%v1 = insertelement <4 x i16> poison, i16 %e1, i32 0
%v2 = insertelement <4 x i16> %v1, i16 %e2, i32 1
%v3 = insertelement <4 x i16> %v2, i16 %e3, i32 2
%v4 = insertelement <4 x i16> %v3, i16 %e4, i32 3
ret <4 x i16> %v4
}

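; Two i32 elements: with Zba on RV64 the pair is combined into one 64-bit GPR
; (slli + add.uw, or pack with Zbkb) and moved with a single vmv.s.x; the other
; configurations use vmv.v.x followed by one vslide1down at e32.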
define <2 x i32> @buildvec_v2i32_pack(i32 %e1, i32 %e2) {
; RV32-LABEL: buildvec_v2i32_pack:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v2i32_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v2i32_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: add.uw a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v2i32_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v2i32_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: ret
%v1 = insertelement <2 x i32> poison, i32 %e1, i32 0
%v2 = insertelement <2 x i32> %v1, i32 %e2, i32 1
ret <2 x i32> %v2
}

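; A single-element build vector lowers to a plain vmv.s.x on every
; configuration; no packing is needed.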
define <1 x i16> @buildvec_v1i16_pack(i16 %e1) {
; CHECK-LABEL: buildvec_v1i16_pack:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%v1 = insertelement <1 x i16> poison, i16 %e1, i32 0
ret <1 x i16> %v1
}

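; As above, but with an i32 element.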
define <1 x i32> @buildvec_v1i32_pack(i32 %e1) {
; CHECK-LABEL: buildvec_v1i32_pack:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%v1 = insertelement <1 x i32> poison, i32 %e1, i32 0