1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV32 %s
3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV64 %s
5 ; ------------------------------------------------------------------------------
7 ; ------------------------------------------------------------------------------
9 ; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3
; Factor-2 deinterleave of a non-power-of-two <6 x i32> load into two
; <3 x i32> results (even lanes 0,2,4 and odd lanes 1,3,5). The expected
; output below is a plain vle32.v followed by vrgather shuffles rather than a
; segment load. The result aggregate is seeded with poison (preferred over the
; deprecated undef); both fields are overwritten before the return.
10 define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
11 ; CHECK-LABEL: load_factor2_v3:
13 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
14 ; CHECK-NEXT: vle32.v v10, (a0)
15 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
16 ; CHECK-NEXT: vid.v v8
17 ; CHECK-NEXT: vadd.vv v9, v8, v8
18 ; CHECK-NEXT: vrgather.vv v8, v10, v9
19 ; CHECK-NEXT: vmv.v.i v0, 4
20 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
21 ; CHECK-NEXT: vslidedown.vi v12, v10, 4
22 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
23 ; CHECK-NEXT: vrgather.vi v8, v12, 0, v0.t
24 ; CHECK-NEXT: vadd.vi v11, v9, 1
25 ; CHECK-NEXT: vrgather.vv v9, v10, v11
26 ; CHECK-NEXT: vrgather.vi v9, v12, 1, v0.t
28 %interleaved.vec = load <6 x i32>, ptr %ptr
29 %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
30 %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>
31 %res0 = insertvalue {<3 x i32>, <3 x i32>} poison, <3 x i32> %v0, 0
32 %res1 = insertvalue {<3 x i32>, <3 x i32>} %res0, <3 x i32> %v1, 1
33 ret {<3 x i32>, <3 x i32>} %res1
; Factor-2 deinterleave: an <8 x i32> load is split by stride-2 shuffle masks
; into its even and odd lanes. The expected output is a single vlseg2e32.v.
; The result aggregate is seeded with poison (preferred over the deprecated
; undef); both fields are overwritten before the return.
36 define {<4 x i32>, <4 x i32>} @load_factor2(ptr %ptr) {
37 ; CHECK-LABEL: load_factor2:
39 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
40 ; CHECK-NEXT: vlseg2e32.v v8, (a0)
42 %interleaved.vec = load <8 x i32>, ptr %ptr
43 %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
44 %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
45 %res0 = insertvalue {<4 x i32>, <4 x i32>} poison, <4 x i32> %v0, 0
46 %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
47 ret {<4 x i32>, <4 x i32>} %res1
; Factor-3 deinterleave: a <12 x i32> load is split by stride-3 shuffle masks
; into three <4 x i32> vectors. The expected output is a single vlseg3e32.v.
; The result aggregate is seeded with poison (preferred over the deprecated
; undef); all three fields are overwritten before the return.
51 define {<4 x i32>, <4 x i32>, <4 x i32>} @load_factor3(ptr %ptr) {
52 ; CHECK-LABEL: load_factor3:
54 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
55 ; CHECK-NEXT: vlseg3e32.v v8, (a0)
57 %interleaved.vec = load <12 x i32>, ptr %ptr
58 %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
59 %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
60 %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
61 %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} poison, <4 x i32> %v0, 0
62 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
63 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
64 ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
; Factor-4 deinterleave: a <16 x i32> load is split by stride-4 shuffle masks
; into four <4 x i32> vectors. The expected output is a single vlseg4e32.v.
; The result aggregate is seeded with poison (preferred over the deprecated
; undef); all four fields are overwritten before the return.
67 define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @load_factor4(ptr %ptr) {
68 ; CHECK-LABEL: load_factor4:
70 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
71 ; CHECK-NEXT: vlseg4e32.v v8, (a0)
73 %interleaved.vec = load <16 x i32>, ptr %ptr
74 %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
75 %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
76 %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
77 %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
78 %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} poison, <4 x i32> %v0, 0
79 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
80 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
81 %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
82 ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3
; Factor-5 deinterleave: a <20 x i32> load is split by stride-5 shuffle masks
; into five <4 x i32> vectors. The expected output is a single vlseg5e32.v.
; The result aggregate is seeded with poison (preferred over the deprecated
; undef); all five fields are overwritten before the return.
85 define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @load_factor5(ptr %ptr) {
86 ; CHECK-LABEL: load_factor5:
88 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
89 ; CHECK-NEXT: vlseg5e32.v v8, (a0)
91 %interleaved.vec = load <20 x i32>, ptr %ptr
92 %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
93 %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16>
94 %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17>
95 %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18>
96 %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19>
97 %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} poison, <4 x i32> %v0, 0
98 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
99 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
100 %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
101 %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
102 ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
; Factor-6 deinterleave: a <12 x i16> load is split by stride-6 shuffle masks
; into six <2 x i16> vectors. The expected output is a single vlseg6e16.v
; under an e16/mf4 vsetivli. The result aggregate is seeded with poison
; (preferred over the deprecated undef); all six fields are overwritten before
; the return.
105 define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor6(ptr %ptr) {
106 ; CHECK-LABEL: load_factor6:
108 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
109 ; CHECK-NEXT: vlseg6e16.v v8, (a0)
111 %interleaved.vec = load <12 x i16>, ptr %ptr
112 %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 0, i32 6>
113 %v1 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 1, i32 7>
114 %v2 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 2, i32 8>
115 %v3 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 3, i32 9>
116 %v4 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 4, i32 10>
117 %v5 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 5, i32 11>
118 %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} poison, <2 x i16> %v0, 0
119 %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
120 %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
121 %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
122 %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
123 %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
124 ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5
127 ; LMUL * NF is > 8 here and so shouldn't be lowered to a vlseg
128 define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_factor6_too_big(ptr %ptr) {
129 ; RV32-LABEL: load_factor6_too_big:
131 ; RV32-NEXT: addi sp, sp, -16
132 ; RV32-NEXT: .cfi_def_cfa_offset 16
133 ; RV32-NEXT: csrr a2, vlenb
134 ; RV32-NEXT: li a3, 62
135 ; RV32-NEXT: mul a2, a2, a3
136 ; RV32-NEXT: sub sp, sp, a2
137 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 62 * vlenb
138 ; RV32-NEXT: addi a3, a1, 128
139 ; RV32-NEXT: addi a4, a1, 256
140 ; RV32-NEXT: li a2, 32
141 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
142 ; RV32-NEXT: vle32.v v16, (a4)
143 ; RV32-NEXT: csrr a4, vlenb
144 ; RV32-NEXT: li a5, 29
145 ; RV32-NEXT: mul a4, a4, a5
146 ; RV32-NEXT: add a4, sp, a4
147 ; RV32-NEXT: addi a4, a4, 16
148 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
149 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
150 ; RV32-NEXT: vid.v v10
151 ; RV32-NEXT: csrr a4, vlenb
152 ; RV32-NEXT: slli a5, a4, 3
153 ; RV32-NEXT: add a4, a5, a4
154 ; RV32-NEXT: add a4, sp, a4
155 ; RV32-NEXT: addi a4, a4, 16
156 ; RV32-NEXT: vs2r.v v10, (a4) # Unknown-size Folded Spill
157 ; RV32-NEXT: vadd.vi v8, v10, -4
158 ; RV32-NEXT: csrr a4, vlenb
159 ; RV32-NEXT: li a5, 13
160 ; RV32-NEXT: mul a4, a4, a5
161 ; RV32-NEXT: add a4, sp, a4
162 ; RV32-NEXT: addi a4, a4, 16
163 ; RV32-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill
164 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
165 ; RV32-NEXT: vrgatherei16.vv v12, v16, v8
166 ; RV32-NEXT: csrr a4, vlenb
167 ; RV32-NEXT: li a5, 21
168 ; RV32-NEXT: mul a4, a4, a5
169 ; RV32-NEXT: add a4, sp, a4
170 ; RV32-NEXT: addi a4, a4, 16
171 ; RV32-NEXT: vs4r.v v12, (a4) # Unknown-size Folded Spill
172 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
173 ; RV32-NEXT: vadd.vi v8, v10, -10
174 ; RV32-NEXT: lui a4, 12
175 ; RV32-NEXT: vmv.s.x v0, a4
176 ; RV32-NEXT: csrr a4, vlenb
177 ; RV32-NEXT: slli a4, a4, 3
178 ; RV32-NEXT: add a4, sp, a4
179 ; RV32-NEXT: addi a4, a4, 16
180 ; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
181 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
182 ; RV32-NEXT: vslidedown.vi v16, v16, 16
183 ; RV32-NEXT: csrr a4, vlenb
184 ; RV32-NEXT: li a5, 45
185 ; RV32-NEXT: mul a4, a4, a5
186 ; RV32-NEXT: add a4, sp, a4
187 ; RV32-NEXT: addi a4, a4, 16
188 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
189 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
190 ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
191 ; RV32-NEXT: csrr a4, vlenb
192 ; RV32-NEXT: li a5, 25
193 ; RV32-NEXT: mul a4, a4, a5
194 ; RV32-NEXT: add a4, sp, a4
195 ; RV32-NEXT: addi a4, a4, 16
196 ; RV32-NEXT: vs4r.v v12, (a4) # Unknown-size Folded Spill
197 ; RV32-NEXT: lui a4, %hi(.LCPI6_0)
198 ; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0)
199 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
200 ; RV32-NEXT: lui a5, %hi(.LCPI6_1)
201 ; RV32-NEXT: addi a5, a5, %lo(.LCPI6_1)
202 ; RV32-NEXT: lui a6, 1
203 ; RV32-NEXT: vle16.v v8, (a4)
204 ; RV32-NEXT: addi a4, sp, 16
205 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
206 ; RV32-NEXT: vle16.v v8, (a5)
207 ; RV32-NEXT: csrr a4, vlenb
208 ; RV32-NEXT: slli a4, a4, 2
209 ; RV32-NEXT: add a4, sp, a4
210 ; RV32-NEXT: addi a4, a4, 16
211 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
212 ; RV32-NEXT: vle32.v v16, (a1)
213 ; RV32-NEXT: csrr a1, vlenb
214 ; RV32-NEXT: li a4, 37
215 ; RV32-NEXT: mul a1, a1, a4
216 ; RV32-NEXT: add a1, sp, a1
217 ; RV32-NEXT: addi a1, a1, 16
218 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
219 ; RV32-NEXT: vle32.v v24, (a3)
220 ; RV32-NEXT: csrr a1, vlenb
221 ; RV32-NEXT: li a3, 53
222 ; RV32-NEXT: mul a1, a1, a3
223 ; RV32-NEXT: add a1, sp, a1
224 ; RV32-NEXT: addi a1, a1, 16
225 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
226 ; RV32-NEXT: addi a1, a6, -64
227 ; RV32-NEXT: vmv.s.x v0, a1
228 ; RV32-NEXT: csrr a1, vlenb
229 ; RV32-NEXT: slli a3, a1, 4
230 ; RV32-NEXT: add a1, a3, a1
231 ; RV32-NEXT: add a1, sp, a1
232 ; RV32-NEXT: addi a1, a1, 16
233 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
234 ; RV32-NEXT: addi a1, sp, 16
235 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
236 ; RV32-NEXT: vrgatherei16.vv v8, v16, v4
237 ; RV32-NEXT: csrr a1, vlenb
238 ; RV32-NEXT: slli a1, a1, 2
239 ; RV32-NEXT: add a1, sp, a1
240 ; RV32-NEXT: addi a1, a1, 16
241 ; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
242 ; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
243 ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
244 ; RV32-NEXT: csrr a1, vlenb
245 ; RV32-NEXT: li a3, 25
246 ; RV32-NEXT: mul a1, a1, a3
247 ; RV32-NEXT: add a1, sp, a1
248 ; RV32-NEXT: addi a1, a1, 16
249 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
250 ; RV32-NEXT: vmv.v.v v12, v8
251 ; RV32-NEXT: csrr a1, vlenb
252 ; RV32-NEXT: li a3, 25
253 ; RV32-NEXT: mul a1, a1, a3
254 ; RV32-NEXT: add a1, sp, a1
255 ; RV32-NEXT: addi a1, a1, 16
256 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
257 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
258 ; RV32-NEXT: csrr a1, vlenb
259 ; RV32-NEXT: slli a3, a1, 3
260 ; RV32-NEXT: add a1, a3, a1
261 ; RV32-NEXT: add a1, sp, a1
262 ; RV32-NEXT: addi a1, a1, 16
263 ; RV32-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload
264 ; RV32-NEXT: vadd.vi v8, v10, -2
265 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
266 ; RV32-NEXT: csrr a1, vlenb
267 ; RV32-NEXT: li a3, 29
268 ; RV32-NEXT: mul a1, a1, a3
269 ; RV32-NEXT: add a1, sp, a1
270 ; RV32-NEXT: addi a1, a1, 16
271 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
272 ; RV32-NEXT: vrgatherei16.vv v12, v16, v8
273 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
274 ; RV32-NEXT: vadd.vi v8, v10, -8
275 ; RV32-NEXT: vmv2r.v v30, v10
276 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
277 ; RV32-NEXT: csrr a1, vlenb
278 ; RV32-NEXT: slli a1, a1, 3
279 ; RV32-NEXT: add a1, sp, a1
280 ; RV32-NEXT: addi a1, a1, 16
281 ; RV32-NEXT: vl1r.v v28, (a1) # Unknown-size Folded Reload
282 ; RV32-NEXT: vmv1r.v v0, v28
283 ; RV32-NEXT: csrr a1, vlenb
284 ; RV32-NEXT: li a3, 45
285 ; RV32-NEXT: mul a1, a1, a3
286 ; RV32-NEXT: add a1, sp, a1
287 ; RV32-NEXT: addi a1, a1, 16
288 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
289 ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
290 ; RV32-NEXT: vmv.v.v v24, v12
291 ; RV32-NEXT: lui a1, %hi(.LCPI6_2)
292 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2)
293 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
294 ; RV32-NEXT: lui a3, %hi(.LCPI6_3)
295 ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3)
296 ; RV32-NEXT: vle16.v v16, (a1)
297 ; RV32-NEXT: vle16.v v8, (a3)
298 ; RV32-NEXT: csrr a1, vlenb
299 ; RV32-NEXT: slli a1, a1, 2
300 ; RV32-NEXT: add a1, sp, a1
301 ; RV32-NEXT: addi a1, a1, 16
302 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
303 ; RV32-NEXT: csrr a1, vlenb
304 ; RV32-NEXT: li a3, 37
305 ; RV32-NEXT: mul a1, a1, a3
306 ; RV32-NEXT: add a1, sp, a1
307 ; RV32-NEXT: addi a1, a1, 16
308 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
309 ; RV32-NEXT: vrgatherei16.vv v8, v0, v16
310 ; RV32-NEXT: csrr a1, vlenb
311 ; RV32-NEXT: slli a3, a1, 4
312 ; RV32-NEXT: add a1, a3, a1
313 ; RV32-NEXT: add a1, sp, a1
314 ; RV32-NEXT: addi a1, a1, 16
315 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
316 ; RV32-NEXT: csrr a1, vlenb
317 ; RV32-NEXT: li a3, 53
318 ; RV32-NEXT: mul a1, a1, a3
319 ; RV32-NEXT: add a1, sp, a1
320 ; RV32-NEXT: addi a1, a1, 16
321 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
322 ; RV32-NEXT: csrr a1, vlenb
323 ; RV32-NEXT: slli a1, a1, 2
324 ; RV32-NEXT: add a1, sp, a1
325 ; RV32-NEXT: addi a1, a1, 16
326 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
327 ; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t
328 ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
329 ; RV32-NEXT: vmv.v.v v24, v8
330 ; RV32-NEXT: csrr a1, vlenb
331 ; RV32-NEXT: slli a3, a1, 4
332 ; RV32-NEXT: add a1, a3, a1
333 ; RV32-NEXT: add a1, sp, a1
334 ; RV32-NEXT: addi a1, a1, 16
335 ; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
336 ; RV32-NEXT: lui a1, %hi(.LCPI6_4)
337 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4)
338 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
339 ; RV32-NEXT: vle16.v v8, (a1)
340 ; RV32-NEXT: csrr a1, vlenb
341 ; RV32-NEXT: li a3, 29
342 ; RV32-NEXT: mul a1, a1, a3
343 ; RV32-NEXT: add a1, sp, a1
344 ; RV32-NEXT: addi a1, a1, 16
345 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
346 ; RV32-NEXT: vrgatherei16.vv v4, v16, v8
347 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
348 ; RV32-NEXT: vadd.vi v8, v30, -6
349 ; RV32-NEXT: csrr a1, vlenb
350 ; RV32-NEXT: slli a1, a1, 2
351 ; RV32-NEXT: add a1, sp, a1
352 ; RV32-NEXT: addi a1, a1, 16
353 ; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill
354 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
355 ; RV32-NEXT: vmv1r.v v0, v28
356 ; RV32-NEXT: vmv1r.v v2, v28
357 ; RV32-NEXT: csrr a1, vlenb
358 ; RV32-NEXT: li a3, 45
359 ; RV32-NEXT: mul a1, a1, a3
360 ; RV32-NEXT: add a1, sp, a1
361 ; RV32-NEXT: addi a1, a1, 16
362 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
363 ; RV32-NEXT: vrgatherei16.vv v4, v16, v8, v0.t
364 ; RV32-NEXT: lui a1, %hi(.LCPI6_5)
365 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5)
366 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
367 ; RV32-NEXT: lui a3, %hi(.LCPI6_6)
368 ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6)
369 ; RV32-NEXT: vle16.v v20, (a1)
370 ; RV32-NEXT: vle16.v v8, (a3)
371 ; RV32-NEXT: csrr a1, vlenb
372 ; RV32-NEXT: slli a3, a1, 3
373 ; RV32-NEXT: add a1, a3, a1
374 ; RV32-NEXT: add a1, sp, a1
375 ; RV32-NEXT: addi a1, a1, 16
376 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
377 ; RV32-NEXT: li a1, 960
378 ; RV32-NEXT: vmv.s.x v1, a1
379 ; RV32-NEXT: csrr a1, vlenb
380 ; RV32-NEXT: li a3, 37
381 ; RV32-NEXT: mul a1, a1, a3
382 ; RV32-NEXT: add a1, sp, a1
383 ; RV32-NEXT: addi a1, a1, 16
384 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
385 ; RV32-NEXT: vrgatherei16.vv v24, v8, v20
386 ; RV32-NEXT: vmv1r.v v0, v1
387 ; RV32-NEXT: csrr a1, vlenb
388 ; RV32-NEXT: li a3, 53
389 ; RV32-NEXT: mul a1, a1, a3
390 ; RV32-NEXT: add a1, sp, a1
391 ; RV32-NEXT: addi a1, a1, 16
392 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
393 ; RV32-NEXT: csrr a1, vlenb
394 ; RV32-NEXT: slli a3, a1, 3
395 ; RV32-NEXT: add a1, a3, a1
396 ; RV32-NEXT: add a1, sp, a1
397 ; RV32-NEXT: addi a1, a1, 16
398 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
399 ; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t
400 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
401 ; RV32-NEXT: vmv.v.v v4, v24
402 ; RV32-NEXT: csrr a1, vlenb
403 ; RV32-NEXT: slli a3, a1, 3
404 ; RV32-NEXT: add a1, a3, a1
405 ; RV32-NEXT: add a1, sp, a1
406 ; RV32-NEXT: addi a1, a1, 16
407 ; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
408 ; RV32-NEXT: lui a1, %hi(.LCPI6_7)
409 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7)
410 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
411 ; RV32-NEXT: vle16.v v8, (a1)
412 ; RV32-NEXT: csrr a1, vlenb
413 ; RV32-NEXT: li a3, 29
414 ; RV32-NEXT: mul a1, a1, a3
415 ; RV32-NEXT: add a1, sp, a1
416 ; RV32-NEXT: addi a1, a1, 16
417 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
418 ; RV32-NEXT: vrgatherei16.vv v12, v16, v8
419 ; RV32-NEXT: vmv1r.v v0, v2
420 ; RV32-NEXT: csrr a1, vlenb
421 ; RV32-NEXT: li a3, 13
422 ; RV32-NEXT: mul a1, a1, a3
423 ; RV32-NEXT: add a1, sp, a1
424 ; RV32-NEXT: addi a1, a1, 16
425 ; RV32-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload
426 ; RV32-NEXT: csrr a1, vlenb
427 ; RV32-NEXT: li a3, 45
428 ; RV32-NEXT: mul a1, a1, a3
429 ; RV32-NEXT: add a1, sp, a1
430 ; RV32-NEXT: addi a1, a1, 16
431 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
432 ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
433 ; RV32-NEXT: vmv.v.v v4, v12
434 ; RV32-NEXT: lui a1, %hi(.LCPI6_8)
435 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8)
436 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
437 ; RV32-NEXT: lui a3, %hi(.LCPI6_9)
438 ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_9)
439 ; RV32-NEXT: vle16.v v16, (a1)
440 ; RV32-NEXT: vle16.v v20, (a3)
441 ; RV32-NEXT: csrr a1, vlenb
442 ; RV32-NEXT: li a3, 37
443 ; RV32-NEXT: mul a1, a1, a3
444 ; RV32-NEXT: add a1, sp, a1
445 ; RV32-NEXT: addi a1, a1, 16
446 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
447 ; RV32-NEXT: vrgatherei16.vv v8, v24, v16
448 ; RV32-NEXT: vmv1r.v v0, v1
449 ; RV32-NEXT: csrr a1, vlenb
450 ; RV32-NEXT: li a3, 53
451 ; RV32-NEXT: mul a1, a1, a3
452 ; RV32-NEXT: add a1, sp, a1
453 ; RV32-NEXT: addi a1, a1, 16
454 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
455 ; RV32-NEXT: vrgatherei16.vv v8, v24, v20, v0.t
456 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
457 ; RV32-NEXT: vmv.v.v v4, v8
458 ; RV32-NEXT: csrr a1, vlenb
459 ; RV32-NEXT: li a3, 13
460 ; RV32-NEXT: mul a1, a1, a3
461 ; RV32-NEXT: add a1, sp, a1
462 ; RV32-NEXT: addi a1, a1, 16
463 ; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
464 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
465 ; RV32-NEXT: lui a1, %hi(.LCPI6_10)
466 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10)
467 ; RV32-NEXT: vle16.v v8, (a1)
468 ; RV32-NEXT: lui a1, 15
469 ; RV32-NEXT: vmv.s.x v0, a1
470 ; RV32-NEXT: csrr a1, vlenb
471 ; RV32-NEXT: slli a1, a1, 3
472 ; RV32-NEXT: add a1, sp, a1
473 ; RV32-NEXT: addi a1, a1, 16
474 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
475 ; RV32-NEXT: csrr a1, vlenb
476 ; RV32-NEXT: li a3, 29
477 ; RV32-NEXT: mul a1, a1, a3
478 ; RV32-NEXT: add a1, sp, a1
479 ; RV32-NEXT: addi a1, a1, 16
480 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
481 ; RV32-NEXT: csrr a1, vlenb
482 ; RV32-NEXT: slli a1, a1, 2
483 ; RV32-NEXT: add a1, sp, a1
484 ; RV32-NEXT: addi a1, a1, 16
485 ; RV32-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload
486 ; RV32-NEXT: vrgatherei16.vv v20, v16, v10
487 ; RV32-NEXT: csrr a1, vlenb
488 ; RV32-NEXT: li a3, 45
489 ; RV32-NEXT: mul a1, a1, a3
490 ; RV32-NEXT: add a1, sp, a1
491 ; RV32-NEXT: addi a1, a1, 16
492 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
493 ; RV32-NEXT: vrgatherei16.vv v20, v24, v8, v0.t
494 ; RV32-NEXT: lui a1, %hi(.LCPI6_11)
495 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11)
496 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
497 ; RV32-NEXT: lui a3, %hi(.LCPI6_12)
498 ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12)
499 ; RV32-NEXT: vle16.v v24, (a1)
500 ; RV32-NEXT: vle16.v v16, (a3)
501 ; RV32-NEXT: li a1, 1008
502 ; RV32-NEXT: vmv.s.x v28, a1
503 ; RV32-NEXT: csrr a1, vlenb
504 ; RV32-NEXT: li a3, 29
505 ; RV32-NEXT: mul a1, a1, a3
506 ; RV32-NEXT: add a1, sp, a1
507 ; RV32-NEXT: addi a1, a1, 16
508 ; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
509 ; RV32-NEXT: csrr a1, vlenb
510 ; RV32-NEXT: li a3, 37
511 ; RV32-NEXT: mul a1, a1, a3
512 ; RV32-NEXT: add a1, sp, a1
513 ; RV32-NEXT: addi a1, a1, 16
514 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
515 ; RV32-NEXT: vrgatherei16.vv v8, v0, v24
516 ; RV32-NEXT: vmv1r.v v0, v28
517 ; RV32-NEXT: csrr a1, vlenb
518 ; RV32-NEXT: li a3, 53
519 ; RV32-NEXT: mul a1, a1, a3
520 ; RV32-NEXT: add a1, sp, a1
521 ; RV32-NEXT: addi a1, a1, 16
522 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
523 ; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
524 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
525 ; RV32-NEXT: vmv.v.v v20, v8
526 ; RV32-NEXT: lui a1, %hi(.LCPI6_13)
527 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13)
528 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
529 ; RV32-NEXT: vle16.v v8, (a1)
530 ; RV32-NEXT: csrr a1, vlenb
531 ; RV32-NEXT: slli a1, a1, 3
532 ; RV32-NEXT: add a1, sp, a1
533 ; RV32-NEXT: addi a1, a1, 16
534 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
535 ; RV32-NEXT: csrr a1, vlenb
536 ; RV32-NEXT: li a3, 21
537 ; RV32-NEXT: mul a1, a1, a3
538 ; RV32-NEXT: add a1, sp, a1
539 ; RV32-NEXT: addi a1, a1, 16
540 ; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
541 ; RV32-NEXT: csrr a1, vlenb
542 ; RV32-NEXT: li a3, 45
543 ; RV32-NEXT: mul a1, a1, a3
544 ; RV32-NEXT: add a1, sp, a1
545 ; RV32-NEXT: addi a1, a1, 16
546 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
547 ; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t
548 ; RV32-NEXT: lui a1, %hi(.LCPI6_14)
549 ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_14)
550 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
551 ; RV32-NEXT: lui a2, %hi(.LCPI6_15)
552 ; RV32-NEXT: addi a2, a2, %lo(.LCPI6_15)
553 ; RV32-NEXT: vle16.v v24, (a1)
554 ; RV32-NEXT: vle16.v v8, (a2)
555 ; RV32-NEXT: csrr a1, vlenb
556 ; RV32-NEXT: li a2, 45
557 ; RV32-NEXT: mul a1, a1, a2
558 ; RV32-NEXT: add a1, sp, a1
559 ; RV32-NEXT: addi a1, a1, 16
560 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
561 ; RV32-NEXT: csrr a1, vlenb
562 ; RV32-NEXT: li a2, 37
563 ; RV32-NEXT: mul a1, a1, a2
564 ; RV32-NEXT: add a1, sp, a1
565 ; RV32-NEXT: addi a1, a1, 16
566 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
567 ; RV32-NEXT: vrgatherei16.vv v8, v0, v24
568 ; RV32-NEXT: csrr a1, vlenb
569 ; RV32-NEXT: li a2, 29
570 ; RV32-NEXT: mul a1, a1, a2
571 ; RV32-NEXT: add a1, sp, a1
572 ; RV32-NEXT: addi a1, a1, 16
573 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
574 ; RV32-NEXT: csrr a1, vlenb
575 ; RV32-NEXT: li a2, 53
576 ; RV32-NEXT: mul a1, a1, a2
577 ; RV32-NEXT: add a1, sp, a1
578 ; RV32-NEXT: addi a1, a1, 16
579 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
580 ; RV32-NEXT: csrr a1, vlenb
581 ; RV32-NEXT: li a2, 45
582 ; RV32-NEXT: mul a1, a1, a2
583 ; RV32-NEXT: add a1, sp, a1
584 ; RV32-NEXT: addi a1, a1, 16
585 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
586 ; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t
587 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
588 ; RV32-NEXT: vmv.v.v v16, v8
589 ; RV32-NEXT: addi a1, a0, 320
590 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
591 ; RV32-NEXT: vse32.v v16, (a1)
592 ; RV32-NEXT: addi a1, a0, 256
593 ; RV32-NEXT: vse32.v v20, (a1)
594 ; RV32-NEXT: addi a1, a0, 192
595 ; RV32-NEXT: csrr a2, vlenb
596 ; RV32-NEXT: li a3, 13
597 ; RV32-NEXT: mul a2, a2, a3
598 ; RV32-NEXT: add a2, sp, a2
599 ; RV32-NEXT: addi a2, a2, 16
600 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
601 ; RV32-NEXT: vse32.v v8, (a1)
602 ; RV32-NEXT: addi a1, a0, 128
603 ; RV32-NEXT: csrr a2, vlenb
604 ; RV32-NEXT: slli a3, a2, 3
605 ; RV32-NEXT: add a2, a3, a2
606 ; RV32-NEXT: add a2, sp, a2
607 ; RV32-NEXT: addi a2, a2, 16
608 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
609 ; RV32-NEXT: vse32.v v8, (a1)
610 ; RV32-NEXT: addi a1, a0, 64
611 ; RV32-NEXT: csrr a2, vlenb
612 ; RV32-NEXT: slli a3, a2, 4
613 ; RV32-NEXT: add a2, a3, a2
614 ; RV32-NEXT: add a2, sp, a2
615 ; RV32-NEXT: addi a2, a2, 16
616 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
617 ; RV32-NEXT: vse32.v v8, (a1)
618 ; RV32-NEXT: csrr a1, vlenb
619 ; RV32-NEXT: li a2, 25
620 ; RV32-NEXT: mul a1, a1, a2
621 ; RV32-NEXT: add a1, sp, a1
622 ; RV32-NEXT: addi a1, a1, 16
623 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
624 ; RV32-NEXT: vse32.v v8, (a0)
625 ; RV32-NEXT: csrr a0, vlenb
626 ; RV32-NEXT: li a1, 62
627 ; RV32-NEXT: mul a0, a0, a1
628 ; RV32-NEXT: add sp, sp, a0
629 ; RV32-NEXT: addi sp, sp, 16
632 ; RV64-LABEL: load_factor6_too_big:
634 ; RV64-NEXT: addi sp, sp, -16
635 ; RV64-NEXT: .cfi_def_cfa_offset 16
636 ; RV64-NEXT: csrr a2, vlenb
637 ; RV64-NEXT: li a3, 52
638 ; RV64-NEXT: mul a2, a2, a3
639 ; RV64-NEXT: sub sp, sp, a2
640 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x34, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 52 * vlenb
641 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
642 ; RV64-NEXT: addi a2, a1, 256
643 ; RV64-NEXT: vle64.v v16, (a2)
644 ; RV64-NEXT: csrr a2, vlenb
645 ; RV64-NEXT: li a3, 27
646 ; RV64-NEXT: mul a2, a2, a3
647 ; RV64-NEXT: add a2, sp, a2
648 ; RV64-NEXT: addi a2, a2, 16
649 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
650 ; RV64-NEXT: addi a2, a1, 128
651 ; RV64-NEXT: vle64.v v8, (a2)
652 ; RV64-NEXT: csrr a2, vlenb
653 ; RV64-NEXT: li a3, 35
654 ; RV64-NEXT: mul a2, a2, a3
655 ; RV64-NEXT: add a2, sp, a2
656 ; RV64-NEXT: addi a2, a2, 16
657 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
658 ; RV64-NEXT: vle64.v v8, (a1)
659 ; RV64-NEXT: csrr a1, vlenb
660 ; RV64-NEXT: li a2, 43
661 ; RV64-NEXT: mul a1, a1, a2
662 ; RV64-NEXT: add a1, sp, a1
663 ; RV64-NEXT: addi a1, a1, 16
664 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
665 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
666 ; RV64-NEXT: vrgather.vi v8, v16, 4
667 ; RV64-NEXT: li a1, 128
668 ; RV64-NEXT: vmv.s.x v4, a1
669 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma
670 ; RV64-NEXT: vslidedown.vi v24, v16, 8
671 ; RV64-NEXT: csrr a1, vlenb
672 ; RV64-NEXT: li a2, 19
673 ; RV64-NEXT: mul a1, a1, a2
674 ; RV64-NEXT: add a1, sp, a1
675 ; RV64-NEXT: addi a1, a1, 16
676 ; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
677 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
678 ; RV64-NEXT: vmv1r.v v0, v4
679 ; RV64-NEXT: csrr a1, vlenb
680 ; RV64-NEXT: slli a2, a1, 1
681 ; RV64-NEXT: add a1, a2, a1
682 ; RV64-NEXT: add a1, sp, a1
683 ; RV64-NEXT: addi a1, a1, 16
684 ; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
685 ; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t
686 ; RV64-NEXT: vmv.v.v v20, v8
687 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
688 ; RV64-NEXT: li a1, 6
689 ; RV64-NEXT: vid.v v8
690 ; RV64-NEXT: vmul.vx v2, v8, a1
691 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
692 ; RV64-NEXT: csrr a1, vlenb
693 ; RV64-NEXT: li a2, 43
694 ; RV64-NEXT: mul a1, a1, a2
695 ; RV64-NEXT: add a1, sp, a1
696 ; RV64-NEXT: addi a1, a1, 16
697 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
698 ; RV64-NEXT: vrgatherei16.vv v8, v24, v2
699 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
700 ; RV64-NEXT: li a1, 56
701 ; RV64-NEXT: vmv.s.x v1, a1
702 ; RV64-NEXT: vadd.vi v16, v2, -16
703 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
704 ; RV64-NEXT: vmv1r.v v0, v1
705 ; RV64-NEXT: csrr a1, vlenb
706 ; RV64-NEXT: li a2, 35
707 ; RV64-NEXT: mul a1, a1, a2
708 ; RV64-NEXT: add a1, sp, a1
709 ; RV64-NEXT: addi a1, a1, 16
710 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
711 ; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
712 ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
713 ; RV64-NEXT: vmv.v.v v20, v8
714 ; RV64-NEXT: csrr a1, vlenb
715 ; RV64-NEXT: slli a2, a1, 4
716 ; RV64-NEXT: sub a1, a2, a1
717 ; RV64-NEXT: add a1, sp, a1
718 ; RV64-NEXT: addi a1, a1, 16
719 ; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
720 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
721 ; RV64-NEXT: csrr a1, vlenb
722 ; RV64-NEXT: li a2, 27
723 ; RV64-NEXT: mul a1, a1, a2
724 ; RV64-NEXT: add a1, sp, a1
725 ; RV64-NEXT: addi a1, a1, 16
726 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
727 ; RV64-NEXT: vrgather.vi v8, v16, 5
728 ; RV64-NEXT: vmv1r.v v0, v4
729 ; RV64-NEXT: csrr a1, vlenb
730 ; RV64-NEXT: li a2, 19
731 ; RV64-NEXT: mul a1, a1, a2
732 ; RV64-NEXT: add a1, sp, a1
733 ; RV64-NEXT: addi a1, a1, 16
734 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
735 ; RV64-NEXT: vrgather.vi v8, v16, 3, v0.t
736 ; RV64-NEXT: vmv.v.v v4, v8
737 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
738 ; RV64-NEXT: addi a1, sp, 16
739 ; RV64-NEXT: vs2r.v v2, (a1) # Unknown-size Folded Spill
740 ; RV64-NEXT: vadd.vi v24, v2, 1
741 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
742 ; RV64-NEXT: csrr a1, vlenb
743 ; RV64-NEXT: li a2, 43
744 ; RV64-NEXT: mul a1, a1, a2
745 ; RV64-NEXT: add a1, sp, a1
746 ; RV64-NEXT: addi a1, a1, 16
747 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
748 ; RV64-NEXT: vrgatherei16.vv v8, v16, v24
749 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
750 ; RV64-NEXT: vadd.vi v24, v2, -15
751 ; RV64-NEXT: csrr a1, vlenb
752 ; RV64-NEXT: li a2, 11
753 ; RV64-NEXT: mul a1, a1, a2
754 ; RV64-NEXT: add a1, sp, a1
755 ; RV64-NEXT: addi a1, a1, 16
756 ; RV64-NEXT: vs2r.v v24, (a1) # Unknown-size Folded Spill
757 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
758 ; RV64-NEXT: vmv1r.v v0, v1
759 ; RV64-NEXT: csrr a1, vlenb
760 ; RV64-NEXT: li a2, 35
761 ; RV64-NEXT: mul a1, a1, a2
762 ; RV64-NEXT: add a1, sp, a1
763 ; RV64-NEXT: addi a1, a1, 16
764 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
765 ; RV64-NEXT: csrr a1, vlenb
766 ; RV64-NEXT: li a2, 11
767 ; RV64-NEXT: mul a1, a1, a2
768 ; RV64-NEXT: add a1, sp, a1
769 ; RV64-NEXT: addi a1, a1, 16
770 ; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload
771 ; RV64-NEXT: vrgatherei16.vv v8, v24, v2, v0.t
772 ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
773 ; RV64-NEXT: vmv.v.v v4, v8
774 ; RV64-NEXT: csrr a1, vlenb
775 ; RV64-NEXT: li a2, 11
776 ; RV64-NEXT: mul a1, a1, a2
777 ; RV64-NEXT: add a1, sp, a1
778 ; RV64-NEXT: addi a1, a1, 16
779 ; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
780 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
781 ; RV64-NEXT: addi a1, sp, 16
782 ; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload
783 ; RV64-NEXT: vadd.vi v4, v2, 2
784 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
785 ; RV64-NEXT: vrgatherei16.vv v8, v16, v4
786 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
787 ; RV64-NEXT: li a1, 24
788 ; RV64-NEXT: vmv.s.x v4, a1
789 ; RV64-NEXT: vadd.vi v16, v2, -14
790 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
791 ; RV64-NEXT: vmv1r.v v0, v4
792 ; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
793 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
794 ; RV64-NEXT: vmv.v.i v12, 6
795 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
796 ; RV64-NEXT: csrr a1, vlenb
797 ; RV64-NEXT: li a2, 27
798 ; RV64-NEXT: mul a1, a1, a2
799 ; RV64-NEXT: add a1, sp, a1
800 ; RV64-NEXT: addi a1, a1, 16
801 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
802 ; RV64-NEXT: vmv4r.v v24, v16
803 ; RV64-NEXT: vrgatherei16.vv v16, v24, v12
804 ; RV64-NEXT: csrr a1, vlenb
805 ; RV64-NEXT: slli a2, a1, 1
806 ; RV64-NEXT: add a1, a2, a1
807 ; RV64-NEXT: add a1, sp, a1
808 ; RV64-NEXT: addi a1, a1, 16
809 ; RV64-NEXT: vl1r.v v1, (a1) # Unknown-size Folded Reload
810 ; RV64-NEXT: vmv1r.v v0, v1
811 ; RV64-NEXT: csrr a1, vlenb
812 ; RV64-NEXT: li a2, 19
813 ; RV64-NEXT: mul a1, a1, a2
814 ; RV64-NEXT: add a1, sp, a1
815 ; RV64-NEXT: addi a1, a1, 16
816 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
817 ; RV64-NEXT: vrgather.vi v16, v24, 4, v0.t
818 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
819 ; RV64-NEXT: vmv.v.v v16, v8
820 ; RV64-NEXT: csrr a1, vlenb
821 ; RV64-NEXT: slli a2, a1, 3
822 ; RV64-NEXT: sub a1, a2, a1
823 ; RV64-NEXT: add a1, sp, a1
824 ; RV64-NEXT: addi a1, a1, 16
825 ; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
826 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
827 ; RV64-NEXT: vadd.vi v28, v2, 3
828 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
829 ; RV64-NEXT: csrr a1, vlenb
830 ; RV64-NEXT: li a2, 43
831 ; RV64-NEXT: mul a1, a1, a2
832 ; RV64-NEXT: add a1, sp, a1
833 ; RV64-NEXT: addi a1, a1, 16
834 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
835 ; RV64-NEXT: vrgatherei16.vv v8, v16, v28
836 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
837 ; RV64-NEXT: vadd.vi v16, v2, -13
838 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
839 ; RV64-NEXT: vmv1r.v v0, v4
840 ; RV64-NEXT: csrr a1, vlenb
841 ; RV64-NEXT: li a2, 35
842 ; RV64-NEXT: mul a1, a1, a2
843 ; RV64-NEXT: add a1, sp, a1
844 ; RV64-NEXT: addi a1, a1, 16
845 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
846 ; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
847 ; RV64-NEXT: lui a1, 16
848 ; RV64-NEXT: addi a1, a1, 7
849 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
850 ; RV64-NEXT: vmv.v.x v12, a1
851 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
852 ; RV64-NEXT: csrr a1, vlenb
853 ; RV64-NEXT: li a2, 27
854 ; RV64-NEXT: mul a1, a1, a2
855 ; RV64-NEXT: add a1, sp, a1
856 ; RV64-NEXT: addi a1, a1, 16
857 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
858 ; RV64-NEXT: vrgatherei16.vv v24, v16, v12
859 ; RV64-NEXT: vmv1r.v v0, v1
860 ; RV64-NEXT: csrr a1, vlenb
861 ; RV64-NEXT: li a2, 19
862 ; RV64-NEXT: mul a1, a1, a2
863 ; RV64-NEXT: add a1, sp, a1
864 ; RV64-NEXT: addi a1, a1, 16
865 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
866 ; RV64-NEXT: vrgather.vi v24, v16, 5, v0.t
867 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
868 ; RV64-NEXT: vmv.v.v v24, v8
869 ; RV64-NEXT: csrr a1, vlenb
870 ; RV64-NEXT: slli a2, a1, 1
871 ; RV64-NEXT: add a1, a2, a1
872 ; RV64-NEXT: add a1, sp, a1
873 ; RV64-NEXT: addi a1, a1, 16
874 ; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
875 ; RV64-NEXT: lui a1, 96
876 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
877 ; RV64-NEXT: vmv.v.x v8, a1
878 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
879 ; RV64-NEXT: li a1, 192
880 ; RV64-NEXT: vmv.s.x v0, a1
881 ; RV64-NEXT: csrr a1, vlenb
882 ; RV64-NEXT: slli a1, a1, 1
883 ; RV64-NEXT: add a1, sp, a1
884 ; RV64-NEXT: addi a1, a1, 16
885 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
886 ; RV64-NEXT: csrr a1, vlenb
887 ; RV64-NEXT: li a2, 27
888 ; RV64-NEXT: mul a1, a1, a2
889 ; RV64-NEXT: add a1, sp, a1
890 ; RV64-NEXT: addi a1, a1, 16
891 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
892 ; RV64-NEXT: vrgather.vi v4, v24, 2
893 ; RV64-NEXT: vrgatherei16.vv v4, v16, v8, v0.t
894 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
895 ; RV64-NEXT: vadd.vi v26, v2, 4
896 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
897 ; RV64-NEXT: csrr a1, vlenb
898 ; RV64-NEXT: li a2, 43
899 ; RV64-NEXT: mul a1, a1, a2
900 ; RV64-NEXT: add a1, sp, a1
901 ; RV64-NEXT: addi a1, a1, 16
902 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
903 ; RV64-NEXT: vrgatherei16.vv v8, v16, v26
904 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
905 ; RV64-NEXT: li a1, 28
906 ; RV64-NEXT: vmv.s.x v1, a1
907 ; RV64-NEXT: vadd.vi v16, v2, -12
908 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
909 ; RV64-NEXT: vmv1r.v v0, v1
910 ; RV64-NEXT: csrr a1, vlenb
911 ; RV64-NEXT: li a2, 35
912 ; RV64-NEXT: mul a1, a1, a2
913 ; RV64-NEXT: add a1, sp, a1
914 ; RV64-NEXT: addi a1, a1, 16
915 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
916 ; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
917 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
918 ; RV64-NEXT: vmv.v.v v4, v8
919 ; RV64-NEXT: lui a1, 112
920 ; RV64-NEXT: addi a1, a1, 1
921 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
922 ; RV64-NEXT: vmv.v.x v12, a1
923 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
924 ; RV64-NEXT: csrr a1, vlenb
925 ; RV64-NEXT: li a2, 27
926 ; RV64-NEXT: mul a1, a1, a2
927 ; RV64-NEXT: add a1, sp, a1
928 ; RV64-NEXT: addi a1, a1, 16
929 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
930 ; RV64-NEXT: vrgather.vi v8, v16, 3
931 ; RV64-NEXT: csrr a1, vlenb
932 ; RV64-NEXT: slli a1, a1, 1
933 ; RV64-NEXT: add a1, sp, a1
934 ; RV64-NEXT: addi a1, a1, 16
935 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
936 ; RV64-NEXT: csrr a1, vlenb
937 ; RV64-NEXT: li a2, 19
938 ; RV64-NEXT: mul a1, a1, a2
939 ; RV64-NEXT: add a1, sp, a1
940 ; RV64-NEXT: addi a1, a1, 16
941 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
942 ; RV64-NEXT: vrgatherei16.vv v8, v16, v12, v0.t
943 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
944 ; RV64-NEXT: vadd.vi v12, v2, 5
945 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
946 ; RV64-NEXT: csrr a1, vlenb
947 ; RV64-NEXT: li a2, 43
948 ; RV64-NEXT: mul a1, a1, a2
949 ; RV64-NEXT: add a1, sp, a1
950 ; RV64-NEXT: addi a1, a1, 16
951 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
952 ; RV64-NEXT: vrgatherei16.vv v16, v24, v12
953 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
954 ; RV64-NEXT: vadd.vi v12, v2, -11
955 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
956 ; RV64-NEXT: vmv1r.v v0, v1
957 ; RV64-NEXT: csrr a1, vlenb
958 ; RV64-NEXT: li a2, 35
959 ; RV64-NEXT: mul a1, a1, a2
960 ; RV64-NEXT: add a1, sp, a1
961 ; RV64-NEXT: addi a1, a1, 16
962 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
963 ; RV64-NEXT: vrgatherei16.vv v16, v24, v12, v0.t
964 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
965 ; RV64-NEXT: vmv.v.v v8, v16
966 ; RV64-NEXT: addi a1, a0, 320
967 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
968 ; RV64-NEXT: vse64.v v8, (a1)
969 ; RV64-NEXT: addi a1, a0, 256
970 ; RV64-NEXT: vse64.v v4, (a1)
971 ; RV64-NEXT: addi a1, a0, 192
972 ; RV64-NEXT: csrr a2, vlenb
973 ; RV64-NEXT: slli a3, a2, 1
974 ; RV64-NEXT: add a2, a3, a2
975 ; RV64-NEXT: add a2, sp, a2
976 ; RV64-NEXT: addi a2, a2, 16
977 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
978 ; RV64-NEXT: vse64.v v8, (a1)
979 ; RV64-NEXT: addi a1, a0, 128
980 ; RV64-NEXT: csrr a2, vlenb
981 ; RV64-NEXT: slli a3, a2, 3
982 ; RV64-NEXT: sub a2, a3, a2
983 ; RV64-NEXT: add a2, sp, a2
984 ; RV64-NEXT: addi a2, a2, 16
985 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
986 ; RV64-NEXT: vse64.v v8, (a1)
987 ; RV64-NEXT: addi a1, a0, 64
988 ; RV64-NEXT: csrr a2, vlenb
989 ; RV64-NEXT: li a3, 11
990 ; RV64-NEXT: mul a2, a2, a3
991 ; RV64-NEXT: add a2, sp, a2
992 ; RV64-NEXT: addi a2, a2, 16
993 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
994 ; RV64-NEXT: vse64.v v8, (a1)
995 ; RV64-NEXT: csrr a1, vlenb
996 ; RV64-NEXT: slli a2, a1, 4
997 ; RV64-NEXT: sub a1, a2, a1
998 ; RV64-NEXT: add a1, sp, a1
999 ; RV64-NEXT: addi a1, a1, 16
1000 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
1001 ; RV64-NEXT: vse64.v v8, (a0)
1002 ; RV64-NEXT: csrr a0, vlenb
1003 ; RV64-NEXT: li a1, 52
1004 ; RV64-NEXT: mul a0, a0, a1
1005 ; RV64-NEXT: add sp, sp, a0
1006 ; RV64-NEXT: addi sp, sp, 16
1008 %interleaved.vec = load <48 x i64>, ptr %ptr
1009 %v0 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 0, i32 6, i32 12, i32 18, i32 24, i32 30, i32 36, i32 42>
1010 %v1 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 1, i32 7, i32 13, i32 19, i32 25, i32 31, i32 37, i32 43>
1011 %v2 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 2, i32 8, i32 14, i32 20, i32 26, i32 32, i32 38, i32 44>
1012 %v3 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 3, i32 9, i32 15, i32 21, i32 27, i32 33, i32 39, i32 45>
1013 %v4 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 4, i32 10, i32 16, i32 22, i32 28, i32 34, i32 40, i32 46>
1014 %v5 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 5, i32 11, i32 17, i32 23, i32 29, i32 35, i32 41, i32 47>
1015 %res0 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} undef, <8 x i64> %v0, 0
1016 %res1 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res0, <8 x i64> %v1, 1
1017 %res2 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res1, <8 x i64> %v2, 2
1018 %res3 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res2, <8 x i64> %v3, 3
1019 %res4 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res3, <8 x i64> %v4, 4
1020 %res5 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res4, <8 x i64> %v5, 5
1021 ret {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res5
1025 ; ------------------------------------------------------------------------------
1027 ; ------------------------------------------------------------------------------
; Factor-2 interleaved store: two <4 x i32> operands are interleaved
; element-wise into one <8 x i32> value that is written to %ptr.
; The assertions that follow are autogenerated (see the NOTE at the top of
; the file) and expect a single two-field segment store (vsseg2e32.v) with
; VL=4, e32, m1; regenerate them with the update script, do not hand-edit.
1029 define void @store_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
1030 ; CHECK-LABEL: store_factor2:
1032 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1033 ; CHECK-NEXT: vsseg2e32.v v8, (a0)
; Mask indices 0-3 select from %v0 and 4-7 from %v1, so the result is
; v0[0], v1[0], v0[1], v1[1], v0[2], v1[2], v0[3], v1[3].
1035 %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
1036 store <8 x i32> %interleaved.vec, ptr %ptr
; Factor-3 interleaved store: three <4 x i32> operands are interleaved
; element-wise into one <12 x i32> value that is written to %ptr.
; The autogenerated assertions that follow expect a single three-field
; segment store (vsseg3e32.v) with VL=4, e32, m1.
1040 define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
1041 ; CHECK-LABEL: store_factor3:
1043 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1044 ; CHECK-NEXT: vsseg3e32.v v8, (a0)
; %s0 is the concatenation %v0 ++ %v1 (identity mask over both operands).
1046 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; %s1 widens %v2 to 8 lanes so it has the same type as %s0; the upper four
; lanes are undefined and are never selected by the final mask.
1047 %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; Indices 0-3 = %v0, 4-7 = %v1, 8-11 = %v2, so each group of three output
; lanes is v0[i], v1[i], v2[i] for i = 0..3.
1048 %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
1049 store <12 x i32> %interleaved.vec, ptr %ptr
; Factor-4 interleaved store: four <4 x i32> operands are interleaved
; element-wise into one <16 x i32> value that is written to %ptr.
; The autogenerated assertions that follow expect a single four-field
; segment store (vsseg4e32.v) with VL=4, e32, m1.
1053 define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
1054 ; CHECK-LABEL: store_factor4:
1056 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1057 ; CHECK-NEXT: vsseg4e32.v v8, (a0)
; %s0 = %v0 ++ %v1 and %s1 = %v2 ++ %v3 (identity masks over both operands).
1059 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1060 %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Indices 0-3 = %v0, 4-7 = %v1, 8-11 = %v2, 12-15 = %v3, so each group of
; four output lanes is v0[i], v1[i], v2[i], v3[i] for i = 0..3.
1061 %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
1062 store <16 x i32> %interleaved.vec, ptr %ptr
; Factor-5 interleaved store: five <4 x i32> operands are interleaved
; element-wise into one <20 x i32> value that is written to %ptr.
; The autogenerated assertions that follow expect a single five-field
; segment store (vsseg5e32.v) with VL=4, e32, m1.
1066 define void @store_factor5(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
1067 ; CHECK-LABEL: store_factor5:
1069 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1070 ; CHECK-NEXT: vsseg5e32.v v8, (a0)
; Pairwise concatenations: %s0 = %v0 ++ %v1, %s1 = %v2 ++ %v3.
1072 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1073 %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; %s2 = %s0 ++ %s1, i.e. %v0 ++ %v1 ++ %v2 ++ %v3 as one 16-lane vector.
1074 %s2 = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; %s3 widens %v4 to 16 lanes so it has the same type as %s2; lanes 4-15 are
; undefined and are never selected by the final mask.
1075 %s3 = shufflevector <4 x i32> %v4, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Indices 0-15 address %s2 and 16-19 address the defined lanes of %s3, so
; each group of five output lanes is v0[i], v1[i], v2[i], v3[i], v4[i].
1076 %interleaved.vec = shufflevector <16 x i32> %s2, <16 x i32> %s3, <20 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 1, i32 5, i32 9, i32 13, i32 17, i32 2, i32 6, i32 10, i32 14, i32 18, i32 3, i32 7, i32 11, i32 15, i32 19>
1077 store <20 x i32> %interleaved.vec, ptr %ptr
1081 define void @store_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5) {
1082 ; CHECK-LABEL: store_factor6:
1084 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1085 ; CHECK-NEXT: vsseg6e16.v v8, (a0)
1087 %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1088 %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1089 %s2 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1090 %s3 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1091 %interleaved.vec = shufflevector <8 x i16> %s2, <8 x i16> %s3, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
1092 store <12 x i16> %interleaved.vec, ptr %ptr