1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \
3 ; RUN: -riscv-use-rematerializable-movimm=false | FileCheck %s --check-prefix=NOREMAT
4 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \
5 ; RUN: --riscv-use-rematerializable-movimm=true | FileCheck %s --check-prefix=REMAT
7 define void @test(ptr %0, ptr %1, i64 %2) {
10 ; NOREMAT-NEXT: addi sp, sp, -400
11 ; NOREMAT-NEXT: .cfi_def_cfa_offset 400
12 ; NOREMAT-NEXT: sd ra, 392(sp) # 8-byte Folded Spill
13 ; NOREMAT-NEXT: sd s0, 384(sp) # 8-byte Folded Spill
14 ; NOREMAT-NEXT: sd s1, 376(sp) # 8-byte Folded Spill
15 ; NOREMAT-NEXT: sd s2, 368(sp) # 8-byte Folded Spill
16 ; NOREMAT-NEXT: sd s3, 360(sp) # 8-byte Folded Spill
17 ; NOREMAT-NEXT: sd s4, 352(sp) # 8-byte Folded Spill
18 ; NOREMAT-NEXT: sd s5, 344(sp) # 8-byte Folded Spill
19 ; NOREMAT-NEXT: sd s6, 336(sp) # 8-byte Folded Spill
20 ; NOREMAT-NEXT: sd s7, 328(sp) # 8-byte Folded Spill
21 ; NOREMAT-NEXT: sd s8, 320(sp) # 8-byte Folded Spill
22 ; NOREMAT-NEXT: sd s9, 312(sp) # 8-byte Folded Spill
23 ; NOREMAT-NEXT: sd s10, 304(sp) # 8-byte Folded Spill
24 ; NOREMAT-NEXT: sd s11, 296(sp) # 8-byte Folded Spill
25 ; NOREMAT-NEXT: .cfi_offset ra, -8
26 ; NOREMAT-NEXT: .cfi_offset s0, -16
27 ; NOREMAT-NEXT: .cfi_offset s1, -24
28 ; NOREMAT-NEXT: .cfi_offset s2, -32
29 ; NOREMAT-NEXT: .cfi_offset s3, -40
30 ; NOREMAT-NEXT: .cfi_offset s4, -48
31 ; NOREMAT-NEXT: .cfi_offset s5, -56
32 ; NOREMAT-NEXT: .cfi_offset s6, -64
33 ; NOREMAT-NEXT: .cfi_offset s7, -72
34 ; NOREMAT-NEXT: .cfi_offset s8, -80
35 ; NOREMAT-NEXT: .cfi_offset s9, -88
36 ; NOREMAT-NEXT: .cfi_offset s10, -96
37 ; NOREMAT-NEXT: .cfi_offset s11, -104
38 ; NOREMAT-NEXT: csrr a2, vlenb
39 ; NOREMAT-NEXT: li a3, 6
40 ; NOREMAT-NEXT: mul a2, a2, a3
41 ; NOREMAT-NEXT: sub sp, sp, a2
42 ; NOREMAT-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x03, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 400 + 6 * vlenb
43 ; NOREMAT-NEXT: li a2, 32
44 ; NOREMAT-NEXT: vsetvli zero, a2, e32, m2, ta, ma
45 ; NOREMAT-NEXT: vle32.v v8, (a0)
46 ; NOREMAT-NEXT: addi a2, a0, 512
47 ; NOREMAT-NEXT: vle32.v v10, (a2)
48 ; NOREMAT-NEXT: addi a2, a0, 1024
49 ; NOREMAT-NEXT: vle32.v v12, (a2)
50 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v10
51 ; NOREMAT-NEXT: vle32.v v8, (a2)
52 ; NOREMAT-NEXT: addi a2, a0, 1536
53 ; NOREMAT-NEXT: vle32.v v14, (a2)
54 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
55 ; NOREMAT-NEXT: vle32.v v10, (a2)
56 ; NOREMAT-NEXT: li a2, 1
57 ; NOREMAT-NEXT: slli a2, a2, 11
58 ; NOREMAT-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
59 ; NOREMAT-NEXT: add a2, a0, a2
60 ; NOREMAT-NEXT: vle32.v v12, (a2)
61 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
62 ; NOREMAT-NEXT: vle32.v v8, (a2)
63 ; NOREMAT-NEXT: li a5, 5
64 ; NOREMAT-NEXT: slli a2, a5, 9
65 ; NOREMAT-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
66 ; NOREMAT-NEXT: add a2, a0, a2
67 ; NOREMAT-NEXT: vle32.v v14, (a2)
68 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
69 ; NOREMAT-NEXT: vle32.v v10, (a2)
70 ; NOREMAT-NEXT: li a2, 3
71 ; NOREMAT-NEXT: slli a3, a2, 10
72 ; NOREMAT-NEXT: sd a3, 256(sp) # 8-byte Folded Spill
73 ; NOREMAT-NEXT: add a3, a0, a3
74 ; NOREMAT-NEXT: vle32.v v12, (a3)
75 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
76 ; NOREMAT-NEXT: vle32.v v8, (a3)
77 ; NOREMAT-NEXT: li a4, 7
78 ; NOREMAT-NEXT: slli a3, a4, 9
79 ; NOREMAT-NEXT: sd a3, 248(sp) # 8-byte Folded Spill
80 ; NOREMAT-NEXT: add a3, a0, a3
81 ; NOREMAT-NEXT: vle32.v v14, (a3)
82 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
83 ; NOREMAT-NEXT: vle32.v v10, (a3)
84 ; NOREMAT-NEXT: lui a3, 1
85 ; NOREMAT-NEXT: add a3, a0, a3
86 ; NOREMAT-NEXT: vle32.v v12, (a3)
87 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
88 ; NOREMAT-NEXT: vle32.v v8, (a3)
89 ; NOREMAT-NEXT: li a3, 9
90 ; NOREMAT-NEXT: slli a6, a3, 9
91 ; NOREMAT-NEXT: sd a6, 240(sp) # 8-byte Folded Spill
92 ; NOREMAT-NEXT: add a6, a0, a6
93 ; NOREMAT-NEXT: vle32.v v14, (a6)
94 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
95 ; NOREMAT-NEXT: vle32.v v10, (a6)
96 ; NOREMAT-NEXT: slli a6, a5, 10
97 ; NOREMAT-NEXT: sd a6, 232(sp) # 8-byte Folded Spill
98 ; NOREMAT-NEXT: add a6, a0, a6
99 ; NOREMAT-NEXT: vle32.v v12, (a6)
100 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
101 ; NOREMAT-NEXT: vle32.v v8, (a6)
102 ; NOREMAT-NEXT: li s8, 11
103 ; NOREMAT-NEXT: slli a6, s8, 9
104 ; NOREMAT-NEXT: sd a6, 224(sp) # 8-byte Folded Spill
105 ; NOREMAT-NEXT: add a6, a0, a6
106 ; NOREMAT-NEXT: vle32.v v14, (a6)
107 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
108 ; NOREMAT-NEXT: vle32.v v10, (a6)
109 ; NOREMAT-NEXT: slli a2, a2, 11
110 ; NOREMAT-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
111 ; NOREMAT-NEXT: add a2, a0, a2
112 ; NOREMAT-NEXT: vle32.v v12, (a2)
113 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
114 ; NOREMAT-NEXT: vle32.v v8, (a2)
115 ; NOREMAT-NEXT: li s2, 13
116 ; NOREMAT-NEXT: slli a2, s2, 9
117 ; NOREMAT-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
118 ; NOREMAT-NEXT: add a2, a0, a2
119 ; NOREMAT-NEXT: vle32.v v14, (a2)
120 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
121 ; NOREMAT-NEXT: vle32.v v10, (a2)
122 ; NOREMAT-NEXT: slli a2, a4, 10
123 ; NOREMAT-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
124 ; NOREMAT-NEXT: add a2, a0, a2
125 ; NOREMAT-NEXT: vle32.v v12, (a2)
126 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
127 ; NOREMAT-NEXT: vle32.v v8, (a2)
128 ; NOREMAT-NEXT: li a2, 15
129 ; NOREMAT-NEXT: slli a6, a2, 9
130 ; NOREMAT-NEXT: sd a6, 192(sp) # 8-byte Folded Spill
131 ; NOREMAT-NEXT: add a6, a0, a6
132 ; NOREMAT-NEXT: vle32.v v26, (a6)
133 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
134 ; NOREMAT-NEXT: vle32.v v16, (a6)
135 ; NOREMAT-NEXT: lui a6, 2
136 ; NOREMAT-NEXT: add a6, a0, a6
137 ; NOREMAT-NEXT: vle32.v v28, (a6)
138 ; NOREMAT-NEXT: vle32.v v10, (a6)
139 ; NOREMAT-NEXT: li a6, 17
140 ; NOREMAT-NEXT: slli a6, a6, 9
141 ; NOREMAT-NEXT: sd a6, 184(sp) # 8-byte Folded Spill
142 ; NOREMAT-NEXT: li t0, 17
143 ; NOREMAT-NEXT: add a6, a0, a6
144 ; NOREMAT-NEXT: vle32.v v30, (a6)
145 ; NOREMAT-NEXT: vle32.v v18, (a6)
146 ; NOREMAT-NEXT: slli a6, a3, 10
147 ; NOREMAT-NEXT: sd a6, 176(sp) # 8-byte Folded Spill
148 ; NOREMAT-NEXT: add a6, a0, a6
149 ; NOREMAT-NEXT: vle32.v v6, (a6)
150 ; NOREMAT-NEXT: vle32.v v20, (a6)
151 ; NOREMAT-NEXT: li a6, 19
152 ; NOREMAT-NEXT: slli a6, a6, 9
153 ; NOREMAT-NEXT: sd a6, 168(sp) # 8-byte Folded Spill
154 ; NOREMAT-NEXT: li a7, 19
155 ; NOREMAT-NEXT: add a6, a0, a6
156 ; NOREMAT-NEXT: vle32.v v4, (a6)
157 ; NOREMAT-NEXT: vle32.v v22, (a6)
158 ; NOREMAT-NEXT: slli a5, a5, 11
159 ; NOREMAT-NEXT: sd a5, 160(sp) # 8-byte Folded Spill
160 ; NOREMAT-NEXT: add a5, a0, a5
161 ; NOREMAT-NEXT: vle32.v v2, (a5)
162 ; NOREMAT-NEXT: vle32.v v12, (a5)
163 ; NOREMAT-NEXT: li s10, 21
164 ; NOREMAT-NEXT: slli a5, s10, 9
165 ; NOREMAT-NEXT: sd a5, 152(sp) # 8-byte Folded Spill
166 ; NOREMAT-NEXT: add a5, a0, a5
167 ; NOREMAT-NEXT: vle32.v v24, (a5)
168 ; NOREMAT-NEXT: vle32.v v14, (a5)
169 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v26
170 ; NOREMAT-NEXT: slli a5, s8, 10
171 ; NOREMAT-NEXT: sd a5, 144(sp) # 8-byte Folded Spill
172 ; NOREMAT-NEXT: add a5, a0, a5
173 ; NOREMAT-NEXT: vle32.v v26, (a5)
174 ; NOREMAT-NEXT: vle32.v v8, (a5)
175 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v28
176 ; NOREMAT-NEXT: li s6, 23
177 ; NOREMAT-NEXT: slli a5, s6, 9
178 ; NOREMAT-NEXT: sd a5, 136(sp) # 8-byte Folded Spill
179 ; NOREMAT-NEXT: add a5, a0, a5
180 ; NOREMAT-NEXT: vle32.v v28, (a5)
181 ; NOREMAT-NEXT: vle32.v v16, (a5)
182 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v30
183 ; NOREMAT-NEXT: lui a5, 3
184 ; NOREMAT-NEXT: add a5, a0, a5
185 ; NOREMAT-NEXT: vle32.v v30, (a5)
186 ; NOREMAT-NEXT: vle32.v v10, (a5)
187 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v6
188 ; NOREMAT-NEXT: li s3, 25
189 ; NOREMAT-NEXT: slli a5, s3, 9
190 ; NOREMAT-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
191 ; NOREMAT-NEXT: add a5, a0, a5
192 ; NOREMAT-NEXT: vle32.v v6, (a5)
193 ; NOREMAT-NEXT: vle32.v v18, (a5)
194 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v4
195 ; NOREMAT-NEXT: slli a5, s2, 10
196 ; NOREMAT-NEXT: sd a5, 120(sp) # 8-byte Folded Spill
197 ; NOREMAT-NEXT: add a5, a0, a5
198 ; NOREMAT-NEXT: vle32.v v4, (a5)
199 ; NOREMAT-NEXT: vle32.v v20, (a5)
200 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v2
201 ; NOREMAT-NEXT: li t5, 27
202 ; NOREMAT-NEXT: slli a5, t5, 9
203 ; NOREMAT-NEXT: sd a5, 112(sp) # 8-byte Folded Spill
204 ; NOREMAT-NEXT: add a5, a0, a5
205 ; NOREMAT-NEXT: vle32.v v2, (a5)
206 ; NOREMAT-NEXT: vle32.v v22, (a5)
207 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v24
208 ; NOREMAT-NEXT: slli a4, a4, 11
209 ; NOREMAT-NEXT: sd a4, 104(sp) # 8-byte Folded Spill
210 ; NOREMAT-NEXT: add a4, a0, a4
211 ; NOREMAT-NEXT: vle32.v v24, (a4)
212 ; NOREMAT-NEXT: vle32.v v12, (a4)
213 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v26
214 ; NOREMAT-NEXT: li t2, 29
215 ; NOREMAT-NEXT: slli a4, t2, 9
216 ; NOREMAT-NEXT: sd a4, 96(sp) # 8-byte Folded Spill
217 ; NOREMAT-NEXT: add a4, a0, a4
218 ; NOREMAT-NEXT: vle32.v v26, (a4)
219 ; NOREMAT-NEXT: vle32.v v14, (a4)
220 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v28
221 ; NOREMAT-NEXT: slli a4, a2, 10
222 ; NOREMAT-NEXT: sd a4, 88(sp) # 8-byte Folded Spill
223 ; NOREMAT-NEXT: add a4, a0, a4
224 ; NOREMAT-NEXT: vle32.v v28, (a4)
225 ; NOREMAT-NEXT: vle32.v v8, (a4)
226 ; NOREMAT-NEXT: csrr a4, vlenb
227 ; NOREMAT-NEXT: slli a4, a4, 2
228 ; NOREMAT-NEXT: add a4, sp, a4
229 ; NOREMAT-NEXT: addi a4, a4, 288
230 ; NOREMAT-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill
231 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v30
232 ; NOREMAT-NEXT: li a5, 31
233 ; NOREMAT-NEXT: slli a4, a5, 9
234 ; NOREMAT-NEXT: sd a4, 80(sp) # 8-byte Folded Spill
235 ; NOREMAT-NEXT: add a4, a0, a4
236 ; NOREMAT-NEXT: vle32.v v30, (a4)
237 ; NOREMAT-NEXT: vle32.v v16, (a4)
238 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v6
239 ; NOREMAT-NEXT: lui a6, 4
240 ; NOREMAT-NEXT: add a4, a0, a6
241 ; NOREMAT-NEXT: vle32.v v6, (a4)
242 ; NOREMAT-NEXT: vle32.v v8, (a4)
243 ; NOREMAT-NEXT: csrr a4, vlenb
244 ; NOREMAT-NEXT: slli a4, a4, 1
245 ; NOREMAT-NEXT: add a4, sp, a4
246 ; NOREMAT-NEXT: addi a4, a4, 288
247 ; NOREMAT-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill
248 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v4
249 ; NOREMAT-NEXT: addiw a4, a6, 512
250 ; NOREMAT-NEXT: sd a4, 72(sp) # 8-byte Folded Spill
251 ; NOREMAT-NEXT: add a4, a0, a4
252 ; NOREMAT-NEXT: vle32.v v4, (a4)
253 ; NOREMAT-NEXT: vle32.v v18, (a4)
254 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v2
255 ; NOREMAT-NEXT: slli a4, t0, 10
256 ; NOREMAT-NEXT: sd a4, 64(sp) # 8-byte Folded Spill
257 ; NOREMAT-NEXT: add a4, a0, a4
258 ; NOREMAT-NEXT: vle32.v v2, (a4)
259 ; NOREMAT-NEXT: vle32.v v20, (a4)
260 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v24
261 ; NOREMAT-NEXT: addiw a4, a6, 1536
262 ; NOREMAT-NEXT: sd a4, 56(sp) # 8-byte Folded Spill
263 ; NOREMAT-NEXT: add a4, a0, a4
264 ; NOREMAT-NEXT: vle32.v v0, (a4)
265 ; NOREMAT-NEXT: vle32.v v22, (a4)
266 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v26
267 ; NOREMAT-NEXT: slli a3, a3, 11
268 ; NOREMAT-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
269 ; NOREMAT-NEXT: add a3, a0, a3
270 ; NOREMAT-NEXT: vle32.v v12, (a3)
271 ; NOREMAT-NEXT: vle32.v v8, (a3)
272 ; NOREMAT-NEXT: addi a3, sp, 288
273 ; NOREMAT-NEXT: vs2r.v v8, (a3) # Unknown-size Folded Spill
274 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v28
275 ; NOREMAT-NEXT: lui s1, 5
276 ; NOREMAT-NEXT: addiw a3, s1, -1536
277 ; NOREMAT-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
278 ; NOREMAT-NEXT: add a3, a0, a3
279 ; NOREMAT-NEXT: vle32.v v8, (a3)
280 ; NOREMAT-NEXT: vle32.v v24, (a3)
281 ; NOREMAT-NEXT: csrr a3, vlenb
282 ; NOREMAT-NEXT: slli a3, a3, 2
283 ; NOREMAT-NEXT: add a3, sp, a3
284 ; NOREMAT-NEXT: addi a3, a3, 288
285 ; NOREMAT-NEXT: vl2r.v v10, (a3) # Unknown-size Folded Reload
286 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v30
287 ; NOREMAT-NEXT: slli a3, a7, 10
288 ; NOREMAT-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
289 ; NOREMAT-NEXT: add a3, a0, a3
290 ; NOREMAT-NEXT: vle32.v v10, (a3)
291 ; NOREMAT-NEXT: vle32.v v14, (a3)
292 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v6
293 ; NOREMAT-NEXT: addiw a3, s1, -512
294 ; NOREMAT-NEXT: sd a3, 24(sp) # 8-byte Folded Spill
295 ; NOREMAT-NEXT: add a3, a0, a3
296 ; NOREMAT-NEXT: vle32.v v6, (a3)
297 ; NOREMAT-NEXT: vle32.v v16, (a3)
298 ; NOREMAT-NEXT: csrr a3, vlenb
299 ; NOREMAT-NEXT: slli a3, a3, 1
300 ; NOREMAT-NEXT: add a3, sp, a3
301 ; NOREMAT-NEXT: addi a3, a3, 288
302 ; NOREMAT-NEXT: vl2r.v v26, (a3) # Unknown-size Folded Reload
303 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v4
304 ; NOREMAT-NEXT: add a3, a0, s1
305 ; NOREMAT-NEXT: vle32.v v26, (a3)
306 ; NOREMAT-NEXT: vle32.v v28, (a3)
307 ; NOREMAT-NEXT: csrr a3, vlenb
308 ; NOREMAT-NEXT: slli a3, a3, 2
309 ; NOREMAT-NEXT: add a3, sp, a3
310 ; NOREMAT-NEXT: addi a3, a3, 288
311 ; NOREMAT-NEXT: vs2r.v v28, (a3) # Unknown-size Folded Spill
312 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v2
313 ; NOREMAT-NEXT: addiw ra, s1, 512
314 ; NOREMAT-NEXT: add a3, a0, ra
315 ; NOREMAT-NEXT: vle32.v v28, (a3)
316 ; NOREMAT-NEXT: vle32.v v30, (a3)
317 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v0
318 ; NOREMAT-NEXT: slli s11, s10, 10
319 ; NOREMAT-NEXT: add a3, a0, s11
320 ; NOREMAT-NEXT: vle32.v v4, (a3)
321 ; NOREMAT-NEXT: vle32.v v18, (a3)
322 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v12
323 ; NOREMAT-NEXT: addiw s10, s1, 1536
324 ; NOREMAT-NEXT: add a3, a0, s10
325 ; NOREMAT-NEXT: vle32.v v2, (a3)
326 ; NOREMAT-NEXT: vle32.v v20, (a3)
327 ; NOREMAT-NEXT: addi a3, sp, 288
328 ; NOREMAT-NEXT: vl2r.v v12, (a3) # Unknown-size Folded Reload
329 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v8
330 ; NOREMAT-NEXT: slli s9, s8, 11
331 ; NOREMAT-NEXT: add a3, a0, s9
332 ; NOREMAT-NEXT: vle32.v v0, (a3)
333 ; NOREMAT-NEXT: vle32.v v12, (a3)
334 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v10
335 ; NOREMAT-NEXT: lui t0, 6
336 ; NOREMAT-NEXT: addiw s8, t0, -1536
337 ; NOREMAT-NEXT: add a3, a0, s8
338 ; NOREMAT-NEXT: vle32.v v8, (a3)
339 ; NOREMAT-NEXT: vle32.v v22, (a3)
340 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v6
341 ; NOREMAT-NEXT: slli s7, s6, 10
342 ; NOREMAT-NEXT: add a3, a0, s7
343 ; NOREMAT-NEXT: vle32.v v10, (a3)
344 ; NOREMAT-NEXT: vle32.v v14, (a3)
345 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v26
346 ; NOREMAT-NEXT: addiw s6, t0, -512
347 ; NOREMAT-NEXT: add a3, a0, s6
348 ; NOREMAT-NEXT: vle32.v v6, (a3)
349 ; NOREMAT-NEXT: vle32.v v16, (a3)
350 ; NOREMAT-NEXT: csrr a3, vlenb
351 ; NOREMAT-NEXT: slli a3, a3, 2
352 ; NOREMAT-NEXT: add a3, sp, a3
353 ; NOREMAT-NEXT: addi a3, a3, 288
354 ; NOREMAT-NEXT: vl2r.v v24, (a3) # Unknown-size Folded Reload
355 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v28
356 ; NOREMAT-NEXT: add a3, a0, t0
357 ; NOREMAT-NEXT: vle32.v v24, (a3)
358 ; NOREMAT-NEXT: vle32.v v26, (a3)
359 ; NOREMAT-NEXT: csrr a3, vlenb
360 ; NOREMAT-NEXT: slli a3, a3, 2
361 ; NOREMAT-NEXT: add a3, sp, a3
362 ; NOREMAT-NEXT: addi a3, a3, 288
363 ; NOREMAT-NEXT: vs2r.v v26, (a3) # Unknown-size Folded Spill
364 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v4
365 ; NOREMAT-NEXT: addiw s5, t0, 512
366 ; NOREMAT-NEXT: add a3, a0, s5
367 ; NOREMAT-NEXT: vle32.v v26, (a3)
368 ; NOREMAT-NEXT: vle32.v v28, (a3)
369 ; NOREMAT-NEXT: csrr a3, vlenb
370 ; NOREMAT-NEXT: slli a3, a3, 1
371 ; NOREMAT-NEXT: add a3, sp, a3
372 ; NOREMAT-NEXT: addi a3, a3, 288
373 ; NOREMAT-NEXT: vs2r.v v28, (a3) # Unknown-size Folded Spill
374 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v2
375 ; NOREMAT-NEXT: slli s4, s3, 10
376 ; NOREMAT-NEXT: add a3, a0, s4
377 ; NOREMAT-NEXT: vle32.v v28, (a3)
378 ; NOREMAT-NEXT: vle32.v v18, (a3)
379 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v0
380 ; NOREMAT-NEXT: addiw s3, t0, 1536
381 ; NOREMAT-NEXT: add a3, a0, s3
382 ; NOREMAT-NEXT: vle32.v v30, (a3)
383 ; NOREMAT-NEXT: vle32.v v20, (a3)
384 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v8
385 ; NOREMAT-NEXT: slli s2, s2, 11
386 ; NOREMAT-NEXT: add a3, a0, s2
387 ; NOREMAT-NEXT: vle32.v v4, (a3)
388 ; NOREMAT-NEXT: vle32.v v12, (a3)
389 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v10
390 ; NOREMAT-NEXT: lui a3, 7
391 ; NOREMAT-NEXT: addiw s0, a3, -1536
392 ; NOREMAT-NEXT: add a4, a0, s0
393 ; NOREMAT-NEXT: vle32.v v2, (a4)
394 ; NOREMAT-NEXT: vle32.v v22, (a4)
395 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v6
396 ; NOREMAT-NEXT: slli t6, t5, 10
397 ; NOREMAT-NEXT: add a4, a0, t6
398 ; NOREMAT-NEXT: vle32.v v0, (a4)
399 ; NOREMAT-NEXT: vle32.v v14, (a4)
400 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v24
401 ; NOREMAT-NEXT: addiw t5, a3, -512
402 ; NOREMAT-NEXT: add a4, a0, t5
403 ; NOREMAT-NEXT: vle32.v v6, (a4)
404 ; NOREMAT-NEXT: vle32.v v16, (a4)
405 ; NOREMAT-NEXT: csrr a4, vlenb
406 ; NOREMAT-NEXT: slli a4, a4, 2
407 ; NOREMAT-NEXT: add a4, sp, a4
408 ; NOREMAT-NEXT: addi a4, a4, 288
409 ; NOREMAT-NEXT: vl2r.v v8, (a4) # Unknown-size Folded Reload
410 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v26
411 ; NOREMAT-NEXT: add a4, a0, a3
412 ; NOREMAT-NEXT: vle32.v v26, (a4)
413 ; NOREMAT-NEXT: vle32.v v8, (a4)
414 ; NOREMAT-NEXT: csrr a4, vlenb
415 ; NOREMAT-NEXT: slli a4, a4, 1
416 ; NOREMAT-NEXT: add a4, sp, a4
417 ; NOREMAT-NEXT: addi a4, a4, 288
418 ; NOREMAT-NEXT: vl2r.v v10, (a4) # Unknown-size Folded Reload
419 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v28
420 ; NOREMAT-NEXT: addiw t4, a3, 512
421 ; NOREMAT-NEXT: add a4, a0, t4
422 ; NOREMAT-NEXT: vle32.v v10, (a4)
423 ; NOREMAT-NEXT: vle32.v v24, (a4)
424 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v30
425 ; NOREMAT-NEXT: slli t3, t2, 10
426 ; NOREMAT-NEXT: add a4, a0, t3
427 ; NOREMAT-NEXT: vle32.v v18, (a4)
428 ; NOREMAT-NEXT: vle32.v v28, (a4)
429 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v4
430 ; NOREMAT-NEXT: addiw t2, a3, 1536
431 ; NOREMAT-NEXT: add a4, a0, t2
432 ; NOREMAT-NEXT: vle32.v v20, (a4)
433 ; NOREMAT-NEXT: vle32.v v30, (a4)
434 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v2
435 ; NOREMAT-NEXT: slli t1, a2, 11
436 ; NOREMAT-NEXT: add a2, a0, t1
437 ; NOREMAT-NEXT: vle32.v v12, (a2)
438 ; NOREMAT-NEXT: vle32.v v4, (a2)
439 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v0
440 ; NOREMAT-NEXT: lui a2, 8
441 ; NOREMAT-NEXT: addiw a7, a2, -1536
442 ; NOREMAT-NEXT: add a4, a0, a7
443 ; NOREMAT-NEXT: vle32.v v22, (a4)
444 ; NOREMAT-NEXT: vle32.v v2, (a4)
445 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v6
446 ; NOREMAT-NEXT: slli a6, a5, 10
447 ; NOREMAT-NEXT: add a4, a0, a6
448 ; NOREMAT-NEXT: vle32.v v14, (a4)
449 ; NOREMAT-NEXT: vle32.v v6, (a4)
450 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v26
451 ; NOREMAT-NEXT: addiw a5, a2, -512
452 ; NOREMAT-NEXT: add a4, a0, a5
453 ; NOREMAT-NEXT: vle32.v v16, (a4)
454 ; NOREMAT-NEXT: vle32.v v26, (a4)
455 ; NOREMAT-NEXT: add a0, a0, a2
456 ; NOREMAT-NEXT: vle32.v v0, (a0)
457 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v10
458 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v18
459 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v28, v20
460 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v12
461 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v22
462 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v14
463 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v16
464 ; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v0
465 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
466 ; NOREMAT-NEXT: addi a0, a1, 1024
467 ; NOREMAT-NEXT: vse32.v v8, (a0)
468 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
469 ; NOREMAT-NEXT: addi a0, a1, 1536
470 ; NOREMAT-NEXT: vse32.v v8, (a0)
471 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
472 ; NOREMAT-NEXT: ld a0, 272(sp) # 8-byte Folded Reload
473 ; NOREMAT-NEXT: add a0, a1, a0
474 ; NOREMAT-NEXT: vse32.v v8, (a0)
475 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
476 ; NOREMAT-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
477 ; NOREMAT-NEXT: add a0, a1, a0
478 ; NOREMAT-NEXT: vse32.v v8, (a0)
479 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
480 ; NOREMAT-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
481 ; NOREMAT-NEXT: add a0, a1, a0
482 ; NOREMAT-NEXT: vse32.v v8, (a0)
483 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
484 ; NOREMAT-NEXT: ld a0, 248(sp) # 8-byte Folded Reload
485 ; NOREMAT-NEXT: add a0, a1, a0
486 ; NOREMAT-NEXT: vse32.v v8, (a0)
487 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
488 ; NOREMAT-NEXT: lui a0, 1
489 ; NOREMAT-NEXT: add a0, a1, a0
490 ; NOREMAT-NEXT: vse32.v v8, (a0)
491 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
492 ; NOREMAT-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
493 ; NOREMAT-NEXT: add a0, a1, a0
494 ; NOREMAT-NEXT: vse32.v v8, (a0)
495 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
496 ; NOREMAT-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
497 ; NOREMAT-NEXT: add a0, a1, a0
498 ; NOREMAT-NEXT: vse32.v v8, (a0)
499 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
500 ; NOREMAT-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
501 ; NOREMAT-NEXT: add a0, a1, a0
502 ; NOREMAT-NEXT: vse32.v v8, (a0)
503 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
504 ; NOREMAT-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
505 ; NOREMAT-NEXT: add a0, a1, a0
506 ; NOREMAT-NEXT: vse32.v v8, (a0)
507 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
508 ; NOREMAT-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
509 ; NOREMAT-NEXT: add a0, a1, a0
510 ; NOREMAT-NEXT: vse32.v v8, (a0)
511 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
512 ; NOREMAT-NEXT: ld a0, 200(sp) # 8-byte Folded Reload
513 ; NOREMAT-NEXT: add a0, a1, a0
514 ; NOREMAT-NEXT: vse32.v v8, (a0)
515 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
516 ; NOREMAT-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
517 ; NOREMAT-NEXT: add a0, a1, a0
518 ; NOREMAT-NEXT: vse32.v v8, (a0)
519 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
520 ; NOREMAT-NEXT: lui a0, 2
521 ; NOREMAT-NEXT: add a0, a1, a0
522 ; NOREMAT-NEXT: vse32.v v8, (a0)
523 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
524 ; NOREMAT-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
525 ; NOREMAT-NEXT: add a0, a1, a0
526 ; NOREMAT-NEXT: vse32.v v8, (a0)
527 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
528 ; NOREMAT-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
529 ; NOREMAT-NEXT: add a0, a1, a0
530 ; NOREMAT-NEXT: vse32.v v8, (a0)
531 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
532 ; NOREMAT-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
533 ; NOREMAT-NEXT: add a0, a1, a0
534 ; NOREMAT-NEXT: vse32.v v8, (a0)
535 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
536 ; NOREMAT-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
537 ; NOREMAT-NEXT: add a0, a1, a0
538 ; NOREMAT-NEXT: vse32.v v8, (a0)
539 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
540 ; NOREMAT-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
541 ; NOREMAT-NEXT: add a0, a1, a0
542 ; NOREMAT-NEXT: vse32.v v8, (a0)
543 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
544 ; NOREMAT-NEXT: ld a0, 144(sp) # 8-byte Folded Reload
545 ; NOREMAT-NEXT: add a0, a1, a0
546 ; NOREMAT-NEXT: vse32.v v8, (a0)
547 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
548 ; NOREMAT-NEXT: ld a0, 136(sp) # 8-byte Folded Reload
549 ; NOREMAT-NEXT: add a0, a1, a0
550 ; NOREMAT-NEXT: vse32.v v8, (a0)
551 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
552 ; NOREMAT-NEXT: lui a0, 3
553 ; NOREMAT-NEXT: add a0, a1, a0
554 ; NOREMAT-NEXT: vse32.v v8, (a0)
555 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
556 ; NOREMAT-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
557 ; NOREMAT-NEXT: add a0, a1, a0
558 ; NOREMAT-NEXT: vse32.v v8, (a0)
559 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
560 ; NOREMAT-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
561 ; NOREMAT-NEXT: add a0, a1, a0
562 ; NOREMAT-NEXT: vse32.v v8, (a0)
563 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
564 ; NOREMAT-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
565 ; NOREMAT-NEXT: add a0, a1, a0
566 ; NOREMAT-NEXT: vse32.v v8, (a0)
567 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
568 ; NOREMAT-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
569 ; NOREMAT-NEXT: add a0, a1, a0
570 ; NOREMAT-NEXT: vse32.v v8, (a0)
571 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
572 ; NOREMAT-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
573 ; NOREMAT-NEXT: add a0, a1, a0
574 ; NOREMAT-NEXT: vse32.v v8, (a0)
575 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
576 ; NOREMAT-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
577 ; NOREMAT-NEXT: add a0, a1, a0
578 ; NOREMAT-NEXT: vse32.v v8, (a0)
579 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
580 ; NOREMAT-NEXT: ld a0, 80(sp) # 8-byte Folded Reload
581 ; NOREMAT-NEXT: add a0, a1, a0
582 ; NOREMAT-NEXT: vse32.v v8, (a0)
583 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
584 ; NOREMAT-NEXT: lui a0, 4
585 ; NOREMAT-NEXT: add a0, a1, a0
586 ; NOREMAT-NEXT: vse32.v v8, (a0)
587 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
588 ; NOREMAT-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
589 ; NOREMAT-NEXT: add a0, a1, a0
590 ; NOREMAT-NEXT: vse32.v v8, (a0)
591 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
592 ; NOREMAT-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
593 ; NOREMAT-NEXT: add a0, a1, a0
594 ; NOREMAT-NEXT: vse32.v v8, (a0)
595 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
596 ; NOREMAT-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
597 ; NOREMAT-NEXT: add a0, a1, a0
598 ; NOREMAT-NEXT: vse32.v v8, (a0)
599 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
600 ; NOREMAT-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
601 ; NOREMAT-NEXT: add a0, a1, a0
602 ; NOREMAT-NEXT: vse32.v v8, (a0)
603 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
604 ; NOREMAT-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
605 ; NOREMAT-NEXT: add a0, a1, a0
606 ; NOREMAT-NEXT: vse32.v v8, (a0)
607 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
608 ; NOREMAT-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
609 ; NOREMAT-NEXT: add a0, a1, a0
610 ; NOREMAT-NEXT: vse32.v v8, (a0)
611 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
612 ; NOREMAT-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
613 ; NOREMAT-NEXT: add a0, a1, a0
614 ; NOREMAT-NEXT: vse32.v v8, (a0)
615 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
616 ; NOREMAT-NEXT: add s1, a1, s1
617 ; NOREMAT-NEXT: vse32.v v8, (s1)
618 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
619 ; NOREMAT-NEXT: add ra, a1, ra
620 ; NOREMAT-NEXT: vse32.v v8, (ra)
621 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
622 ; NOREMAT-NEXT: add s11, a1, s11
623 ; NOREMAT-NEXT: vse32.v v8, (s11)
624 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
625 ; NOREMAT-NEXT: add s10, a1, s10
626 ; NOREMAT-NEXT: vse32.v v8, (s10)
627 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
628 ; NOREMAT-NEXT: add s9, a1, s9
629 ; NOREMAT-NEXT: vse32.v v8, (s9)
630 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
631 ; NOREMAT-NEXT: add s8, a1, s8
632 ; NOREMAT-NEXT: vse32.v v8, (s8)
633 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
634 ; NOREMAT-NEXT: add s7, a1, s7
635 ; NOREMAT-NEXT: vse32.v v8, (s7)
636 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
637 ; NOREMAT-NEXT: add s6, a1, s6
638 ; NOREMAT-NEXT: vse32.v v8, (s6)
639 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
640 ; NOREMAT-NEXT: add t0, a1, t0
641 ; NOREMAT-NEXT: vse32.v v8, (t0)
642 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
643 ; NOREMAT-NEXT: add s5, a1, s5
644 ; NOREMAT-NEXT: vse32.v v8, (s5)
645 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
646 ; NOREMAT-NEXT: add s4, a1, s4
647 ; NOREMAT-NEXT: vse32.v v8, (s4)
648 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
649 ; NOREMAT-NEXT: add s3, a1, s3
650 ; NOREMAT-NEXT: vse32.v v8, (s3)
651 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
652 ; NOREMAT-NEXT: add s2, a1, s2
653 ; NOREMAT-NEXT: vse32.v v8, (s2)
654 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
655 ; NOREMAT-NEXT: add s0, a1, s0
656 ; NOREMAT-NEXT: vse32.v v8, (s0)
657 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
658 ; NOREMAT-NEXT: add t6, a1, t6
659 ; NOREMAT-NEXT: vse32.v v8, (t6)
660 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
661 ; NOREMAT-NEXT: add t5, a1, t5
662 ; NOREMAT-NEXT: vse32.v v8, (t5)
663 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
664 ; NOREMAT-NEXT: add a3, a1, a3
665 ; NOREMAT-NEXT: vse32.v v8, (a3)
666 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
667 ; NOREMAT-NEXT: add t4, a1, t4
668 ; NOREMAT-NEXT: vse32.v v8, (t4)
669 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
670 ; NOREMAT-NEXT: add t3, a1, t3
671 ; NOREMAT-NEXT: vse32.v v8, (t3)
672 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
673 ; NOREMAT-NEXT: add t2, a1, t2
674 ; NOREMAT-NEXT: vse32.v v8, (t2)
675 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
676 ; NOREMAT-NEXT: add t1, a1, t1
677 ; NOREMAT-NEXT: vse32.v v8, (t1)
678 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
679 ; NOREMAT-NEXT: add a7, a1, a7
680 ; NOREMAT-NEXT: vse32.v v8, (a7)
681 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
682 ; NOREMAT-NEXT: add a6, a1, a6
683 ; NOREMAT-NEXT: vse32.v v8, (a6)
684 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
685 ; NOREMAT-NEXT: add a5, a1, a5
686 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
687 ; NOREMAT-NEXT: vse32.v v8, (a5)
688 ; NOREMAT-NEXT: add a0, a1, a2
689 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
690 ; NOREMAT-NEXT: vse32.v v10, (a0)
691 ; NOREMAT-NEXT: addiw a0, a2, 512
692 ; NOREMAT-NEXT: add a0, a1, a0
693 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
694 ; NOREMAT-NEXT: vse32.v v8, (a0)
695 ; NOREMAT-NEXT: addiw a0, a2, 1024
696 ; NOREMAT-NEXT: add a0, a1, a0
697 ; NOREMAT-NEXT: vse32.v v10, (a0)
698 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
699 ; NOREMAT-NEXT: addiw a0, a2, 1536
700 ; NOREMAT-NEXT: add a0, a1, a0
701 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
702 ; NOREMAT-NEXT: vse32.v v8, (a0)
703 ; NOREMAT-NEXT: li a0, 17
704 ; NOREMAT-NEXT: slli a0, a0, 11
705 ; NOREMAT-NEXT: add a0, a1, a0
706 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
707 ; NOREMAT-NEXT: vse32.v v10, (a0)
708 ; NOREMAT-NEXT: lui a0, 9
709 ; NOREMAT-NEXT: addiw a2, a0, -1536
710 ; NOREMAT-NEXT: add a2, a1, a2
711 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
712 ; NOREMAT-NEXT: vse32.v v8, (a2)
713 ; NOREMAT-NEXT: addiw a2, a0, -1024
714 ; NOREMAT-NEXT: add a2, a1, a2
715 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
716 ; NOREMAT-NEXT: vse32.v v10, (a2)
717 ; NOREMAT-NEXT: addiw a2, a0, -512
718 ; NOREMAT-NEXT: add a2, a1, a2
719 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
720 ; NOREMAT-NEXT: vse32.v v8, (a2)
721 ; NOREMAT-NEXT: add a2, a1, a0
722 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
723 ; NOREMAT-NEXT: vse32.v v10, (a2)
724 ; NOREMAT-NEXT: addiw a2, a0, 512
725 ; NOREMAT-NEXT: add a2, a1, a2
726 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
727 ; NOREMAT-NEXT: vse32.v v8, (a2)
728 ; NOREMAT-NEXT: addiw a2, a0, 1024
729 ; NOREMAT-NEXT: add a2, a1, a2
730 ; NOREMAT-NEXT: vse32.v v10, (a2)
731 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
732 ; NOREMAT-NEXT: addiw a0, a0, 1536
733 ; NOREMAT-NEXT: add a0, a1, a0
734 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
735 ; NOREMAT-NEXT: vse32.v v8, (a0)
736 ; NOREMAT-NEXT: li a0, 19
737 ; NOREMAT-NEXT: slli a0, a0, 11
738 ; NOREMAT-NEXT: add a0, a1, a0
739 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
740 ; NOREMAT-NEXT: vse32.v v10, (a0)
741 ; NOREMAT-NEXT: lui a0, 10
742 ; NOREMAT-NEXT: addiw a2, a0, -1536
743 ; NOREMAT-NEXT: add a2, a1, a2
744 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
745 ; NOREMAT-NEXT: vse32.v v8, (a2)
746 ; NOREMAT-NEXT: addiw a2, a0, -1024
747 ; NOREMAT-NEXT: add a2, a1, a2
748 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
749 ; NOREMAT-NEXT: vse32.v v10, (a2)
750 ; NOREMAT-NEXT: addiw a2, a0, -512
751 ; NOREMAT-NEXT: add a2, a1, a2
752 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
753 ; NOREMAT-NEXT: vse32.v v8, (a2)
754 ; NOREMAT-NEXT: add a2, a1, a0
755 ; NOREMAT-NEXT: vse32.v v10, (a2)
756 ; NOREMAT-NEXT: addiw a0, a0, 512
757 ; NOREMAT-NEXT: add a0, a1, a0
758 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
759 ; NOREMAT-NEXT: vse32.v v8, (a0)
760 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
761 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
762 ; NOREMAT-NEXT: csrr a0, vlenb
763 ; NOREMAT-NEXT: li a1, 6
764 ; NOREMAT-NEXT: mul a0, a0, a1
765 ; NOREMAT-NEXT: add sp, sp, a0
766 ; NOREMAT-NEXT: ld ra, 392(sp) # 8-byte Folded Reload
767 ; NOREMAT-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
768 ; NOREMAT-NEXT: ld s1, 376(sp) # 8-byte Folded Reload
769 ; NOREMAT-NEXT: ld s2, 368(sp) # 8-byte Folded Reload
770 ; NOREMAT-NEXT: ld s3, 360(sp) # 8-byte Folded Reload
771 ; NOREMAT-NEXT: ld s4, 352(sp) # 8-byte Folded Reload
772 ; NOREMAT-NEXT: ld s5, 344(sp) # 8-byte Folded Reload
773 ; NOREMAT-NEXT: ld s6, 336(sp) # 8-byte Folded Reload
774 ; NOREMAT-NEXT: ld s7, 328(sp) # 8-byte Folded Reload
775 ; NOREMAT-NEXT: ld s8, 320(sp) # 8-byte Folded Reload
776 ; NOREMAT-NEXT: ld s9, 312(sp) # 8-byte Folded Reload
777 ; NOREMAT-NEXT: ld s10, 304(sp) # 8-byte Folded Reload
778 ; NOREMAT-NEXT: ld s11, 296(sp) # 8-byte Folded Reload
779 ; NOREMAT-NEXT: addi sp, sp, 400
784 ; REMAT-NEXT: addi sp, sp, -112
785 ; REMAT-NEXT: .cfi_def_cfa_offset 112
786 ; REMAT-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
787 ; REMAT-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
788 ; REMAT-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
789 ; REMAT-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
790 ; REMAT-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
791 ; REMAT-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
792 ; REMAT-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
793 ; REMAT-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
794 ; REMAT-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
795 ; REMAT-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
796 ; REMAT-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
797 ; REMAT-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
798 ; REMAT-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
799 ; REMAT-NEXT: .cfi_offset ra, -8
800 ; REMAT-NEXT: .cfi_offset s0, -16
801 ; REMAT-NEXT: .cfi_offset s1, -24
802 ; REMAT-NEXT: .cfi_offset s2, -32
803 ; REMAT-NEXT: .cfi_offset s3, -40
804 ; REMAT-NEXT: .cfi_offset s4, -48
805 ; REMAT-NEXT: .cfi_offset s5, -56
806 ; REMAT-NEXT: .cfi_offset s6, -64
807 ; REMAT-NEXT: .cfi_offset s7, -72
808 ; REMAT-NEXT: .cfi_offset s8, -80
809 ; REMAT-NEXT: .cfi_offset s9, -88
810 ; REMAT-NEXT: .cfi_offset s10, -96
811 ; REMAT-NEXT: .cfi_offset s11, -104
812 ; REMAT-NEXT: li a2, 32
813 ; REMAT-NEXT: vsetvli zero, a2, e32, m2, ta, ma
814 ; REMAT-NEXT: vle32.v v8, (a0)
815 ; REMAT-NEXT: addi a2, a0, 512
816 ; REMAT-NEXT: vle32.v v10, (a2)
817 ; REMAT-NEXT: addi a2, a0, 1024
818 ; REMAT-NEXT: vle32.v v12, (a2)
819 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v10
820 ; REMAT-NEXT: vle32.v v8, (a2)
821 ; REMAT-NEXT: addi a2, a0, 1536
822 ; REMAT-NEXT: vle32.v v14, (a2)
823 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
824 ; REMAT-NEXT: vle32.v v10, (a2)
825 ; REMAT-NEXT: li a2, 1
826 ; REMAT-NEXT: slli a2, a2, 11
827 ; REMAT-NEXT: add a2, a0, a2
828 ; REMAT-NEXT: vle32.v v12, (a2)
829 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
830 ; REMAT-NEXT: vle32.v v8, (a2)
831 ; REMAT-NEXT: li a2, 5
832 ; REMAT-NEXT: slli a2, a2, 9
833 ; REMAT-NEXT: add a2, a0, a2
834 ; REMAT-NEXT: vle32.v v14, (a2)
835 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
836 ; REMAT-NEXT: vle32.v v10, (a2)
837 ; REMAT-NEXT: li a2, 3
838 ; REMAT-NEXT: slli a2, a2, 10
839 ; REMAT-NEXT: add a2, a0, a2
840 ; REMAT-NEXT: vle32.v v12, (a2)
841 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
842 ; REMAT-NEXT: vle32.v v8, (a2)
843 ; REMAT-NEXT: li a2, 7
844 ; REMAT-NEXT: slli a2, a2, 9
845 ; REMAT-NEXT: add a2, a0, a2
846 ; REMAT-NEXT: vle32.v v14, (a2)
847 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
848 ; REMAT-NEXT: vle32.v v10, (a2)
849 ; REMAT-NEXT: lui a2, 1
850 ; REMAT-NEXT: add a2, a0, a2
851 ; REMAT-NEXT: vle32.v v12, (a2)
852 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
853 ; REMAT-NEXT: vle32.v v8, (a2)
854 ; REMAT-NEXT: li a2, 9
855 ; REMAT-NEXT: slli a2, a2, 9
856 ; REMAT-NEXT: add a2, a0, a2
857 ; REMAT-NEXT: vle32.v v14, (a2)
858 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
859 ; REMAT-NEXT: vle32.v v10, (a2)
860 ; REMAT-NEXT: li a2, 5
861 ; REMAT-NEXT: slli a2, a2, 10
862 ; REMAT-NEXT: add a2, a0, a2
863 ; REMAT-NEXT: vle32.v v12, (a2)
864 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
865 ; REMAT-NEXT: vle32.v v8, (a2)
866 ; REMAT-NEXT: li a2, 11
867 ; REMAT-NEXT: slli a2, a2, 9
868 ; REMAT-NEXT: add a2, a0, a2
869 ; REMAT-NEXT: vle32.v v14, (a2)
870 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
871 ; REMAT-NEXT: vle32.v v10, (a2)
872 ; REMAT-NEXT: li a2, 3
873 ; REMAT-NEXT: slli a2, a2, 11
874 ; REMAT-NEXT: add a2, a0, a2
875 ; REMAT-NEXT: vle32.v v12, (a2)
876 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
877 ; REMAT-NEXT: vle32.v v8, (a2)
878 ; REMAT-NEXT: li a2, 13
879 ; REMAT-NEXT: slli a2, a2, 9
880 ; REMAT-NEXT: add a2, a0, a2
881 ; REMAT-NEXT: vle32.v v14, (a2)
882 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
883 ; REMAT-NEXT: vle32.v v10, (a2)
884 ; REMAT-NEXT: li a2, 7
885 ; REMAT-NEXT: slli a2, a2, 10
886 ; REMAT-NEXT: add a2, a0, a2
887 ; REMAT-NEXT: vle32.v v12, (a2)
888 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
889 ; REMAT-NEXT: vle32.v v8, (a2)
890 ; REMAT-NEXT: li a2, 15
891 ; REMAT-NEXT: slli a2, a2, 9
892 ; REMAT-NEXT: add a2, a0, a2
893 ; REMAT-NEXT: vle32.v v14, (a2)
894 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
895 ; REMAT-NEXT: vle32.v v10, (a2)
896 ; REMAT-NEXT: lui a2, 2
897 ; REMAT-NEXT: add a2, a0, a2
898 ; REMAT-NEXT: vle32.v v12, (a2)
899 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
900 ; REMAT-NEXT: vle32.v v8, (a2)
901 ; REMAT-NEXT: li a2, 17
902 ; REMAT-NEXT: slli a2, a2, 9
903 ; REMAT-NEXT: add a2, a0, a2
904 ; REMAT-NEXT: vle32.v v14, (a2)
905 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
906 ; REMAT-NEXT: vle32.v v10, (a2)
907 ; REMAT-NEXT: li a2, 9
908 ; REMAT-NEXT: slli a2, a2, 10
909 ; REMAT-NEXT: add a2, a0, a2
910 ; REMAT-NEXT: vle32.v v12, (a2)
911 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
912 ; REMAT-NEXT: vle32.v v8, (a2)
913 ; REMAT-NEXT: li a2, 19
914 ; REMAT-NEXT: slli a2, a2, 9
915 ; REMAT-NEXT: add a2, a0, a2
916 ; REMAT-NEXT: vle32.v v14, (a2)
917 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
918 ; REMAT-NEXT: vle32.v v10, (a2)
919 ; REMAT-NEXT: li a2, 5
920 ; REMAT-NEXT: slli a2, a2, 11
921 ; REMAT-NEXT: add a2, a0, a2
922 ; REMAT-NEXT: vle32.v v12, (a2)
923 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
924 ; REMAT-NEXT: vle32.v v8, (a2)
925 ; REMAT-NEXT: li a2, 21
926 ; REMAT-NEXT: slli a2, a2, 9
927 ; REMAT-NEXT: add a2, a0, a2
928 ; REMAT-NEXT: vle32.v v14, (a2)
929 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
930 ; REMAT-NEXT: vle32.v v10, (a2)
931 ; REMAT-NEXT: li a2, 11
932 ; REMAT-NEXT: slli a2, a2, 10
933 ; REMAT-NEXT: add a2, a0, a2
934 ; REMAT-NEXT: vle32.v v26, (a2)
935 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
936 ; REMAT-NEXT: vle32.v v12, (a2)
937 ; REMAT-NEXT: li a2, 23
938 ; REMAT-NEXT: slli a2, a2, 9
939 ; REMAT-NEXT: add a2, a0, a2
940 ; REMAT-NEXT: vle32.v v28, (a2)
941 ; REMAT-NEXT: vle32.v v14, (a2)
942 ; REMAT-NEXT: lui a2, 3
943 ; REMAT-NEXT: add a2, a0, a2
944 ; REMAT-NEXT: vle32.v v30, (a2)
945 ; REMAT-NEXT: vle32.v v16, (a2)
946 ; REMAT-NEXT: li a2, 25
947 ; REMAT-NEXT: slli a2, a2, 9
948 ; REMAT-NEXT: add a2, a0, a2
949 ; REMAT-NEXT: vle32.v v6, (a2)
950 ; REMAT-NEXT: vle32.v v18, (a2)
951 ; REMAT-NEXT: li a2, 13
952 ; REMAT-NEXT: slli a2, a2, 10
953 ; REMAT-NEXT: add a2, a0, a2
954 ; REMAT-NEXT: vle32.v v4, (a2)
955 ; REMAT-NEXT: vle32.v v20, (a2)
956 ; REMAT-NEXT: li a2, 27
957 ; REMAT-NEXT: slli a2, a2, 9
958 ; REMAT-NEXT: add a2, a0, a2
959 ; REMAT-NEXT: vle32.v v2, (a2)
960 ; REMAT-NEXT: vle32.v v22, (a2)
961 ; REMAT-NEXT: li a2, 7
962 ; REMAT-NEXT: slli a2, a2, 11
963 ; REMAT-NEXT: add a2, a0, a2
964 ; REMAT-NEXT: vle32.v v24, (a2)
965 ; REMAT-NEXT: vle32.v v8, (a2)
966 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v26
967 ; REMAT-NEXT: li a2, 29
968 ; REMAT-NEXT: slli a2, a2, 9
969 ; REMAT-NEXT: add a2, a0, a2
970 ; REMAT-NEXT: vle32.v v26, (a2)
971 ; REMAT-NEXT: vle32.v v10, (a2)
972 ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v28
973 ; REMAT-NEXT: li a2, 15
974 ; REMAT-NEXT: slli a2, a2, 10
975 ; REMAT-NEXT: add a2, a0, a2
976 ; REMAT-NEXT: vle32.v v28, (a2)
977 ; REMAT-NEXT: vle32.v v12, (a2)
978 ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v30
979 ; REMAT-NEXT: li a2, 31
980 ; REMAT-NEXT: slli a2, a2, 9
981 ; REMAT-NEXT: add a2, a0, a2
982 ; REMAT-NEXT: vle32.v v30, (a2)
983 ; REMAT-NEXT: vle32.v v14, (a2)
984 ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v6
985 ; REMAT-NEXT: lui a2, 4
986 ; REMAT-NEXT: add a2, a0, a2
987 ; REMAT-NEXT: vle32.v v6, (a2)
988 ; REMAT-NEXT: vle32.v v16, (a2)
989 ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v4
990 ; REMAT-NEXT: lui a2, 4
991 ; REMAT-NEXT: addiw a2, a2, 512
992 ; REMAT-NEXT: add a2, a0, a2
993 ; REMAT-NEXT: vle32.v v4, (a2)
994 ; REMAT-NEXT: vle32.v v18, (a2)
995 ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v2
996 ; REMAT-NEXT: li a2, 17
997 ; REMAT-NEXT: slli a2, a2, 10
998 ; REMAT-NEXT: add a2, a0, a2
999 ; REMAT-NEXT: vle32.v v2, (a2)
1000 ; REMAT-NEXT: vle32.v v20, (a2)
1001 ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24
1002 ; REMAT-NEXT: lui a2, 4
1003 ; REMAT-NEXT: addiw a2, a2, 1536
1004 ; REMAT-NEXT: add a2, a0, a2
1005 ; REMAT-NEXT: vle32.v v24, (a2)
1006 ; REMAT-NEXT: vle32.v v22, (a2)
1007 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v26
1008 ; REMAT-NEXT: li a2, 9
1009 ; REMAT-NEXT: slli a2, a2, 11
1010 ; REMAT-NEXT: add a2, a0, a2
1011 ; REMAT-NEXT: vle32.v v26, (a2)
1012 ; REMAT-NEXT: vle32.v v8, (a2)
1013 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v28
1014 ; REMAT-NEXT: lui a2, 5
1015 ; REMAT-NEXT: addiw a2, a2, -1536
1016 ; REMAT-NEXT: add a2, a0, a2
1017 ; REMAT-NEXT: vle32.v v28, (a2)
1018 ; REMAT-NEXT: vle32.v v10, (a2)
1019 ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v30
1020 ; REMAT-NEXT: li a2, 19
1021 ; REMAT-NEXT: slli a2, a2, 10
1022 ; REMAT-NEXT: add a2, a0, a2
1023 ; REMAT-NEXT: vle32.v v30, (a2)
1024 ; REMAT-NEXT: vle32.v v12, (a2)
1025 ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v6
1026 ; REMAT-NEXT: lui ra, 5
1027 ; REMAT-NEXT: addiw ra, ra, -512
1028 ; REMAT-NEXT: add a2, a0, ra
1029 ; REMAT-NEXT: vle32.v v6, (a2)
1030 ; REMAT-NEXT: vle32.v v14, (a2)
1031 ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v4
1032 ; REMAT-NEXT: lui s11, 5
1033 ; REMAT-NEXT: add a2, a0, s11
1034 ; REMAT-NEXT: vle32.v v4, (a2)
1035 ; REMAT-NEXT: vle32.v v16, (a2)
1036 ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v2
1037 ; REMAT-NEXT: lui s10, 5
1038 ; REMAT-NEXT: addiw s10, s10, 512
1039 ; REMAT-NEXT: add a2, a0, s10
1040 ; REMAT-NEXT: vle32.v v2, (a2)
1041 ; REMAT-NEXT: vle32.v v18, (a2)
1042 ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v24
1043 ; REMAT-NEXT: li s9, 21
1044 ; REMAT-NEXT: slli s9, s9, 10
1045 ; REMAT-NEXT: add a2, a0, s9
1046 ; REMAT-NEXT: vle32.v v24, (a2)
1047 ; REMAT-NEXT: vle32.v v20, (a2)
1048 ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v26
1049 ; REMAT-NEXT: lui s8, 5
1050 ; REMAT-NEXT: addiw s8, s8, 1536
1051 ; REMAT-NEXT: add a2, a0, s8
1052 ; REMAT-NEXT: vle32.v v26, (a2)
1053 ; REMAT-NEXT: vle32.v v22, (a2)
1054 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v28
1055 ; REMAT-NEXT: li s7, 11
1056 ; REMAT-NEXT: slli s7, s7, 11
1057 ; REMAT-NEXT: add a2, a0, s7
1058 ; REMAT-NEXT: vle32.v v28, (a2)
1059 ; REMAT-NEXT: vle32.v v8, (a2)
1060 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v30
1061 ; REMAT-NEXT: lui s6, 6
1062 ; REMAT-NEXT: addiw s6, s6, -1536
1063 ; REMAT-NEXT: add a2, a0, s6
1064 ; REMAT-NEXT: vle32.v v30, (a2)
1065 ; REMAT-NEXT: vle32.v v10, (a2)
1066 ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v6
1067 ; REMAT-NEXT: li s5, 23
1068 ; REMAT-NEXT: slli s5, s5, 10
1069 ; REMAT-NEXT: add a2, a0, s5
1070 ; REMAT-NEXT: vle32.v v6, (a2)
1071 ; REMAT-NEXT: vle32.v v12, (a2)
1072 ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v4
1073 ; REMAT-NEXT: lui s4, 6
1074 ; REMAT-NEXT: addiw s4, s4, -512
1075 ; REMAT-NEXT: add a2, a0, s4
1076 ; REMAT-NEXT: vle32.v v4, (a2)
1077 ; REMAT-NEXT: vle32.v v14, (a2)
1078 ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v2
1079 ; REMAT-NEXT: lui s3, 6
1080 ; REMAT-NEXT: add a2, a0, s3
1081 ; REMAT-NEXT: vle32.v v2, (a2)
1082 ; REMAT-NEXT: vle32.v v16, (a2)
1083 ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v24
1084 ; REMAT-NEXT: lui s2, 6
1085 ; REMAT-NEXT: addiw s2, s2, 512
1086 ; REMAT-NEXT: add a2, a0, s2
1087 ; REMAT-NEXT: vle32.v v0, (a2)
1088 ; REMAT-NEXT: vle32.v v18, (a2)
1089 ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v26
1090 ; REMAT-NEXT: li s1, 25
1091 ; REMAT-NEXT: slli s1, s1, 10
1092 ; REMAT-NEXT: add a2, a0, s1
1093 ; REMAT-NEXT: vle32.v v26, (a2)
1094 ; REMAT-NEXT: vle32.v v20, (a2)
1095 ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v28
1096 ; REMAT-NEXT: lui s0, 6
1097 ; REMAT-NEXT: addiw s0, s0, 1536
1098 ; REMAT-NEXT: add a2, a0, s0
1099 ; REMAT-NEXT: vle32.v v28, (a2)
1100 ; REMAT-NEXT: vle32.v v22, (a2)
1101 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v30
1102 ; REMAT-NEXT: li t6, 13
1103 ; REMAT-NEXT: slli t6, t6, 11
1104 ; REMAT-NEXT: add a2, a0, t6
1105 ; REMAT-NEXT: vle32.v v30, (a2)
1106 ; REMAT-NEXT: vle32.v v24, (a2)
1107 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v6
1108 ; REMAT-NEXT: lui t5, 7
1109 ; REMAT-NEXT: addiw t5, t5, -1536
1110 ; REMAT-NEXT: add a2, a0, t5
1111 ; REMAT-NEXT: vle32.v v6, (a2)
1112 ; REMAT-NEXT: vle32.v v10, (a2)
1113 ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v4
1114 ; REMAT-NEXT: li t4, 27
1115 ; REMAT-NEXT: slli t4, t4, 10
1116 ; REMAT-NEXT: add a2, a0, t4
1117 ; REMAT-NEXT: vle32.v v4, (a2)
1118 ; REMAT-NEXT: vle32.v v12, (a2)
1119 ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v2
1120 ; REMAT-NEXT: lui t3, 7
1121 ; REMAT-NEXT: addiw t3, t3, -512
1122 ; REMAT-NEXT: add a2, a0, t3
1123 ; REMAT-NEXT: vle32.v v2, (a2)
1124 ; REMAT-NEXT: vle32.v v14, (a2)
1125 ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0
1126 ; REMAT-NEXT: lui t2, 7
1127 ; REMAT-NEXT: add a2, a0, t2
1128 ; REMAT-NEXT: vle32.v v0, (a2)
1129 ; REMAT-NEXT: vle32.v v8, (a2)
1130 ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26
1131 ; REMAT-NEXT: lui t1, 7
1132 ; REMAT-NEXT: addiw t1, t1, 512
1133 ; REMAT-NEXT: add a2, a0, t1
1134 ; REMAT-NEXT: vle32.v v16, (a2)
1135 ; REMAT-NEXT: vle32.v v18, (a2)
1136 ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28
1137 ; REMAT-NEXT: li t0, 29
1138 ; REMAT-NEXT: slli t0, t0, 10
1139 ; REMAT-NEXT: add a2, a0, t0
1140 ; REMAT-NEXT: vle32.v v20, (a2)
1141 ; REMAT-NEXT: vle32.v v26, (a2)
1142 ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30
1143 ; REMAT-NEXT: lui a7, 7
1144 ; REMAT-NEXT: addiw a7, a7, 1536
1145 ; REMAT-NEXT: add a2, a0, a7
1146 ; REMAT-NEXT: vle32.v v22, (a2)
1147 ; REMAT-NEXT: vle32.v v28, (a2)
1148 ; REMAT-NEXT: sf.vc.vv 3, 0, v24, v6
1149 ; REMAT-NEXT: li a6, 15
1150 ; REMAT-NEXT: slli a6, a6, 11
1151 ; REMAT-NEXT: add a2, a0, a6
1152 ; REMAT-NEXT: vle32.v v24, (a2)
1153 ; REMAT-NEXT: vle32.v v30, (a2)
1154 ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v4
1155 ; REMAT-NEXT: lui a5, 8
1156 ; REMAT-NEXT: addiw a5, a5, -1536
1157 ; REMAT-NEXT: add a2, a0, a5
1158 ; REMAT-NEXT: vle32.v v10, (a2)
1159 ; REMAT-NEXT: vle32.v v6, (a2)
1160 ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2
1161 ; REMAT-NEXT: li a4, 31
1162 ; REMAT-NEXT: slli a4, a4, 10
1163 ; REMAT-NEXT: add a2, a0, a4
1164 ; REMAT-NEXT: vle32.v v12, (a2)
1165 ; REMAT-NEXT: vle32.v v4, (a2)
1166 ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v0
1167 ; REMAT-NEXT: lui a3, 8
1168 ; REMAT-NEXT: addiw a3, a3, -512
1169 ; REMAT-NEXT: add a2, a0, a3
1170 ; REMAT-NEXT: vle32.v v14, (a2)
1171 ; REMAT-NEXT: vle32.v v2, (a2)
1172 ; REMAT-NEXT: lui a2, 8
1173 ; REMAT-NEXT: add a0, a0, a2
1174 ; REMAT-NEXT: vle32.v v0, (a0)
1175 ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v16
1176 ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v20
1177 ; REMAT-NEXT: sf.vc.vv 3, 0, v26, v22
1178 ; REMAT-NEXT: sf.vc.vv 3, 0, v28, v24
1179 ; REMAT-NEXT: sf.vc.vv 3, 0, v30, v10
1180 ; REMAT-NEXT: sf.vc.vv 3, 0, v6, v12
1181 ; REMAT-NEXT: sf.vc.vv 3, 0, v4, v14
1182 ; REMAT-NEXT: sf.vc.vv 3, 0, v2, v0
1183 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1184 ; REMAT-NEXT: addi a0, a1, 1024
1185 ; REMAT-NEXT: vse32.v v8, (a0)
1186 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1187 ; REMAT-NEXT: addi a0, a1, 1536
1188 ; REMAT-NEXT: vse32.v v8, (a0)
1189 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1190 ; REMAT-NEXT: li a0, 1
1191 ; REMAT-NEXT: slli a0, a0, 11
1192 ; REMAT-NEXT: add a0, a1, a0
1193 ; REMAT-NEXT: vse32.v v8, (a0)
1194 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1195 ; REMAT-NEXT: li a0, 5
1196 ; REMAT-NEXT: slli a0, a0, 9
1197 ; REMAT-NEXT: add a0, a1, a0
1198 ; REMAT-NEXT: vse32.v v8, (a0)
1199 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1200 ; REMAT-NEXT: li a0, 3
1201 ; REMAT-NEXT: slli a0, a0, 10
1202 ; REMAT-NEXT: add a0, a1, a0
1203 ; REMAT-NEXT: vse32.v v8, (a0)
1204 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1205 ; REMAT-NEXT: li a0, 7
1206 ; REMAT-NEXT: slli a0, a0, 9
1207 ; REMAT-NEXT: add a0, a1, a0
1208 ; REMAT-NEXT: vse32.v v8, (a0)
1209 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1210 ; REMAT-NEXT: lui a0, 1
1211 ; REMAT-NEXT: add a0, a1, a0
1212 ; REMAT-NEXT: vse32.v v8, (a0)
1213 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1214 ; REMAT-NEXT: li a0, 9
1215 ; REMAT-NEXT: slli a0, a0, 9
1216 ; REMAT-NEXT: add a0, a1, a0
1217 ; REMAT-NEXT: vse32.v v8, (a0)
1218 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1219 ; REMAT-NEXT: li a0, 5
1220 ; REMAT-NEXT: slli a0, a0, 10
1221 ; REMAT-NEXT: add a0, a1, a0
1222 ; REMAT-NEXT: vse32.v v8, (a0)
1223 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1224 ; REMAT-NEXT: li a0, 11
1225 ; REMAT-NEXT: slli a0, a0, 9
1226 ; REMAT-NEXT: add a0, a1, a0
1227 ; REMAT-NEXT: vse32.v v8, (a0)
1228 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1229 ; REMAT-NEXT: li a0, 3
1230 ; REMAT-NEXT: slli a0, a0, 11
1231 ; REMAT-NEXT: add a0, a1, a0
1232 ; REMAT-NEXT: vse32.v v8, (a0)
1233 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1234 ; REMAT-NEXT: li a0, 13
1235 ; REMAT-NEXT: slli a0, a0, 9
1236 ; REMAT-NEXT: add a0, a1, a0
1237 ; REMAT-NEXT: vse32.v v8, (a0)
1238 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1239 ; REMAT-NEXT: li a0, 7
1240 ; REMAT-NEXT: slli a0, a0, 10
1241 ; REMAT-NEXT: add a0, a1, a0
1242 ; REMAT-NEXT: vse32.v v8, (a0)
1243 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1244 ; REMAT-NEXT: li a0, 15
1245 ; REMAT-NEXT: slli a0, a0, 9
1246 ; REMAT-NEXT: add a0, a1, a0
1247 ; REMAT-NEXT: vse32.v v8, (a0)
1248 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1249 ; REMAT-NEXT: lui a0, 2
1250 ; REMAT-NEXT: add a0, a1, a0
1251 ; REMAT-NEXT: vse32.v v8, (a0)
1252 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1253 ; REMAT-NEXT: li a0, 17
1254 ; REMAT-NEXT: slli a0, a0, 9
1255 ; REMAT-NEXT: add a0, a1, a0
1256 ; REMAT-NEXT: vse32.v v8, (a0)
1257 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1258 ; REMAT-NEXT: li a0, 9
1259 ; REMAT-NEXT: slli a0, a0, 10
1260 ; REMAT-NEXT: add a0, a1, a0
1261 ; REMAT-NEXT: vse32.v v8, (a0)
1262 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1263 ; REMAT-NEXT: li a0, 19
1264 ; REMAT-NEXT: slli a0, a0, 9
1265 ; REMAT-NEXT: add a0, a1, a0
1266 ; REMAT-NEXT: vse32.v v8, (a0)
1267 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1268 ; REMAT-NEXT: li a0, 5
1269 ; REMAT-NEXT: slli a0, a0, 11
1270 ; REMAT-NEXT: add a0, a1, a0
1271 ; REMAT-NEXT: vse32.v v8, (a0)
1272 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1273 ; REMAT-NEXT: li a0, 21
1274 ; REMAT-NEXT: slli a0, a0, 9
1275 ; REMAT-NEXT: add a0, a1, a0
1276 ; REMAT-NEXT: vse32.v v8, (a0)
1277 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1278 ; REMAT-NEXT: li a0, 11
1279 ; REMAT-NEXT: slli a0, a0, 10
1280 ; REMAT-NEXT: add a0, a1, a0
1281 ; REMAT-NEXT: vse32.v v8, (a0)
1282 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1283 ; REMAT-NEXT: li a0, 23
1284 ; REMAT-NEXT: slli a0, a0, 9
1285 ; REMAT-NEXT: add a0, a1, a0
1286 ; REMAT-NEXT: vse32.v v8, (a0)
1287 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1288 ; REMAT-NEXT: lui a0, 3
1289 ; REMAT-NEXT: add a0, a1, a0
1290 ; REMAT-NEXT: vse32.v v8, (a0)
1291 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1292 ; REMAT-NEXT: li a0, 25
1293 ; REMAT-NEXT: slli a0, a0, 9
1294 ; REMAT-NEXT: add a0, a1, a0
1295 ; REMAT-NEXT: vse32.v v8, (a0)
1296 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1297 ; REMAT-NEXT: li a0, 13
1298 ; REMAT-NEXT: slli a0, a0, 10
1299 ; REMAT-NEXT: add a0, a1, a0
1300 ; REMAT-NEXT: vse32.v v8, (a0)
1301 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1302 ; REMAT-NEXT: li a0, 27
1303 ; REMAT-NEXT: slli a0, a0, 9
1304 ; REMAT-NEXT: add a0, a1, a0
1305 ; REMAT-NEXT: vse32.v v8, (a0)
1306 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1307 ; REMAT-NEXT: li a0, 7
1308 ; REMAT-NEXT: slli a0, a0, 11
1309 ; REMAT-NEXT: add a0, a1, a0
1310 ; REMAT-NEXT: vse32.v v8, (a0)
1311 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1312 ; REMAT-NEXT: li a0, 29
1313 ; REMAT-NEXT: slli a0, a0, 9
1314 ; REMAT-NEXT: add a0, a1, a0
1315 ; REMAT-NEXT: vse32.v v8, (a0)
1316 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1317 ; REMAT-NEXT: li a0, 15
1318 ; REMAT-NEXT: slli a0, a0, 10
1319 ; REMAT-NEXT: add a0, a1, a0
1320 ; REMAT-NEXT: vse32.v v8, (a0)
1321 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1322 ; REMAT-NEXT: li a0, 31
1323 ; REMAT-NEXT: slli a0, a0, 9
1324 ; REMAT-NEXT: add a0, a1, a0
1325 ; REMAT-NEXT: vse32.v v8, (a0)
1326 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1327 ; REMAT-NEXT: lui a0, 4
1328 ; REMAT-NEXT: add a0, a1, a0
1329 ; REMAT-NEXT: vse32.v v8, (a0)
1330 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1331 ; REMAT-NEXT: lui a0, 4
1332 ; REMAT-NEXT: addiw a0, a0, 512
1333 ; REMAT-NEXT: add a0, a1, a0
1334 ; REMAT-NEXT: vse32.v v8, (a0)
1335 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1336 ; REMAT-NEXT: li a0, 17
1337 ; REMAT-NEXT: slli a0, a0, 10
1338 ; REMAT-NEXT: add a0, a1, a0
1339 ; REMAT-NEXT: vse32.v v8, (a0)
1340 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1341 ; REMAT-NEXT: lui a0, 4
1342 ; REMAT-NEXT: addiw a0, a0, 1536
1343 ; REMAT-NEXT: add a0, a1, a0
1344 ; REMAT-NEXT: vse32.v v8, (a0)
1345 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1346 ; REMAT-NEXT: li a0, 9
1347 ; REMAT-NEXT: slli a0, a0, 11
1348 ; REMAT-NEXT: add a0, a1, a0
1349 ; REMAT-NEXT: vse32.v v8, (a0)
1350 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1351 ; REMAT-NEXT: lui a0, 5
1352 ; REMAT-NEXT: addiw a0, a0, -1536
1353 ; REMAT-NEXT: add a0, a1, a0
1354 ; REMAT-NEXT: vse32.v v8, (a0)
1355 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1356 ; REMAT-NEXT: li a0, 19
1357 ; REMAT-NEXT: slli a0, a0, 10
1358 ; REMAT-NEXT: add a0, a1, a0
1359 ; REMAT-NEXT: vse32.v v8, (a0)
1360 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1361 ; REMAT-NEXT: add ra, a1, ra
1362 ; REMAT-NEXT: vse32.v v8, (ra)
1363 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1364 ; REMAT-NEXT: add s11, a1, s11
1365 ; REMAT-NEXT: vse32.v v8, (s11)
1366 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1367 ; REMAT-NEXT: add s10, a1, s10
1368 ; REMAT-NEXT: vse32.v v8, (s10)
1369 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1370 ; REMAT-NEXT: add s9, a1, s9
1371 ; REMAT-NEXT: vse32.v v8, (s9)
1372 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1373 ; REMAT-NEXT: add s8, a1, s8
1374 ; REMAT-NEXT: vse32.v v8, (s8)
1375 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1376 ; REMAT-NEXT: add s7, a1, s7
1377 ; REMAT-NEXT: vse32.v v8, (s7)
1378 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1379 ; REMAT-NEXT: add s6, a1, s6
1380 ; REMAT-NEXT: vse32.v v8, (s6)
1381 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1382 ; REMAT-NEXT: add s5, a1, s5
1383 ; REMAT-NEXT: vse32.v v8, (s5)
1384 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1385 ; REMAT-NEXT: add s4, a1, s4
1386 ; REMAT-NEXT: vse32.v v8, (s4)
1387 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1388 ; REMAT-NEXT: add s3, a1, s3
1389 ; REMAT-NEXT: vse32.v v8, (s3)
1390 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1391 ; REMAT-NEXT: add s2, a1, s2
1392 ; REMAT-NEXT: vse32.v v8, (s2)
1393 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1394 ; REMAT-NEXT: add s1, a1, s1
1395 ; REMAT-NEXT: vse32.v v8, (s1)
1396 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1397 ; REMAT-NEXT: add s0, a1, s0
1398 ; REMAT-NEXT: vse32.v v8, (s0)
1399 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1400 ; REMAT-NEXT: add t6, a1, t6
1401 ; REMAT-NEXT: vse32.v v8, (t6)
1402 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1403 ; REMAT-NEXT: add t5, a1, t5
1404 ; REMAT-NEXT: vse32.v v8, (t5)
1405 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1406 ; REMAT-NEXT: add t4, a1, t4
1407 ; REMAT-NEXT: vse32.v v8, (t4)
1408 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1409 ; REMAT-NEXT: add t3, a1, t3
1410 ; REMAT-NEXT: vse32.v v8, (t3)
1411 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1412 ; REMAT-NEXT: add t2, a1, t2
1413 ; REMAT-NEXT: vse32.v v8, (t2)
1414 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1415 ; REMAT-NEXT: add t1, a1, t1
1416 ; REMAT-NEXT: vse32.v v8, (t1)
1417 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1418 ; REMAT-NEXT: add t0, a1, t0
1419 ; REMAT-NEXT: vse32.v v8, (t0)
1420 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1421 ; REMAT-NEXT: add a7, a1, a7
1422 ; REMAT-NEXT: vse32.v v8, (a7)
1423 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1424 ; REMAT-NEXT: add a6, a1, a6
1425 ; REMAT-NEXT: vse32.v v8, (a6)
1426 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1427 ; REMAT-NEXT: add a5, a1, a5
1428 ; REMAT-NEXT: vse32.v v8, (a5)
1429 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1430 ; REMAT-NEXT: add a4, a1, a4
1431 ; REMAT-NEXT: vse32.v v8, (a4)
1432 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1433 ; REMAT-NEXT: add a3, a1, a3
1434 ; REMAT-NEXT: vse32.v v8, (a3)
1435 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1436 ; REMAT-NEXT: add a2, a1, a2
1437 ; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
1438 ; REMAT-NEXT: vse32.v v8, (a2)
1439 ; REMAT-NEXT: lui a0, 8
1440 ; REMAT-NEXT: addiw a0, a0, 512
1441 ; REMAT-NEXT: add a0, a1, a0
1442 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1443 ; REMAT-NEXT: vse32.v v10, (a0)
1444 ; REMAT-NEXT: lui a0, 8
1445 ; REMAT-NEXT: addiw a0, a0, 1024
1446 ; REMAT-NEXT: add a0, a1, a0
1447 ; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
1448 ; REMAT-NEXT: vse32.v v8, (a0)
1449 ; REMAT-NEXT: lui a0, 8
1450 ; REMAT-NEXT: addiw a0, a0, 1536
1451 ; REMAT-NEXT: add a0, a1, a0
1452 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1453 ; REMAT-NEXT: vse32.v v10, (a0)
1454 ; REMAT-NEXT: li a0, 17
1455 ; REMAT-NEXT: slli a0, a0, 11
1456 ; REMAT-NEXT: add a0, a1, a0
1457 ; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
1458 ; REMAT-NEXT: vse32.v v8, (a0)
1459 ; REMAT-NEXT: lui a0, 9
1460 ; REMAT-NEXT: addiw a0, a0, -1536
1461 ; REMAT-NEXT: add a0, a1, a0
1462 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1463 ; REMAT-NEXT: vse32.v v10, (a0)
1464 ; REMAT-NEXT: lui a0, 9
1465 ; REMAT-NEXT: addiw a0, a0, -1024
1466 ; REMAT-NEXT: add a0, a1, a0
1467 ; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
1468 ; REMAT-NEXT: vse32.v v8, (a0)
1469 ; REMAT-NEXT: lui a0, 9
1470 ; REMAT-NEXT: addiw a0, a0, -512
1471 ; REMAT-NEXT: add a0, a1, a0
1472 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1473 ; REMAT-NEXT: vse32.v v10, (a0)
1474 ; REMAT-NEXT: lui a0, 9
1475 ; REMAT-NEXT: add a0, a1, a0
1476 ; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
1477 ; REMAT-NEXT: vse32.v v8, (a0)
1478 ; REMAT-NEXT: lui a0, 9
1479 ; REMAT-NEXT: addiw a0, a0, 512
1480 ; REMAT-NEXT: add a0, a1, a0
1481 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1482 ; REMAT-NEXT: vse32.v v10, (a0)
1483 ; REMAT-NEXT: lui a0, 9
1484 ; REMAT-NEXT: addiw a0, a0, 1024
1485 ; REMAT-NEXT: add a0, a1, a0
1486 ; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
1487 ; REMAT-NEXT: vse32.v v8, (a0)
1488 ; REMAT-NEXT: lui a0, 9
1489 ; REMAT-NEXT: addiw a0, a0, 1536
1490 ; REMAT-NEXT: add a0, a1, a0
1491 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1492 ; REMAT-NEXT: vse32.v v10, (a0)
1493 ; REMAT-NEXT: li a0, 19
1494 ; REMAT-NEXT: slli a0, a0, 11
1495 ; REMAT-NEXT: add a0, a1, a0
1496 ; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
1497 ; REMAT-NEXT: vse32.v v8, (a0)
1498 ; REMAT-NEXT: lui a0, 10
1499 ; REMAT-NEXT: addiw a0, a0, -1536
1500 ; REMAT-NEXT: add a0, a1, a0
1501 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1502 ; REMAT-NEXT: vse32.v v10, (a0)
1503 ; REMAT-NEXT: lui a0, 10
1504 ; REMAT-NEXT: addiw a0, a0, -1024
1505 ; REMAT-NEXT: add a0, a1, a0
1506 ; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
1507 ; REMAT-NEXT: vse32.v v8, (a0)
1508 ; REMAT-NEXT: lui a0, 10
1509 ; REMAT-NEXT: addiw a0, a0, -512
1510 ; REMAT-NEXT: add a0, a1, a0
1511 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1512 ; REMAT-NEXT: vse32.v v10, (a0)
1513 ; REMAT-NEXT: lui a0, 10
1514 ; REMAT-NEXT: add a0, a1, a0
1515 ; REMAT-NEXT: vse32.v v8, (a0)
1516 ; REMAT-NEXT: lui a0, 10
1517 ; REMAT-NEXT: addiw a0, a0, 512
1518 ; REMAT-NEXT: add a0, a1, a0
1519 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1520 ; REMAT-NEXT: vse32.v v8, (a0)
1521 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1522 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
1523 ; REMAT-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
1524 ; REMAT-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
1525 ; REMAT-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
1526 ; REMAT-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
1527 ; REMAT-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
1528 ; REMAT-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
1529 ; REMAT-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
1530 ; REMAT-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
1531 ; REMAT-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
1532 ; REMAT-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
1533 ; REMAT-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
1534 ; REMAT-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
1535 ; REMAT-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
1536 ; REMAT-NEXT: addi sp, sp, 112
1538 %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 32, i64 2, i64 1)
1539 %5 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %0, i64 %4)
1540 %6 = getelementptr inbounds i32, ptr %0, i64 128
1541 %7 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %6, i64 %4)
1542 %8 = getelementptr inbounds i32, ptr %0, i64 256
1543 %9 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %8, i64 %4)
1544 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %5, <vscale x 4 x i32> %7, i64 %4)
1545 %10 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %8, i64 %4)
1546 %11 = getelementptr inbounds i32, ptr %0, i64 384
1547 %12 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %11, i64 %4)
1548 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %7, <vscale x 4 x i32> %9, i64 %4)
1549 %13 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %11, i64 %4)
1550 %14 = getelementptr inbounds i32, ptr %0, i64 512
1551 %15 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %14, i64 %4)
1552 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %10, <vscale x 4 x i32> %12, i64 %4)
1553 %16 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %14, i64 %4)
1554 %17 = getelementptr inbounds i32, ptr %0, i64 640
1555 %18 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %17, i64 %4)
1556 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %13, <vscale x 4 x i32> %15, i64 %4)
1557 %19 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %17, i64 %4)
1558 %20 = getelementptr inbounds i32, ptr %0, i64 768
1559 %21 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %20, i64 %4)
1560 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %16, <vscale x 4 x i32> %18, i64 %4)
1561 %22 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %20, i64 %4)
1562 %23 = getelementptr inbounds i32, ptr %0, i64 896
1563 %24 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %23, i64 %4)
1564 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %19, <vscale x 4 x i32> %21, i64 %4)
1565 %25 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %23, i64 %4)
1566 %26 = getelementptr inbounds i32, ptr %0, i64 1024
1567 %27 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %26, i64 %4)
1568 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %22, <vscale x 4 x i32> %24, i64 %4)
1569 %28 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %26, i64 %4)
1570 %29 = getelementptr inbounds i32, ptr %0, i64 1152
1571 %30 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %29, i64 %4)
1572 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %25, <vscale x 4 x i32> %27, i64 %4)
1573 %31 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %29, i64 %4)
1574 %32 = getelementptr inbounds i32, ptr %0, i64 1280
1575 %33 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %32, i64 %4)
1576 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %28, <vscale x 4 x i32> %30, i64 %4)
1577 %34 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %32, i64 %4)
1578 %35 = getelementptr inbounds i32, ptr %0, i64 1408
1579 %36 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %35, i64 %4)
1580 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %31, <vscale x 4 x i32> %33, i64 %4)
1581 %37 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %35, i64 %4)
1582 %38 = getelementptr inbounds i32, ptr %0, i64 1536
1583 %39 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %38, i64 %4)
1584 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %34, <vscale x 4 x i32> %36, i64 %4)
1585 %40 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %38, i64 %4)
1586 %41 = getelementptr inbounds i32, ptr %0, i64 1664
1587 %42 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %41, i64 %4)
1588 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %37, <vscale x 4 x i32> %39, i64 %4)
1589 %43 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %41, i64 %4)
1590 %44 = getelementptr inbounds i32, ptr %0, i64 1792
1591 %45 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %44, i64 %4)
1592 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %40, <vscale x 4 x i32> %42, i64 %4)
1593 %46 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %44, i64 %4)
1594 %47 = getelementptr inbounds i32, ptr %0, i64 1920
1595 %48 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %47, i64 %4)
1596 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %43, <vscale x 4 x i32> %45, i64 %4)
1597 %49 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %47, i64 %4)
1598 %50 = getelementptr inbounds i32, ptr %0, i64 2048
1599 %51 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %50, i64 %4)
1600 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %46, <vscale x 4 x i32> %48, i64 %4)
1601 %52 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %50, i64 %4)
1602 %53 = getelementptr inbounds i32, ptr %0, i64 2176
1603 %54 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %53, i64 %4)
1604 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %49, <vscale x 4 x i32> %51, i64 %4)
1605 %55 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %53, i64 %4)
1606 %56 = getelementptr inbounds i32, ptr %0, i64 2304
1607 %57 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %56, i64 %4)
1608 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %52, <vscale x 4 x i32> %54, i64 %4)
1609 %58 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %56, i64 %4)
1610 %59 = getelementptr inbounds i32, ptr %0, i64 2432
1611 %60 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %59, i64 %4)
1612 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %55, <vscale x 4 x i32> %57, i64 %4)
1613 %61 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %59, i64 %4)
1614 %62 = getelementptr inbounds i32, ptr %0, i64 2560
1615 %63 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %62, i64 %4)
1616 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %58, <vscale x 4 x i32> %60, i64 %4)
1617 %64 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %62, i64 %4)
1618 %65 = getelementptr inbounds i32, ptr %0, i64 2688
1619 %66 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %65, i64 %4)
1620 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %61, <vscale x 4 x i32> %63, i64 %4)
1621 %67 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %65, i64 %4)
1622 %68 = getelementptr inbounds i32, ptr %0, i64 2816
1623 %69 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %68, i64 %4)
1624 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %64, <vscale x 4 x i32> %66, i64 %4)
1625 %70 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %68, i64 %4)
1626 %71 = getelementptr inbounds i32, ptr %0, i64 2944
1627 %72 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %71, i64 %4)
1628 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %67, <vscale x 4 x i32> %69, i64 %4)
1629 %73 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %71, i64 %4)
1630 %74 = getelementptr inbounds i32, ptr %0, i64 3072
1631 %75 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %74, i64 %4)
1632 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %70, <vscale x 4 x i32> %72, i64 %4)
1633 %76 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %74, i64 %4)
1634 %77 = getelementptr inbounds i32, ptr %0, i64 3200
1635 %78 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %77, i64 %4)
1636 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %73, <vscale x 4 x i32> %75, i64 %4)
1637 %79 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %77, i64 %4)
1638 %80 = getelementptr inbounds i32, ptr %0, i64 3328
1639 %81 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %80, i64 %4)
1640 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %76, <vscale x 4 x i32> %78, i64 %4)
1641 %82 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %80, i64 %4)
1642 %83 = getelementptr inbounds i32, ptr %0, i64 3456
1643 %84 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %83, i64 %4)
1644 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %79, <vscale x 4 x i32> %81, i64 %4)
1645 %85 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %83, i64 %4)
1646 %86 = getelementptr inbounds i32, ptr %0, i64 3584
1647 %87 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %86, i64 %4)
1648 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %82, <vscale x 4 x i32> %84, i64 %4)
1649 %88 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %86, i64 %4)
1650 %89 = getelementptr inbounds i32, ptr %0, i64 3712
1651 %90 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %89, i64 %4)
1652 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %85, <vscale x 4 x i32> %87, i64 %4)
1653 %91 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %89, i64 %4)
1654 %92 = getelementptr inbounds i32, ptr %0, i64 3840
1655 %93 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %92, i64 %4)
1656 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %88, <vscale x 4 x i32> %90, i64 %4)
1657 %94 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %92, i64 %4)
1658 %95 = getelementptr inbounds i32, ptr %0, i64 3968
1659 %96 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %95, i64 %4)
1660 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %91, <vscale x 4 x i32> %93, i64 %4)
1661 %97 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %95, i64 %4)
1662 %98 = getelementptr inbounds i32, ptr %0, i64 4096
1663 %99 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %98, i64 %4)
1664 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %94, <vscale x 4 x i32> %96, i64 %4)
1665 %100 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %98, i64 %4)
1666 %101 = getelementptr inbounds i32, ptr %0, i64 4224
1667 %102 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %101, i64 %4)
1668 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %97, <vscale x 4 x i32> %99, i64 %4)
1669 %103 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %101, i64 %4)
1670 %104 = getelementptr inbounds i32, ptr %0, i64 4352
1671 %105 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %104, i64 %4)
1672 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %100, <vscale x 4 x i32> %102, i64 %4)
1673 %106 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %104, i64 %4)
1674 %107 = getelementptr inbounds i32, ptr %0, i64 4480
1675 %108 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %107, i64 %4)
1676 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %103, <vscale x 4 x i32> %105, i64 %4)
1677 %109 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %107, i64 %4)
1678 %110 = getelementptr inbounds i32, ptr %0, i64 4608
1679 %111 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %110, i64 %4)
1680 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %106, <vscale x 4 x i32> %108, i64 %4)
1681 %112 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %110, i64 %4)
1682 %113 = getelementptr inbounds i32, ptr %0, i64 4736
1683 %114 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %113, i64 %4)
1684 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %109, <vscale x 4 x i32> %111, i64 %4)
1685 %115 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %113, i64 %4)
1686 %116 = getelementptr inbounds i32, ptr %0, i64 4864
1687 %117 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %116, i64 %4)
1688 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %112, <vscale x 4 x i32> %114, i64 %4)
1689 %118 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %116, i64 %4)
1690 %119 = getelementptr inbounds i32, ptr %0, i64 4992
1691 %120 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %119, i64 %4)
1692 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %115, <vscale x 4 x i32> %117, i64 %4)
1693 %121 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %119, i64 %4)
1694 %122 = getelementptr inbounds i32, ptr %0, i64 5120
1695 %123 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %122, i64 %4)
1696 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %118, <vscale x 4 x i32> %120, i64 %4)
1697 %124 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %122, i64 %4)
1698 %125 = getelementptr inbounds i32, ptr %0, i64 5248
1699 %126 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %125, i64 %4)
1700 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %121, <vscale x 4 x i32> %123, i64 %4)
1701 %127 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %125, i64 %4)
1702 %128 = getelementptr inbounds i32, ptr %0, i64 5376
1703 %129 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %128, i64 %4)
1704 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %124, <vscale x 4 x i32> %126, i64 %4)
1705 %130 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %128, i64 %4)
1706 %131 = getelementptr inbounds i32, ptr %0, i64 5504
1707 %132 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %131, i64 %4)
1708 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %127, <vscale x 4 x i32> %129, i64 %4)
1709 %133 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %131, i64 %4)
1710 %134 = getelementptr inbounds i32, ptr %0, i64 5632
1711 %135 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %134, i64 %4)
1712 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %130, <vscale x 4 x i32> %132, i64 %4)
1713 %136 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %134, i64 %4)
1714 %137 = getelementptr inbounds i32, ptr %0, i64 5760
1715 %138 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %137, i64 %4)
1716 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %133, <vscale x 4 x i32> %135, i64 %4)
1717 %139 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %137, i64 %4)
1718 %140 = getelementptr inbounds i32, ptr %0, i64 5888
1719 %141 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %140, i64 %4)
1720 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %136, <vscale x 4 x i32> %138, i64 %4)
1721 %142 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %140, i64 %4)
1722 %143 = getelementptr inbounds i32, ptr %0, i64 6016
1723 %144 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %143, i64 %4)
1724 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %139, <vscale x 4 x i32> %141, i64 %4)
1725 %145 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %143, i64 %4)
1726 %146 = getelementptr inbounds i32, ptr %0, i64 6144
1727 %147 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %146, i64 %4)
1728 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %142, <vscale x 4 x i32> %144, i64 %4)
1729 %148 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %146, i64 %4)
1730 %149 = getelementptr inbounds i32, ptr %0, i64 6272
1731 %150 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %149, i64 %4)
1732 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %145, <vscale x 4 x i32> %147, i64 %4)
1733 %151 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %149, i64 %4)
1734 %152 = getelementptr inbounds i32, ptr %0, i64 6400
1735 %153 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %152, i64 %4)
1736 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %148, <vscale x 4 x i32> %150, i64 %4)
1737 %154 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %152, i64 %4)
1738 %155 = getelementptr inbounds i32, ptr %0, i64 6528
1739 %156 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %155, i64 %4)
1740 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %151, <vscale x 4 x i32> %153, i64 %4)
1741 %157 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %155, i64 %4)
1742 %158 = getelementptr inbounds i32, ptr %0, i64 6656
1743 %159 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %158, i64 %4)
1744 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %154, <vscale x 4 x i32> %156, i64 %4)
1745 %160 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %158, i64 %4)
1746 %161 = getelementptr inbounds i32, ptr %0, i64 6784
1747 %162 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %161, i64 %4)
1748 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %157, <vscale x 4 x i32> %159, i64 %4)
1749 %163 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %161, i64 %4)
1750 %164 = getelementptr inbounds i32, ptr %0, i64 6912
1751 %165 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %164, i64 %4)
1752 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %160, <vscale x 4 x i32> %162, i64 %4)
1753 %166 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %164, i64 %4)
1754 %167 = getelementptr inbounds i32, ptr %0, i64 7040
1755 %168 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %167, i64 %4)
1756 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %163, <vscale x 4 x i32> %165, i64 %4)
1757 %169 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %167, i64 %4)
1758 %170 = getelementptr inbounds i32, ptr %0, i64 7168
1759 %171 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %170, i64 %4)
1760 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %166, <vscale x 4 x i32> %168, i64 %4)
1761 %172 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %170, i64 %4)
1762 %173 = getelementptr inbounds i32, ptr %0, i64 7296
1763 %174 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %173, i64 %4)
1764 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %169, <vscale x 4 x i32> %171, i64 %4)
1765 %175 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %173, i64 %4)
1766 %176 = getelementptr inbounds i32, ptr %0, i64 7424
1767 %177 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %176, i64 %4)
1768 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %172, <vscale x 4 x i32> %174, i64 %4)
1769 %178 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %176, i64 %4)
1770 %179 = getelementptr inbounds i32, ptr %0, i64 7552
1771 %180 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %179, i64 %4)
1772 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %175, <vscale x 4 x i32> %177, i64 %4)
1773 %181 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %179, i64 %4)
1774 %182 = getelementptr inbounds i32, ptr %0, i64 7680
1775 %183 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %182, i64 %4)
1776 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %178, <vscale x 4 x i32> %180, i64 %4)
1777 %184 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %182, i64 %4)
1778 %185 = getelementptr inbounds i32, ptr %0, i64 7808
1779 %186 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %185, i64 %4)
1780 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %181, <vscale x 4 x i32> %183, i64 %4)
1781 %187 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %185, i64 %4)
1782 %188 = getelementptr inbounds i32, ptr %0, i64 7936
1783 %189 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %188, i64 %4)
1784 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %184, <vscale x 4 x i32> %186, i64 %4)
1785 %190 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %188, i64 %4)
1786 %191 = getelementptr inbounds i32, ptr %0, i64 8064
1787 %192 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %191, i64 %4)
1788 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %187, <vscale x 4 x i32> %189, i64 %4)
1789 %193 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %191, i64 %4)
1790 %194 = getelementptr inbounds i32, ptr %0, i64 8192
1791 %195 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %194, i64 %4)
1792 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %190, <vscale x 4 x i32> %192, i64 %4)
1793 tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %193, <vscale x 4 x i32> %195, i64 %4)
1794 %196 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1795 %197 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1796 %198 = getelementptr inbounds i32, ptr %1, i64 256
1797 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %196, ptr %198, i64 %4)
1798 %199 = getelementptr inbounds i32, ptr %1, i64 384
1799 %200 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1800 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %197, ptr %199, i64 %4)
1801 %201 = getelementptr inbounds i32, ptr %1, i64 512
1802 %202 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1803 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %200, ptr %201, i64 %4)
1804 %203 = getelementptr inbounds i32, ptr %1, i64 640
1805 %204 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1806 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %202, ptr %203, i64 %4)
1807 %205 = getelementptr inbounds i32, ptr %1, i64 768
1808 %206 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1809 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %204, ptr %205, i64 %4)
1810 %207 = getelementptr inbounds i32, ptr %1, i64 896
1811 %208 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1812 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %206, ptr %207, i64 %4)
1813 %209 = getelementptr inbounds i32, ptr %1, i64 1024
1814 %210 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1815 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %208, ptr %209, i64 %4)
1816 %211 = getelementptr inbounds i32, ptr %1, i64 1152
1817 %212 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1818 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %210, ptr %211, i64 %4)
1819 %213 = getelementptr inbounds i32, ptr %1, i64 1280
1820 %214 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1821 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %212, ptr %213, i64 %4)
1822 %215 = getelementptr inbounds i32, ptr %1, i64 1408
1823 %216 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1824 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %214, ptr %215, i64 %4)
1825 %217 = getelementptr inbounds i32, ptr %1, i64 1536
1826 %218 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1827 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %216, ptr %217, i64 %4)
1828 %219 = getelementptr inbounds i32, ptr %1, i64 1664
1829 %220 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1830 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %218, ptr %219, i64 %4)
1831 %221 = getelementptr inbounds i32, ptr %1, i64 1792
1832 %222 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1833 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %220, ptr %221, i64 %4)
1834 %223 = getelementptr inbounds i32, ptr %1, i64 1920
1835 %224 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1836 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %222, ptr %223, i64 %4)
1837 %225 = getelementptr inbounds i32, ptr %1, i64 2048
1838 %226 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1839 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %224, ptr %225, i64 %4)
1840 %227 = getelementptr inbounds i32, ptr %1, i64 2176
1841 %228 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1842 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %226, ptr %227, i64 %4)
1843 %229 = getelementptr inbounds i32, ptr %1, i64 2304
1844 %230 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1845 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %228, ptr %229, i64 %4)
1846 %231 = getelementptr inbounds i32, ptr %1, i64 2432
1847 %232 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1848 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %230, ptr %231, i64 %4)
1849 %233 = getelementptr inbounds i32, ptr %1, i64 2560
1850 %234 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1851 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %232, ptr %233, i64 %4)
1852 %235 = getelementptr inbounds i32, ptr %1, i64 2688
1853 %236 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1854 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %234, ptr %235, i64 %4)
1855 %237 = getelementptr inbounds i32, ptr %1, i64 2816
1856 %238 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1857 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %236, ptr %237, i64 %4)
1858 %239 = getelementptr inbounds i32, ptr %1, i64 2944
1859 %240 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1860 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %238, ptr %239, i64 %4)
1861 %241 = getelementptr inbounds i32, ptr %1, i64 3072
1862 %242 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1863 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %240, ptr %241, i64 %4)
1864 %243 = getelementptr inbounds i32, ptr %1, i64 3200
1865 %244 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1866 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %242, ptr %243, i64 %4)
1867 %245 = getelementptr inbounds i32, ptr %1, i64 3328
1868 %246 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1869 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %244, ptr %245, i64 %4)
1870 %247 = getelementptr inbounds i32, ptr %1, i64 3456
1871 %248 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1872 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %246, ptr %247, i64 %4)
1873 %249 = getelementptr inbounds i32, ptr %1, i64 3584
1874 %250 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1875 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %248, ptr %249, i64 %4)
1876 %251 = getelementptr inbounds i32, ptr %1, i64 3712
1877 %252 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1878 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %250, ptr %251, i64 %4)
1879 %253 = getelementptr inbounds i32, ptr %1, i64 3840
1880 %254 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1881 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %252, ptr %253, i64 %4)
1882 %255 = getelementptr inbounds i32, ptr %1, i64 3968
1883 %256 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1884 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %254, ptr %255, i64 %4)
1885 %257 = getelementptr inbounds i32, ptr %1, i64 4096
1886 %258 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1887 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %256, ptr %257, i64 %4)
1888 %259 = getelementptr inbounds i32, ptr %1, i64 4224
1889 %260 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1890 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %258, ptr %259, i64 %4)
1891 %261 = getelementptr inbounds i32, ptr %1, i64 4352
1892 %262 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1893 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %260, ptr %261, i64 %4)
1894 %263 = getelementptr inbounds i32, ptr %1, i64 4480
1895 %264 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1896 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %262, ptr %263, i64 %4)
1897 %265 = getelementptr inbounds i32, ptr %1, i64 4608
1898 %266 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1899 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %264, ptr %265, i64 %4)
1900 %267 = getelementptr inbounds i32, ptr %1, i64 4736
1901 %268 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1902 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %266, ptr %267, i64 %4)
1903 %269 = getelementptr inbounds i32, ptr %1, i64 4864
1904 %270 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1905 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %268, ptr %269, i64 %4)
1906 %271 = getelementptr inbounds i32, ptr %1, i64 4992
1907 %272 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1908 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %270, ptr %271, i64 %4)
1909 %273 = getelementptr inbounds i32, ptr %1, i64 5120
1910 %274 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1911 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %272, ptr %273, i64 %4)
1912 %275 = getelementptr inbounds i32, ptr %1, i64 5248
1913 %276 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1914 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %274, ptr %275, i64 %4)
1915 %277 = getelementptr inbounds i32, ptr %1, i64 5376
1916 %278 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1917 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %276, ptr %277, i64 %4)
1918 %279 = getelementptr inbounds i32, ptr %1, i64 5504
1919 %280 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1920 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %278, ptr %279, i64 %4)
1921 %281 = getelementptr inbounds i32, ptr %1, i64 5632
1922 %282 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1923 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %280, ptr %281, i64 %4)
1924 %283 = getelementptr inbounds i32, ptr %1, i64 5760
1925 %284 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1926 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %282, ptr %283, i64 %4)
1927 %285 = getelementptr inbounds i32, ptr %1, i64 5888
1928 %286 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1929 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %284, ptr %285, i64 %4)
1930 %287 = getelementptr inbounds i32, ptr %1, i64 6016
1931 %288 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1932 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %286, ptr %287, i64 %4)
1933 %289 = getelementptr inbounds i32, ptr %1, i64 6144
1934 %290 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1935 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %288, ptr %289, i64 %4)
1936 %291 = getelementptr inbounds i32, ptr %1, i64 6272
1937 %292 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1938 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %290, ptr %291, i64 %4)
1939 %293 = getelementptr inbounds i32, ptr %1, i64 6400
1940 %294 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1941 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %292, ptr %293, i64 %4)
1942 %295 = getelementptr inbounds i32, ptr %1, i64 6528
1943 %296 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1944 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %294, ptr %295, i64 %4)
1945 %297 = getelementptr inbounds i32, ptr %1, i64 6656
1946 %298 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1947 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %296, ptr %297, i64 %4)
1948 %299 = getelementptr inbounds i32, ptr %1, i64 6784
1949 %300 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1950 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %298, ptr %299, i64 %4)
1951 %301 = getelementptr inbounds i32, ptr %1, i64 6912
1952 %302 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1953 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %300, ptr %301, i64 %4)
1954 %303 = getelementptr inbounds i32, ptr %1, i64 7040
1955 %304 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1956 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %302, ptr %303, i64 %4)
1957 %305 = getelementptr inbounds i32, ptr %1, i64 7168
1958 %306 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1959 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %304, ptr %305, i64 %4)
1960 %307 = getelementptr inbounds i32, ptr %1, i64 7296
1961 %308 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1962 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %306, ptr %307, i64 %4)
1963 %309 = getelementptr inbounds i32, ptr %1, i64 7424
1964 %310 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1965 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %308, ptr %309, i64 %4)
1966 %311 = getelementptr inbounds i32, ptr %1, i64 7552
1967 %312 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1968 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %310, ptr %311, i64 %4)
1969 %313 = getelementptr inbounds i32, ptr %1, i64 7680
1970 %314 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1971 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %312, ptr %313, i64 %4)
1972 %315 = getelementptr inbounds i32, ptr %1, i64 7808
1973 %316 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1974 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %314, ptr %315, i64 %4)
1975 %317 = getelementptr inbounds i32, ptr %1, i64 7936
1976 %318 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1977 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %316, ptr %317, i64 %4)
1978 %319 = getelementptr inbounds i32, ptr %1, i64 8064
1979 %320 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1980 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %318, ptr %319, i64 %4)
1981 %321 = getelementptr inbounds i32, ptr %1, i64 8192
1982 %322 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1983 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %320, ptr %321, i64 %4)
1984 %323 = getelementptr inbounds i32, ptr %1, i64 8320
1985 %324 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1986 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %322, ptr %323, i64 %4)
1987 %325 = getelementptr inbounds i32, ptr %1, i64 8448
1988 %326 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1989 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %324, ptr %325, i64 %4)
1990 %327 = getelementptr inbounds i32, ptr %1, i64 8576
1991 %328 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1992 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %326, ptr %327, i64 %4)
1993 %329 = getelementptr inbounds i32, ptr %1, i64 8704
1994 %330 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1995 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %328, ptr %329, i64 %4)
1996 %331 = getelementptr inbounds i32, ptr %1, i64 8832
1997 %332 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
1998 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %330, ptr %331, i64 %4)
1999 %333 = getelementptr inbounds i32, ptr %1, i64 8960
2000 %334 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2001 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %332, ptr %333, i64 %4)
2002 %335 = getelementptr inbounds i32, ptr %1, i64 9088
2003 %336 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2004 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %334, ptr %335, i64 %4)
2005 %337 = getelementptr inbounds i32, ptr %1, i64 9216
2006 %338 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2007 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %336, ptr %337, i64 %4)
2008 %339 = getelementptr inbounds i32, ptr %1, i64 9344
2009 %340 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2010 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %338, ptr %339, i64 %4)
2011 %341 = getelementptr inbounds i32, ptr %1, i64 9472
2012 %342 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2013 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %340, ptr %341, i64 %4)
2014 %343 = getelementptr inbounds i32, ptr %1, i64 9600
2015 %344 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2016 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %342, ptr %343, i64 %4)
2017 %345 = getelementptr inbounds i32, ptr %1, i64 9728
2018 %346 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2019 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %344, ptr %345, i64 %4)
2020 %347 = getelementptr inbounds i32, ptr %1, i64 9856
2021 %348 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2022 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %346, ptr %347, i64 %4)
2023 %349 = getelementptr inbounds i32, ptr %1, i64 9984
2024 %350 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2025 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %348, ptr %349, i64 %4)
2026 %351 = getelementptr inbounds i32, ptr %1, i64 10112
2027 %352 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2028 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %350, ptr %351, i64 %4)
2029 %353 = getelementptr inbounds i32, ptr %1, i64 10240
2030 %354 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2031 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %352, ptr %353, i64 %4)
2032 %355 = getelementptr inbounds i32, ptr %1, i64 10368
2033 %356 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
2034 tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %354, ptr %355, i64 %4)
2035 %357 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
; Declarations of the intrinsics exercised above:
;  - llvm.riscv.vsetvli / vle / vse: standard RVV configuration, load, and store.
;  - llvm.riscv.sf.vc.*: SiFive vendor-coprocessor (XSfvcp) ops; the .se forms
;    have side effects, so they cannot be CSE'd — each call above stays live,
;    creating the long-range register pressure this rematerialization test needs.
2039 declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
2040 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>, ptr, i64)
2041 declare void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64, i64, <vscale x 4 x i32>, <vscale x 4 x i32>, i64)
2042 declare <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64, i64, i64, i64)
2043 declare void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32>, ptr, i64)