1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV64I
3 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV32I
; lshr_4bytes: load an i32 value from %src.ptr and an i32 shift amount from
; %bitOff.ptr (both align 1), logically shift the value right by that amount,
; and store the i32 result to %dst (align 1).
; The autogenerated expectations below use byte-wise loads and stores. The
; riscv64 expectation reads only byte 0 of the shift amount and shifts with
; srlw; the riscv32 expectation assembles all four shift-amount bytes and
; shifts with srl.
5 define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
6 ; RV64I-LABEL: lshr_4bytes:
8 ; RV64I-NEXT: lbu a3, 1(a0)
9 ; RV64I-NEXT: lbu a4, 0(a0)
10 ; RV64I-NEXT: lbu a5, 2(a0)
11 ; RV64I-NEXT: slli a3, a3, 8
12 ; RV64I-NEXT: lb a0, 3(a0)
13 ; RV64I-NEXT: or a3, a3, a4
14 ; RV64I-NEXT: slli a5, a5, 16
15 ; RV64I-NEXT: lbu a1, 0(a1)
16 ; RV64I-NEXT: slli a0, a0, 24
17 ; RV64I-NEXT: or a0, a0, a5
18 ; RV64I-NEXT: or a0, a0, a3
19 ; RV64I-NEXT: srlw a0, a0, a1
20 ; RV64I-NEXT: sb a0, 0(a2)
21 ; RV64I-NEXT: srli a1, a0, 16
22 ; RV64I-NEXT: sb a1, 2(a2)
23 ; RV64I-NEXT: srli a1, a0, 24
24 ; RV64I-NEXT: sb a1, 3(a2)
25 ; RV64I-NEXT: srli a0, a0, 8
26 ; RV64I-NEXT: sb a0, 1(a2)
29 ; RV32I-LABEL: lshr_4bytes:
31 ; RV32I-NEXT: lbu a3, 1(a0)
32 ; RV32I-NEXT: lbu a4, 0(a0)
33 ; RV32I-NEXT: lbu a5, 2(a0)
34 ; RV32I-NEXT: lbu a0, 3(a0)
35 ; RV32I-NEXT: slli a3, a3, 8
36 ; RV32I-NEXT: or a3, a3, a4
37 ; RV32I-NEXT: slli a5, a5, 16
38 ; RV32I-NEXT: slli a0, a0, 24
39 ; RV32I-NEXT: or a0, a0, a5
40 ; RV32I-NEXT: or a0, a0, a3
41 ; RV32I-NEXT: lbu a3, 1(a1)
42 ; RV32I-NEXT: lbu a4, 0(a1)
43 ; RV32I-NEXT: lbu a5, 2(a1)
44 ; RV32I-NEXT: lbu a1, 3(a1)
45 ; RV32I-NEXT: slli a3, a3, 8
46 ; RV32I-NEXT: or a3, a3, a4
47 ; RV32I-NEXT: slli a5, a5, 16
48 ; RV32I-NEXT: slli a1, a1, 24
49 ; RV32I-NEXT: or a1, a1, a5
50 ; RV32I-NEXT: or a1, a1, a3
51 ; RV32I-NEXT: srl a0, a0, a1
52 ; RV32I-NEXT: sb a0, 0(a2)
53 ; RV32I-NEXT: srli a1, a0, 16
54 ; RV32I-NEXT: sb a1, 2(a2)
55 ; RV32I-NEXT: srli a1, a0, 24
56 ; RV32I-NEXT: sb a1, 3(a2)
57 ; RV32I-NEXT: srli a0, a0, 8
58 ; RV32I-NEXT: sb a0, 1(a2)
60 %src = load i32, ptr %src.ptr, align 1
61 %bitOff = load i32, ptr %bitOff.ptr, align 1
62 %res = lshr i32 %src, %bitOff
63 store i32 %res, ptr %dst, align 1
; shl_4bytes: load an i32 value from %src.ptr and an i32 shift amount from
; %bitOff.ptr (both align 1), shift the value left by that amount, and store
; the i32 result to %dst (align 1).
; The autogenerated expectations below use byte-wise loads and stores. The
; riscv64 expectation reads only byte 0 of the shift amount and shifts with
; sllw; the riscv32 expectation assembles all four shift-amount bytes and
; shifts with sll.
66 define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
67 ; RV64I-LABEL: shl_4bytes:
69 ; RV64I-NEXT: lbu a3, 1(a0)
70 ; RV64I-NEXT: lbu a4, 0(a0)
71 ; RV64I-NEXT: lbu a5, 2(a0)
72 ; RV64I-NEXT: slli a3, a3, 8
73 ; RV64I-NEXT: lb a0, 3(a0)
74 ; RV64I-NEXT: or a3, a3, a4
75 ; RV64I-NEXT: slli a5, a5, 16
76 ; RV64I-NEXT: lbu a1, 0(a1)
77 ; RV64I-NEXT: slli a0, a0, 24
78 ; RV64I-NEXT: or a0, a0, a5
79 ; RV64I-NEXT: or a0, a0, a3
80 ; RV64I-NEXT: sllw a0, a0, a1
81 ; RV64I-NEXT: sb a0, 0(a2)
82 ; RV64I-NEXT: srli a1, a0, 16
83 ; RV64I-NEXT: sb a1, 2(a2)
84 ; RV64I-NEXT: srli a1, a0, 24
85 ; RV64I-NEXT: sb a1, 3(a2)
86 ; RV64I-NEXT: srli a0, a0, 8
87 ; RV64I-NEXT: sb a0, 1(a2)
90 ; RV32I-LABEL: shl_4bytes:
92 ; RV32I-NEXT: lbu a3, 1(a0)
93 ; RV32I-NEXT: lbu a4, 0(a0)
94 ; RV32I-NEXT: lbu a5, 2(a0)
95 ; RV32I-NEXT: lbu a0, 3(a0)
96 ; RV32I-NEXT: slli a3, a3, 8
97 ; RV32I-NEXT: or a3, a3, a4
98 ; RV32I-NEXT: slli a5, a5, 16
99 ; RV32I-NEXT: slli a0, a0, 24
100 ; RV32I-NEXT: or a0, a0, a5
101 ; RV32I-NEXT: or a0, a0, a3
102 ; RV32I-NEXT: lbu a3, 1(a1)
103 ; RV32I-NEXT: lbu a4, 0(a1)
104 ; RV32I-NEXT: lbu a5, 2(a1)
105 ; RV32I-NEXT: lbu a1, 3(a1)
106 ; RV32I-NEXT: slli a3, a3, 8
107 ; RV32I-NEXT: or a3, a3, a4
108 ; RV32I-NEXT: slli a5, a5, 16
109 ; RV32I-NEXT: slli a1, a1, 24
110 ; RV32I-NEXT: or a1, a1, a5
111 ; RV32I-NEXT: or a1, a1, a3
112 ; RV32I-NEXT: sll a0, a0, a1
113 ; RV32I-NEXT: sb a0, 0(a2)
114 ; RV32I-NEXT: srli a1, a0, 16
115 ; RV32I-NEXT: sb a1, 2(a2)
116 ; RV32I-NEXT: srli a1, a0, 24
117 ; RV32I-NEXT: sb a1, 3(a2)
118 ; RV32I-NEXT: srli a0, a0, 8
119 ; RV32I-NEXT: sb a0, 1(a2)
121 %src = load i32, ptr %src.ptr, align 1
122 %bitOff = load i32, ptr %bitOff.ptr, align 1
123 %res = shl i32 %src, %bitOff
124 store i32 %res, ptr %dst, align 1
; ashr_4bytes: load an i32 value from %src.ptr and an i32 shift amount from
; %bitOff.ptr (both align 1), arithmetically shift the value right by that
; amount, and store the i32 result to %dst (align 1).
; The autogenerated expectations below use byte-wise loads and stores. The
; riscv64 expectation reads only byte 0 of the shift amount and shifts with
; sraw; the riscv32 expectation assembles all four shift-amount bytes and
; shifts with sra.
127 define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
128 ; RV64I-LABEL: ashr_4bytes:
130 ; RV64I-NEXT: lbu a3, 1(a0)
131 ; RV64I-NEXT: lbu a4, 0(a0)
132 ; RV64I-NEXT: lbu a5, 2(a0)
133 ; RV64I-NEXT: slli a3, a3, 8
134 ; RV64I-NEXT: lb a0, 3(a0)
135 ; RV64I-NEXT: or a3, a3, a4
136 ; RV64I-NEXT: slli a5, a5, 16
137 ; RV64I-NEXT: lbu a1, 0(a1)
138 ; RV64I-NEXT: slli a0, a0, 24
139 ; RV64I-NEXT: or a0, a0, a5
140 ; RV64I-NEXT: or a0, a0, a3
141 ; RV64I-NEXT: sraw a0, a0, a1
142 ; RV64I-NEXT: sb a0, 0(a2)
143 ; RV64I-NEXT: srli a1, a0, 16
144 ; RV64I-NEXT: sb a1, 2(a2)
145 ; RV64I-NEXT: srli a1, a0, 24
146 ; RV64I-NEXT: sb a1, 3(a2)
147 ; RV64I-NEXT: srli a0, a0, 8
148 ; RV64I-NEXT: sb a0, 1(a2)
151 ; RV32I-LABEL: ashr_4bytes:
153 ; RV32I-NEXT: lbu a3, 1(a0)
154 ; RV32I-NEXT: lbu a4, 0(a0)
155 ; RV32I-NEXT: lbu a5, 2(a0)
156 ; RV32I-NEXT: lbu a0, 3(a0)
157 ; RV32I-NEXT: slli a3, a3, 8
158 ; RV32I-NEXT: or a3, a3, a4
159 ; RV32I-NEXT: slli a5, a5, 16
160 ; RV32I-NEXT: slli a0, a0, 24
161 ; RV32I-NEXT: or a0, a0, a5
162 ; RV32I-NEXT: or a0, a0, a3
163 ; RV32I-NEXT: lbu a3, 1(a1)
164 ; RV32I-NEXT: lbu a4, 0(a1)
165 ; RV32I-NEXT: lbu a5, 2(a1)
166 ; RV32I-NEXT: lbu a1, 3(a1)
167 ; RV32I-NEXT: slli a3, a3, 8
168 ; RV32I-NEXT: or a3, a3, a4
169 ; RV32I-NEXT: slli a5, a5, 16
170 ; RV32I-NEXT: slli a1, a1, 24
171 ; RV32I-NEXT: or a1, a1, a5
172 ; RV32I-NEXT: or a1, a1, a3
173 ; RV32I-NEXT: sra a0, a0, a1
174 ; RV32I-NEXT: sb a0, 0(a2)
175 ; RV32I-NEXT: srli a1, a0, 16
176 ; RV32I-NEXT: sb a1, 2(a2)
177 ; RV32I-NEXT: srli a1, a0, 24
178 ; RV32I-NEXT: sb a1, 3(a2)
179 ; RV32I-NEXT: srli a0, a0, 8
180 ; RV32I-NEXT: sb a0, 1(a2)
182 %src = load i32, ptr %src.ptr, align 1
183 %bitOff = load i32, ptr %bitOff.ptr, align 1
184 %res = ashr i32 %src, %bitOff
185 store i32 %res, ptr %dst, align 1
; lshr_8bytes: load an i64 value from %src.ptr and an i64 shift amount from
; %bitOff.ptr (both align 1), logically shift the value right by that amount,
; and store the i64 result to %dst (align 1).
; The riscv64 expectation assembles both 64-bit operands from bytes and uses a
; single srl. The riscv32 expectation reads only bytes 0..3 of the shift
; amount, splits the value into two 32-bit words and branches on the sign of
; (amount - 32): when it is non-negative the low result word is the high
; source word shifted by the amount; otherwise the low result word combines
; both source words. The high result word is masked with the sign of
; (amount - 32) (srai 31 followed by and).
189 define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
190 ; RV64I-LABEL: lshr_8bytes:
192 ; RV64I-NEXT: lbu a3, 1(a0)
193 ; RV64I-NEXT: lbu a4, 0(a0)
194 ; RV64I-NEXT: lbu a5, 2(a0)
195 ; RV64I-NEXT: lbu a6, 3(a0)
196 ; RV64I-NEXT: slli a3, a3, 8
197 ; RV64I-NEXT: or a3, a3, a4
198 ; RV64I-NEXT: slli a5, a5, 16
199 ; RV64I-NEXT: slli a6, a6, 24
200 ; RV64I-NEXT: or a4, a6, a5
201 ; RV64I-NEXT: or a3, a4, a3
202 ; RV64I-NEXT: lbu a4, 5(a0)
203 ; RV64I-NEXT: lbu a5, 4(a0)
204 ; RV64I-NEXT: lbu a6, 6(a0)
205 ; RV64I-NEXT: lbu a0, 7(a0)
206 ; RV64I-NEXT: slli a4, a4, 8
207 ; RV64I-NEXT: or a4, a4, a5
208 ; RV64I-NEXT: slli a6, a6, 16
209 ; RV64I-NEXT: slli a0, a0, 24
210 ; RV64I-NEXT: or a0, a0, a6
211 ; RV64I-NEXT: or a0, a0, a4
212 ; RV64I-NEXT: slli a0, a0, 32
213 ; RV64I-NEXT: or a0, a0, a3
214 ; RV64I-NEXT: lbu a3, 1(a1)
215 ; RV64I-NEXT: lbu a4, 0(a1)
216 ; RV64I-NEXT: lbu a5, 2(a1)
217 ; RV64I-NEXT: lbu a6, 3(a1)
218 ; RV64I-NEXT: slli a3, a3, 8
219 ; RV64I-NEXT: or a3, a3, a4
220 ; RV64I-NEXT: slli a5, a5, 16
221 ; RV64I-NEXT: slli a6, a6, 24
222 ; RV64I-NEXT: or a4, a6, a5
223 ; RV64I-NEXT: or a3, a4, a3
224 ; RV64I-NEXT: lbu a4, 5(a1)
225 ; RV64I-NEXT: lbu a5, 4(a1)
226 ; RV64I-NEXT: lbu a6, 6(a1)
227 ; RV64I-NEXT: lbu a1, 7(a1)
228 ; RV64I-NEXT: slli a4, a4, 8
229 ; RV64I-NEXT: or a4, a4, a5
230 ; RV64I-NEXT: slli a6, a6, 16
231 ; RV64I-NEXT: slli a1, a1, 24
232 ; RV64I-NEXT: or a1, a1, a6
233 ; RV64I-NEXT: or a1, a1, a4
234 ; RV64I-NEXT: slli a1, a1, 32
235 ; RV64I-NEXT: or a1, a1, a3
236 ; RV64I-NEXT: srl a0, a0, a1
237 ; RV64I-NEXT: sb a0, 0(a2)
238 ; RV64I-NEXT: srli a1, a0, 48
239 ; RV64I-NEXT: sb a1, 6(a2)
240 ; RV64I-NEXT: srli a1, a0, 56
241 ; RV64I-NEXT: sb a1, 7(a2)
242 ; RV64I-NEXT: srli a1, a0, 32
243 ; RV64I-NEXT: sb a1, 4(a2)
244 ; RV64I-NEXT: srli a1, a0, 40
245 ; RV64I-NEXT: sb a1, 5(a2)
246 ; RV64I-NEXT: srli a1, a0, 16
247 ; RV64I-NEXT: sb a1, 2(a2)
248 ; RV64I-NEXT: srli a1, a0, 24
249 ; RV64I-NEXT: sb a1, 3(a2)
250 ; RV64I-NEXT: srli a0, a0, 8
251 ; RV64I-NEXT: sb a0, 1(a2)
254 ; RV32I-LABEL: lshr_8bytes:
256 ; RV32I-NEXT: lbu a3, 5(a0)
257 ; RV32I-NEXT: lbu a4, 4(a0)
258 ; RV32I-NEXT: lbu a5, 6(a0)
259 ; RV32I-NEXT: lbu a6, 7(a0)
260 ; RV32I-NEXT: slli a3, a3, 8
261 ; RV32I-NEXT: or a3, a3, a4
262 ; RV32I-NEXT: slli a5, a5, 16
263 ; RV32I-NEXT: slli a6, a6, 24
264 ; RV32I-NEXT: or a4, a6, a5
265 ; RV32I-NEXT: or a3, a4, a3
266 ; RV32I-NEXT: lbu a4, 1(a1)
267 ; RV32I-NEXT: lbu a5, 0(a1)
268 ; RV32I-NEXT: lbu a6, 2(a1)
269 ; RV32I-NEXT: lbu a1, 3(a1)
270 ; RV32I-NEXT: slli a4, a4, 8
271 ; RV32I-NEXT: or a4, a4, a5
272 ; RV32I-NEXT: slli a6, a6, 16
273 ; RV32I-NEXT: slli a1, a1, 24
274 ; RV32I-NEXT: or a5, a1, a6
275 ; RV32I-NEXT: or a5, a5, a4
276 ; RV32I-NEXT: addi a4, a5, -32
277 ; RV32I-NEXT: srl a1, a3, a5
278 ; RV32I-NEXT: bltz a4, .LBB3_2
279 ; RV32I-NEXT: # %bb.1:
280 ; RV32I-NEXT: mv a0, a1
281 ; RV32I-NEXT: j .LBB3_3
282 ; RV32I-NEXT: .LBB3_2:
283 ; RV32I-NEXT: lbu a6, 1(a0)
284 ; RV32I-NEXT: lbu a7, 0(a0)
285 ; RV32I-NEXT: lbu t0, 2(a0)
286 ; RV32I-NEXT: lbu a0, 3(a0)
287 ; RV32I-NEXT: slli a6, a6, 8
288 ; RV32I-NEXT: or a6, a6, a7
289 ; RV32I-NEXT: slli t0, t0, 16
290 ; RV32I-NEXT: slli a0, a0, 24
291 ; RV32I-NEXT: or a0, a0, t0
292 ; RV32I-NEXT: or a0, a0, a6
293 ; RV32I-NEXT: srl a0, a0, a5
294 ; RV32I-NEXT: not a5, a5
295 ; RV32I-NEXT: slli a3, a3, 1
296 ; RV32I-NEXT: sll a3, a3, a5
297 ; RV32I-NEXT: or a0, a0, a3
298 ; RV32I-NEXT: .LBB3_3:
299 ; RV32I-NEXT: srai a4, a4, 31
300 ; RV32I-NEXT: and a1, a4, a1
301 ; RV32I-NEXT: sb a1, 4(a2)
302 ; RV32I-NEXT: srli a3, a1, 16
303 ; RV32I-NEXT: sb a3, 6(a2)
304 ; RV32I-NEXT: srli a3, a1, 24
305 ; RV32I-NEXT: sb a3, 7(a2)
306 ; RV32I-NEXT: srli a1, a1, 8
307 ; RV32I-NEXT: sb a1, 5(a2)
308 ; RV32I-NEXT: sb a0, 0(a2)
309 ; RV32I-NEXT: srli a1, a0, 16
310 ; RV32I-NEXT: sb a1, 2(a2)
311 ; RV32I-NEXT: srli a1, a0, 24
312 ; RV32I-NEXT: sb a1, 3(a2)
313 ; RV32I-NEXT: srli a0, a0, 8
314 ; RV32I-NEXT: sb a0, 1(a2)
316 %src = load i64, ptr %src.ptr, align 1
317 %bitOff = load i64, ptr %bitOff.ptr, align 1
318 %res = lshr i64 %src, %bitOff
319 store i64 %res, ptr %dst, align 1
; shl_8bytes: load an i64 value from %src.ptr and an i64 shift amount from
; %bitOff.ptr (both align 1), shift the value left by that amount, and store
; the i64 result to %dst (align 1).
; The riscv64 expectation assembles both 64-bit operands from bytes and uses a
; single sll. The riscv32 expectation reads only bytes 0..3 of the shift
; amount, splits the value into two 32-bit words and branches on the sign of
; (amount - 32): when it is non-negative the high result word is the low
; source word shifted by the amount; otherwise the high result word combines
; both source words. The low result word is masked with the sign of
; (amount - 32) (srai 31 followed by and).
322 define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
323 ; RV64I-LABEL: shl_8bytes:
325 ; RV64I-NEXT: lbu a3, 1(a0)
326 ; RV64I-NEXT: lbu a4, 0(a0)
327 ; RV64I-NEXT: lbu a5, 2(a0)
328 ; RV64I-NEXT: lbu a6, 3(a0)
329 ; RV64I-NEXT: slli a3, a3, 8
330 ; RV64I-NEXT: or a3, a3, a4
331 ; RV64I-NEXT: slli a5, a5, 16
332 ; RV64I-NEXT: slli a6, a6, 24
333 ; RV64I-NEXT: or a4, a6, a5
334 ; RV64I-NEXT: or a3, a4, a3
335 ; RV64I-NEXT: lbu a4, 5(a0)
336 ; RV64I-NEXT: lbu a5, 4(a0)
337 ; RV64I-NEXT: lbu a6, 6(a0)
338 ; RV64I-NEXT: lbu a0, 7(a0)
339 ; RV64I-NEXT: slli a4, a4, 8
340 ; RV64I-NEXT: or a4, a4, a5
341 ; RV64I-NEXT: slli a6, a6, 16
342 ; RV64I-NEXT: slli a0, a0, 24
343 ; RV64I-NEXT: or a0, a0, a6
344 ; RV64I-NEXT: or a0, a0, a4
345 ; RV64I-NEXT: slli a0, a0, 32
346 ; RV64I-NEXT: or a0, a0, a3
347 ; RV64I-NEXT: lbu a3, 1(a1)
348 ; RV64I-NEXT: lbu a4, 0(a1)
349 ; RV64I-NEXT: lbu a5, 2(a1)
350 ; RV64I-NEXT: lbu a6, 3(a1)
351 ; RV64I-NEXT: slli a3, a3, 8
352 ; RV64I-NEXT: or a3, a3, a4
353 ; RV64I-NEXT: slli a5, a5, 16
354 ; RV64I-NEXT: slli a6, a6, 24
355 ; RV64I-NEXT: or a4, a6, a5
356 ; RV64I-NEXT: or a3, a4, a3
357 ; RV64I-NEXT: lbu a4, 5(a1)
358 ; RV64I-NEXT: lbu a5, 4(a1)
359 ; RV64I-NEXT: lbu a6, 6(a1)
360 ; RV64I-NEXT: lbu a1, 7(a1)
361 ; RV64I-NEXT: slli a4, a4, 8
362 ; RV64I-NEXT: or a4, a4, a5
363 ; RV64I-NEXT: slli a6, a6, 16
364 ; RV64I-NEXT: slli a1, a1, 24
365 ; RV64I-NEXT: or a1, a1, a6
366 ; RV64I-NEXT: or a1, a1, a4
367 ; RV64I-NEXT: slli a1, a1, 32
368 ; RV64I-NEXT: or a1, a1, a3
369 ; RV64I-NEXT: sll a0, a0, a1
370 ; RV64I-NEXT: sb a0, 0(a2)
371 ; RV64I-NEXT: srli a1, a0, 48
372 ; RV64I-NEXT: sb a1, 6(a2)
373 ; RV64I-NEXT: srli a1, a0, 56
374 ; RV64I-NEXT: sb a1, 7(a2)
375 ; RV64I-NEXT: srli a1, a0, 32
376 ; RV64I-NEXT: sb a1, 4(a2)
377 ; RV64I-NEXT: srli a1, a0, 40
378 ; RV64I-NEXT: sb a1, 5(a2)
379 ; RV64I-NEXT: srli a1, a0, 16
380 ; RV64I-NEXT: sb a1, 2(a2)
381 ; RV64I-NEXT: srli a1, a0, 24
382 ; RV64I-NEXT: sb a1, 3(a2)
383 ; RV64I-NEXT: srli a0, a0, 8
384 ; RV64I-NEXT: sb a0, 1(a2)
387 ; RV32I-LABEL: shl_8bytes:
389 ; RV32I-NEXT: lbu a3, 1(a0)
390 ; RV32I-NEXT: lbu a4, 0(a0)
391 ; RV32I-NEXT: lbu a5, 2(a0)
392 ; RV32I-NEXT: lbu a6, 3(a0)
393 ; RV32I-NEXT: slli a3, a3, 8
394 ; RV32I-NEXT: or a3, a3, a4
395 ; RV32I-NEXT: slli a5, a5, 16
396 ; RV32I-NEXT: slli a6, a6, 24
397 ; RV32I-NEXT: or a4, a6, a5
398 ; RV32I-NEXT: or a3, a4, a3
399 ; RV32I-NEXT: lbu a4, 1(a1)
400 ; RV32I-NEXT: lbu a5, 0(a1)
401 ; RV32I-NEXT: lbu a6, 2(a1)
402 ; RV32I-NEXT: lbu a1, 3(a1)
403 ; RV32I-NEXT: slli a4, a4, 8
404 ; RV32I-NEXT: or a4, a4, a5
405 ; RV32I-NEXT: slli a6, a6, 16
406 ; RV32I-NEXT: slli a1, a1, 24
407 ; RV32I-NEXT: or a5, a1, a6
408 ; RV32I-NEXT: or a5, a5, a4
409 ; RV32I-NEXT: addi a4, a5, -32
410 ; RV32I-NEXT: sll a1, a3, a5
411 ; RV32I-NEXT: bltz a4, .LBB4_2
412 ; RV32I-NEXT: # %bb.1:
413 ; RV32I-NEXT: mv a0, a1
414 ; RV32I-NEXT: j .LBB4_3
415 ; RV32I-NEXT: .LBB4_2:
416 ; RV32I-NEXT: lbu a6, 5(a0)
417 ; RV32I-NEXT: lbu a7, 4(a0)
418 ; RV32I-NEXT: lbu t0, 6(a0)
419 ; RV32I-NEXT: lbu a0, 7(a0)
420 ; RV32I-NEXT: slli a6, a6, 8
421 ; RV32I-NEXT: or a6, a6, a7
422 ; RV32I-NEXT: slli t0, t0, 16
423 ; RV32I-NEXT: slli a0, a0, 24
424 ; RV32I-NEXT: or a0, a0, t0
425 ; RV32I-NEXT: or a0, a0, a6
426 ; RV32I-NEXT: sll a0, a0, a5
427 ; RV32I-NEXT: not a5, a5
428 ; RV32I-NEXT: srli a3, a3, 1
429 ; RV32I-NEXT: srl a3, a3, a5
430 ; RV32I-NEXT: or a0, a0, a3
431 ; RV32I-NEXT: .LBB4_3:
432 ; RV32I-NEXT: srai a4, a4, 31
433 ; RV32I-NEXT: and a1, a4, a1
434 ; RV32I-NEXT: sb a1, 0(a2)
435 ; RV32I-NEXT: sb a0, 4(a2)
436 ; RV32I-NEXT: srli a3, a1, 16
437 ; RV32I-NEXT: sb a3, 2(a2)
438 ; RV32I-NEXT: srli a3, a1, 24
439 ; RV32I-NEXT: sb a3, 3(a2)
440 ; RV32I-NEXT: srli a1, a1, 8
441 ; RV32I-NEXT: sb a1, 1(a2)
442 ; RV32I-NEXT: srli a1, a0, 16
443 ; RV32I-NEXT: sb a1, 6(a2)
444 ; RV32I-NEXT: srli a1, a0, 24
445 ; RV32I-NEXT: sb a1, 7(a2)
446 ; RV32I-NEXT: srli a0, a0, 8
447 ; RV32I-NEXT: sb a0, 5(a2)
449 %src = load i64, ptr %src.ptr, align 1
450 %bitOff = load i64, ptr %bitOff.ptr, align 1
451 %res = shl i64 %src, %bitOff
452 store i64 %res, ptr %dst, align 1
; ashr_8bytes: load an i64 value from %src.ptr and an i64 shift amount from
; %bitOff.ptr (both align 1), arithmetically shift the value right by that
; amount, and store the i64 result to %dst (align 1).
; The riscv64 expectation assembles both 64-bit operands from bytes and uses a
; single sra. The riscv32 expectation reads only bytes 0..3 of the shift
; amount, splits the value into two 32-bit words and branches on the sign of
; (amount - 32): when it is non-negative the low result word is the high
; source word shifted with sra and the high result word is produced by
; srai 31 on the register holding source byte 7 shifted left by 24; otherwise
; the low result word combines both source words and the high result word is
; the sra result.
455 define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
456 ; RV64I-LABEL: ashr_8bytes:
458 ; RV64I-NEXT: lbu a3, 1(a0)
459 ; RV64I-NEXT: lbu a4, 0(a0)
460 ; RV64I-NEXT: lbu a5, 2(a0)
461 ; RV64I-NEXT: lbu a6, 3(a0)
462 ; RV64I-NEXT: slli a3, a3, 8
463 ; RV64I-NEXT: or a3, a3, a4
464 ; RV64I-NEXT: slli a5, a5, 16
465 ; RV64I-NEXT: slli a6, a6, 24
466 ; RV64I-NEXT: or a4, a6, a5
467 ; RV64I-NEXT: or a3, a4, a3
468 ; RV64I-NEXT: lbu a4, 5(a0)
469 ; RV64I-NEXT: lbu a5, 4(a0)
470 ; RV64I-NEXT: lbu a6, 6(a0)
471 ; RV64I-NEXT: lbu a0, 7(a0)
472 ; RV64I-NEXT: slli a4, a4, 8
473 ; RV64I-NEXT: or a4, a4, a5
474 ; RV64I-NEXT: slli a6, a6, 16
475 ; RV64I-NEXT: slli a0, a0, 24
476 ; RV64I-NEXT: or a0, a0, a6
477 ; RV64I-NEXT: or a0, a0, a4
478 ; RV64I-NEXT: slli a0, a0, 32
479 ; RV64I-NEXT: or a0, a0, a3
480 ; RV64I-NEXT: lbu a3, 1(a1)
481 ; RV64I-NEXT: lbu a4, 0(a1)
482 ; RV64I-NEXT: lbu a5, 2(a1)
483 ; RV64I-NEXT: lbu a6, 3(a1)
484 ; RV64I-NEXT: slli a3, a3, 8
485 ; RV64I-NEXT: or a3, a3, a4
486 ; RV64I-NEXT: slli a5, a5, 16
487 ; RV64I-NEXT: slli a6, a6, 24
488 ; RV64I-NEXT: or a4, a6, a5
489 ; RV64I-NEXT: or a3, a4, a3
490 ; RV64I-NEXT: lbu a4, 5(a1)
491 ; RV64I-NEXT: lbu a5, 4(a1)
492 ; RV64I-NEXT: lbu a6, 6(a1)
493 ; RV64I-NEXT: lbu a1, 7(a1)
494 ; RV64I-NEXT: slli a4, a4, 8
495 ; RV64I-NEXT: or a4, a4, a5
496 ; RV64I-NEXT: slli a6, a6, 16
497 ; RV64I-NEXT: slli a1, a1, 24
498 ; RV64I-NEXT: or a1, a1, a6
499 ; RV64I-NEXT: or a1, a1, a4
500 ; RV64I-NEXT: slli a1, a1, 32
501 ; RV64I-NEXT: or a1, a1, a3
502 ; RV64I-NEXT: sra a0, a0, a1
503 ; RV64I-NEXT: sb a0, 0(a2)
504 ; RV64I-NEXT: srli a1, a0, 48
505 ; RV64I-NEXT: sb a1, 6(a2)
506 ; RV64I-NEXT: srli a1, a0, 56
507 ; RV64I-NEXT: sb a1, 7(a2)
508 ; RV64I-NEXT: srli a1, a0, 32
509 ; RV64I-NEXT: sb a1, 4(a2)
510 ; RV64I-NEXT: srli a1, a0, 40
511 ; RV64I-NEXT: sb a1, 5(a2)
512 ; RV64I-NEXT: srli a1, a0, 16
513 ; RV64I-NEXT: sb a1, 2(a2)
514 ; RV64I-NEXT: srli a1, a0, 24
515 ; RV64I-NEXT: sb a1, 3(a2)
516 ; RV64I-NEXT: srli a0, a0, 8
517 ; RV64I-NEXT: sb a0, 1(a2)
520 ; RV32I-LABEL: ashr_8bytes:
522 ; RV32I-NEXT: lbu a3, 5(a0)
523 ; RV32I-NEXT: lbu a4, 4(a0)
524 ; RV32I-NEXT: lbu a5, 6(a0)
525 ; RV32I-NEXT: lbu a6, 7(a0)
526 ; RV32I-NEXT: slli a3, a3, 8
527 ; RV32I-NEXT: or a3, a3, a4
528 ; RV32I-NEXT: slli a5, a5, 16
529 ; RV32I-NEXT: slli a4, a6, 24
530 ; RV32I-NEXT: or a5, a4, a5
531 ; RV32I-NEXT: or a3, a5, a3
532 ; RV32I-NEXT: lbu a5, 1(a1)
533 ; RV32I-NEXT: lbu a6, 0(a1)
534 ; RV32I-NEXT: lbu a7, 2(a1)
535 ; RV32I-NEXT: lbu a1, 3(a1)
536 ; RV32I-NEXT: slli a5, a5, 8
537 ; RV32I-NEXT: or a5, a5, a6
538 ; RV32I-NEXT: slli a7, a7, 16
539 ; RV32I-NEXT: slli a1, a1, 24
540 ; RV32I-NEXT: or a1, a1, a7
541 ; RV32I-NEXT: or a5, a1, a5
542 ; RV32I-NEXT: addi a6, a5, -32
543 ; RV32I-NEXT: sra a1, a3, a5
544 ; RV32I-NEXT: bltz a6, .LBB5_2
545 ; RV32I-NEXT: # %bb.1:
546 ; RV32I-NEXT: srai a4, a4, 31
547 ; RV32I-NEXT: mv a0, a1
548 ; RV32I-NEXT: mv a1, a4
549 ; RV32I-NEXT: j .LBB5_3
550 ; RV32I-NEXT: .LBB5_2:
551 ; RV32I-NEXT: lbu a4, 1(a0)
552 ; RV32I-NEXT: lbu a6, 0(a0)
553 ; RV32I-NEXT: lbu a7, 2(a0)
554 ; RV32I-NEXT: lbu a0, 3(a0)
555 ; RV32I-NEXT: slli a4, a4, 8
556 ; RV32I-NEXT: or a4, a4, a6
557 ; RV32I-NEXT: slli a7, a7, 16
558 ; RV32I-NEXT: slli a0, a0, 24
559 ; RV32I-NEXT: or a0, a0, a7
560 ; RV32I-NEXT: or a0, a0, a4
561 ; RV32I-NEXT: srl a0, a0, a5
562 ; RV32I-NEXT: not a4, a5
563 ; RV32I-NEXT: slli a3, a3, 1
564 ; RV32I-NEXT: sll a3, a3, a4
565 ; RV32I-NEXT: or a0, a0, a3
566 ; RV32I-NEXT: .LBB5_3:
567 ; RV32I-NEXT: sb a1, 4(a2)
568 ; RV32I-NEXT: srli a3, a1, 16
569 ; RV32I-NEXT: sb a3, 6(a2)
570 ; RV32I-NEXT: srli a3, a1, 24
571 ; RV32I-NEXT: sb a3, 7(a2)
572 ; RV32I-NEXT: srli a1, a1, 8
573 ; RV32I-NEXT: sb a1, 5(a2)
574 ; RV32I-NEXT: sb a0, 0(a2)
575 ; RV32I-NEXT: srli a1, a0, 16
576 ; RV32I-NEXT: sb a1, 2(a2)
577 ; RV32I-NEXT: srli a1, a0, 24
578 ; RV32I-NEXT: sb a1, 3(a2)
579 ; RV32I-NEXT: srli a0, a0, 8
580 ; RV32I-NEXT: sb a0, 1(a2)
582 %src = load i64, ptr %src.ptr, align 1
583 %bitOff = load i64, ptr %bitOff.ptr, align 1
584 %res = ashr i64 %src, %bitOff
585 store i64 %res, ptr %dst, align 1
; lshr_16bytes: load an i128 value from %src.ptr and an i128 shift amount from
; %bitOff.ptr (both align 1), logically shift the value right by that amount,
; and store the i128 result to %dst (align 1).
; The riscv64 expectation reads only bytes 0..7 of the shift amount, splits
; the value into two 64-bit halves and branches on the sign of (amount - 64);
; the high result half is masked with that sign (srai 63 followed by and).
; The riscv32 expectation reads only bytes 0..3 of the shift amount and goes
; through a stack buffer: it allocates 64 bytes of stack, saves s0-s4, copies
; the 16 source bytes to 12(sp)..27(sp), zeroes 28(sp)..43(sp), adds an offset
; derived from the shift amount (slli 25 then srli 28) to sp+12, reloads four
; 32-bit words byte-wise from that address, and shifts them by (amount & 7),
; combining adjacent words before the byte-wise stores.
589 define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
590 ; RV64I-LABEL: lshr_16bytes:
592 ; RV64I-NEXT: lbu a3, 9(a0)
593 ; RV64I-NEXT: lbu a4, 8(a0)
594 ; RV64I-NEXT: lbu a5, 10(a0)
595 ; RV64I-NEXT: lbu a6, 11(a0)
596 ; RV64I-NEXT: slli a3, a3, 8
597 ; RV64I-NEXT: or a3, a3, a4
598 ; RV64I-NEXT: slli a5, a5, 16
599 ; RV64I-NEXT: slli a6, a6, 24
600 ; RV64I-NEXT: or a4, a6, a5
601 ; RV64I-NEXT: or a3, a4, a3
602 ; RV64I-NEXT: lbu a4, 13(a0)
603 ; RV64I-NEXT: lbu a5, 12(a0)
604 ; RV64I-NEXT: lbu a6, 14(a0)
605 ; RV64I-NEXT: lbu a7, 15(a0)
606 ; RV64I-NEXT: slli a4, a4, 8
607 ; RV64I-NEXT: or a4, a4, a5
608 ; RV64I-NEXT: slli a6, a6, 16
609 ; RV64I-NEXT: slli a7, a7, 24
610 ; RV64I-NEXT: or a5, a7, a6
611 ; RV64I-NEXT: or a4, a5, a4
612 ; RV64I-NEXT: slli a4, a4, 32
613 ; RV64I-NEXT: or a3, a4, a3
614 ; RV64I-NEXT: lbu a4, 1(a1)
615 ; RV64I-NEXT: lbu a5, 0(a1)
616 ; RV64I-NEXT: lbu a6, 2(a1)
617 ; RV64I-NEXT: lbu a7, 3(a1)
618 ; RV64I-NEXT: slli a4, a4, 8
619 ; RV64I-NEXT: or a4, a4, a5
620 ; RV64I-NEXT: slli a6, a6, 16
621 ; RV64I-NEXT: slli a7, a7, 24
622 ; RV64I-NEXT: or a5, a7, a6
623 ; RV64I-NEXT: or a4, a5, a4
624 ; RV64I-NEXT: lbu a5, 5(a1)
625 ; RV64I-NEXT: lbu a6, 4(a1)
626 ; RV64I-NEXT: lbu a7, 6(a1)
627 ; RV64I-NEXT: lbu a1, 7(a1)
628 ; RV64I-NEXT: slli a5, a5, 8
629 ; RV64I-NEXT: or a5, a5, a6
630 ; RV64I-NEXT: slli a7, a7, 16
631 ; RV64I-NEXT: slli a1, a1, 24
632 ; RV64I-NEXT: or a1, a1, a7
633 ; RV64I-NEXT: or a1, a1, a5
634 ; RV64I-NEXT: slli a1, a1, 32
635 ; RV64I-NEXT: or a5, a1, a4
636 ; RV64I-NEXT: addi a4, a5, -64
637 ; RV64I-NEXT: srl a1, a3, a5
638 ; RV64I-NEXT: bltz a4, .LBB6_2
639 ; RV64I-NEXT: # %bb.1:
640 ; RV64I-NEXT: mv a0, a1
641 ; RV64I-NEXT: j .LBB6_3
642 ; RV64I-NEXT: .LBB6_2:
643 ; RV64I-NEXT: lbu a6, 1(a0)
644 ; RV64I-NEXT: lbu a7, 0(a0)
645 ; RV64I-NEXT: lbu t0, 2(a0)
646 ; RV64I-NEXT: lbu t1, 3(a0)
647 ; RV64I-NEXT: slli a6, a6, 8
648 ; RV64I-NEXT: or a6, a6, a7
649 ; RV64I-NEXT: slli t0, t0, 16
650 ; RV64I-NEXT: slli t1, t1, 24
651 ; RV64I-NEXT: or a7, t1, t0
652 ; RV64I-NEXT: or a6, a7, a6
653 ; RV64I-NEXT: lbu a7, 5(a0)
654 ; RV64I-NEXT: lbu t0, 4(a0)
655 ; RV64I-NEXT: lbu t1, 6(a0)
656 ; RV64I-NEXT: lbu a0, 7(a0)
657 ; RV64I-NEXT: slli a7, a7, 8
658 ; RV64I-NEXT: or a7, a7, t0
659 ; RV64I-NEXT: slli t1, t1, 16
660 ; RV64I-NEXT: slli a0, a0, 24
661 ; RV64I-NEXT: or a0, a0, t1
662 ; RV64I-NEXT: or a0, a0, a7
663 ; RV64I-NEXT: slli a0, a0, 32
664 ; RV64I-NEXT: or a0, a0, a6
665 ; RV64I-NEXT: srl a0, a0, a5
666 ; RV64I-NEXT: not a5, a5
667 ; RV64I-NEXT: slli a3, a3, 1
668 ; RV64I-NEXT: sll a3, a3, a5
669 ; RV64I-NEXT: or a0, a0, a3
670 ; RV64I-NEXT: .LBB6_3:
671 ; RV64I-NEXT: srai a4, a4, 63
672 ; RV64I-NEXT: and a1, a4, a1
673 ; RV64I-NEXT: sb a1, 8(a2)
674 ; RV64I-NEXT: srli a3, a1, 56
675 ; RV64I-NEXT: sb a3, 15(a2)
676 ; RV64I-NEXT: srli a3, a1, 48
677 ; RV64I-NEXT: sb a3, 14(a2)
678 ; RV64I-NEXT: srli a3, a1, 40
679 ; RV64I-NEXT: sb a3, 13(a2)
680 ; RV64I-NEXT: srli a3, a1, 32
681 ; RV64I-NEXT: sb a3, 12(a2)
682 ; RV64I-NEXT: srli a3, a1, 24
683 ; RV64I-NEXT: sb a3, 11(a2)
684 ; RV64I-NEXT: srli a3, a1, 16
685 ; RV64I-NEXT: sb a3, 10(a2)
686 ; RV64I-NEXT: srli a1, a1, 8
687 ; RV64I-NEXT: sb a1, 9(a2)
688 ; RV64I-NEXT: sb a0, 0(a2)
689 ; RV64I-NEXT: srli a1, a0, 56
690 ; RV64I-NEXT: sb a1, 7(a2)
691 ; RV64I-NEXT: srli a1, a0, 48
692 ; RV64I-NEXT: sb a1, 6(a2)
693 ; RV64I-NEXT: srli a1, a0, 40
694 ; RV64I-NEXT: sb a1, 5(a2)
695 ; RV64I-NEXT: srli a1, a0, 32
696 ; RV64I-NEXT: sb a1, 4(a2)
697 ; RV64I-NEXT: srli a1, a0, 24
698 ; RV64I-NEXT: sb a1, 3(a2)
699 ; RV64I-NEXT: srli a1, a0, 16
700 ; RV64I-NEXT: sb a1, 2(a2)
701 ; RV64I-NEXT: srli a0, a0, 8
702 ; RV64I-NEXT: sb a0, 1(a2)
705 ; RV32I-LABEL: lshr_16bytes:
707 ; RV32I-NEXT: addi sp, sp, -64
708 ; RV32I-NEXT: sw s0, 60(sp) # 4-byte Folded Spill
709 ; RV32I-NEXT: sw s1, 56(sp) # 4-byte Folded Spill
710 ; RV32I-NEXT: sw s2, 52(sp) # 4-byte Folded Spill
711 ; RV32I-NEXT: sw s3, 48(sp) # 4-byte Folded Spill
712 ; RV32I-NEXT: sw s4, 44(sp) # 4-byte Folded Spill
713 ; RV32I-NEXT: lbu a3, 0(a0)
714 ; RV32I-NEXT: lbu a4, 1(a0)
715 ; RV32I-NEXT: lbu a5, 2(a0)
716 ; RV32I-NEXT: lbu a6, 3(a0)
717 ; RV32I-NEXT: lbu a7, 4(a0)
718 ; RV32I-NEXT: lbu t0, 5(a0)
719 ; RV32I-NEXT: lbu t1, 6(a0)
720 ; RV32I-NEXT: lbu t2, 7(a0)
721 ; RV32I-NEXT: lbu t3, 8(a0)
722 ; RV32I-NEXT: lbu t4, 9(a0)
723 ; RV32I-NEXT: lbu t5, 10(a0)
724 ; RV32I-NEXT: lbu t6, 11(a0)
725 ; RV32I-NEXT: lbu s0, 1(a1)
726 ; RV32I-NEXT: lbu s1, 0(a1)
727 ; RV32I-NEXT: lbu s2, 12(a0)
728 ; RV32I-NEXT: lbu s3, 13(a0)
729 ; RV32I-NEXT: slli s0, s0, 8
730 ; RV32I-NEXT: or s0, s0, s1
731 ; RV32I-NEXT: lbu s1, 2(a1)
732 ; RV32I-NEXT: lbu a1, 3(a1)
733 ; RV32I-NEXT: lbu s4, 14(a0)
734 ; RV32I-NEXT: lbu a0, 15(a0)
735 ; RV32I-NEXT: slli s1, s1, 16
736 ; RV32I-NEXT: slli a1, a1, 24
737 ; RV32I-NEXT: or a1, a1, s1
738 ; RV32I-NEXT: or a1, a1, s0
739 ; RV32I-NEXT: sb zero, 43(sp)
740 ; RV32I-NEXT: sb zero, 42(sp)
741 ; RV32I-NEXT: sb zero, 41(sp)
742 ; RV32I-NEXT: sb zero, 40(sp)
743 ; RV32I-NEXT: sb zero, 39(sp)
744 ; RV32I-NEXT: sb zero, 38(sp)
745 ; RV32I-NEXT: sb zero, 37(sp)
746 ; RV32I-NEXT: sb zero, 36(sp)
747 ; RV32I-NEXT: sb zero, 35(sp)
748 ; RV32I-NEXT: sb zero, 34(sp)
749 ; RV32I-NEXT: sb zero, 33(sp)
750 ; RV32I-NEXT: sb zero, 32(sp)
751 ; RV32I-NEXT: sb zero, 31(sp)
752 ; RV32I-NEXT: sb zero, 30(sp)
753 ; RV32I-NEXT: sb zero, 29(sp)
754 ; RV32I-NEXT: sb zero, 28(sp)
755 ; RV32I-NEXT: sb a0, 27(sp)
756 ; RV32I-NEXT: sb s4, 26(sp)
757 ; RV32I-NEXT: sb s3, 25(sp)
758 ; RV32I-NEXT: sb s2, 24(sp)
759 ; RV32I-NEXT: sb t6, 23(sp)
760 ; RV32I-NEXT: sb t5, 22(sp)
761 ; RV32I-NEXT: sb t4, 21(sp)
762 ; RV32I-NEXT: sb t3, 20(sp)
763 ; RV32I-NEXT: sb t2, 19(sp)
764 ; RV32I-NEXT: sb t1, 18(sp)
765 ; RV32I-NEXT: sb t0, 17(sp)
766 ; RV32I-NEXT: sb a7, 16(sp)
767 ; RV32I-NEXT: sb a6, 15(sp)
768 ; RV32I-NEXT: sb a5, 14(sp)
769 ; RV32I-NEXT: sb a4, 13(sp)
770 ; RV32I-NEXT: sb a3, 12(sp)
771 ; RV32I-NEXT: slli a0, a1, 25
772 ; RV32I-NEXT: srli a0, a0, 28
773 ; RV32I-NEXT: addi a3, sp, 12
774 ; RV32I-NEXT: add a3, a3, a0
775 ; RV32I-NEXT: lbu a0, 5(a3)
776 ; RV32I-NEXT: lbu a4, 4(a3)
777 ; RV32I-NEXT: lbu a5, 6(a3)
778 ; RV32I-NEXT: lbu a6, 7(a3)
779 ; RV32I-NEXT: slli a0, a0, 8
780 ; RV32I-NEXT: or a0, a0, a4
781 ; RV32I-NEXT: slli a5, a5, 16
782 ; RV32I-NEXT: slli a6, a6, 24
783 ; RV32I-NEXT: or a4, a6, a5
784 ; RV32I-NEXT: or a5, a4, a0
785 ; RV32I-NEXT: andi a4, a1, 7
786 ; RV32I-NEXT: srl a0, a5, a4
787 ; RV32I-NEXT: lbu a1, 9(a3)
788 ; RV32I-NEXT: lbu a6, 8(a3)
789 ; RV32I-NEXT: lbu a7, 10(a3)
790 ; RV32I-NEXT: lbu t0, 11(a3)
791 ; RV32I-NEXT: slli a1, a1, 8
792 ; RV32I-NEXT: or a1, a1, a6
793 ; RV32I-NEXT: slli a7, a7, 16
794 ; RV32I-NEXT: slli t0, t0, 24
795 ; RV32I-NEXT: or a6, t0, a7
796 ; RV32I-NEXT: or a6, a6, a1
797 ; RV32I-NEXT: slli a1, a6, 1
798 ; RV32I-NEXT: not a7, a4
799 ; RV32I-NEXT: sll a1, a1, a7
800 ; RV32I-NEXT: or a1, a0, a1
801 ; RV32I-NEXT: lbu a7, 1(a3)
802 ; RV32I-NEXT: lbu t0, 0(a3)
803 ; RV32I-NEXT: lbu t1, 2(a3)
804 ; RV32I-NEXT: lbu t2, 3(a3)
805 ; RV32I-NEXT: slli a7, a7, 8
806 ; RV32I-NEXT: or a7, a7, t0
807 ; RV32I-NEXT: slli t1, t1, 16
808 ; RV32I-NEXT: slli t2, t2, 24
809 ; RV32I-NEXT: or t0, t2, t1
810 ; RV32I-NEXT: or a7, t0, a7
811 ; RV32I-NEXT: srl a7, a7, a4
812 ; RV32I-NEXT: slli a5, a5, 1
813 ; RV32I-NEXT: xori t0, a4, 31
814 ; RV32I-NEXT: sll a5, a5, t0
815 ; RV32I-NEXT: or a5, a7, a5
816 ; RV32I-NEXT: srl a6, a6, a4
817 ; RV32I-NEXT: lbu t1, 13(a3)
818 ; RV32I-NEXT: lbu t2, 12(a3)
819 ; RV32I-NEXT: lbu t3, 14(a3)
820 ; RV32I-NEXT: lbu a3, 15(a3)
821 ; RV32I-NEXT: slli t1, t1, 8
822 ; RV32I-NEXT: or t1, t1, t2
823 ; RV32I-NEXT: slli t3, t3, 16
824 ; RV32I-NEXT: slli a3, a3, 24
825 ; RV32I-NEXT: or a3, a3, t3
826 ; RV32I-NEXT: or a3, a3, t1
827 ; RV32I-NEXT: slli t1, a3, 1
828 ; RV32I-NEXT: sll t0, t1, t0
829 ; RV32I-NEXT: or t0, a6, t0
830 ; RV32I-NEXT: srl a3, a3, a4
831 ; RV32I-NEXT: sb a6, 8(a2)
832 ; RV32I-NEXT: sb a3, 12(a2)
833 ; RV32I-NEXT: sb a7, 0(a2)
834 ; RV32I-NEXT: sb a0, 4(a2)
835 ; RV32I-NEXT: srli a4, a6, 16
836 ; RV32I-NEXT: sb a4, 10(a2)
837 ; RV32I-NEXT: srli a4, a6, 8
838 ; RV32I-NEXT: sb a4, 9(a2)
839 ; RV32I-NEXT: srli a4, a3, 16
840 ; RV32I-NEXT: sb a4, 14(a2)
841 ; RV32I-NEXT: srli a4, a3, 24
842 ; RV32I-NEXT: sb a4, 15(a2)
843 ; RV32I-NEXT: srli a3, a3, 8
844 ; RV32I-NEXT: sb a3, 13(a2)
845 ; RV32I-NEXT: srli a3, a7, 16
846 ; RV32I-NEXT: sb a3, 2(a2)
847 ; RV32I-NEXT: srli a3, a7, 8
848 ; RV32I-NEXT: sb a3, 1(a2)
849 ; RV32I-NEXT: srli a3, a0, 16
850 ; RV32I-NEXT: sb a3, 6(a2)
851 ; RV32I-NEXT: srli a0, a0, 8
852 ; RV32I-NEXT: sb a0, 5(a2)
853 ; RV32I-NEXT: srli a0, t0, 24
854 ; RV32I-NEXT: sb a0, 11(a2)
855 ; RV32I-NEXT: srli a5, a5, 24
856 ; RV32I-NEXT: sb a5, 3(a2)
857 ; RV32I-NEXT: srli a1, a1, 24
858 ; RV32I-NEXT: sb a1, 7(a2)
859 ; RV32I-NEXT: lw s0, 60(sp) # 4-byte Folded Reload
860 ; RV32I-NEXT: lw s1, 56(sp) # 4-byte Folded Reload
861 ; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload
862 ; RV32I-NEXT: lw s3, 48(sp) # 4-byte Folded Reload
863 ; RV32I-NEXT: lw s4, 44(sp) # 4-byte Folded Reload
864 ; RV32I-NEXT: addi sp, sp, 64
866 %src = load i128, ptr %src.ptr, align 1
867 %bitOff = load i128, ptr %bitOff.ptr, align 1
868 %res = lshr i128 %src, %bitOff
869 store i128 %res, ptr %dst, align 1
872 define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
873 ; RV64I-LABEL: shl_16bytes:
875 ; RV64I-NEXT: lbu a3, 1(a0)
876 ; RV64I-NEXT: lbu a4, 0(a0)
877 ; RV64I-NEXT: lbu a5, 2(a0)
878 ; RV64I-NEXT: lbu a6, 3(a0)
879 ; RV64I-NEXT: slli a3, a3, 8
880 ; RV64I-NEXT: or a3, a3, a4
881 ; RV64I-NEXT: slli a5, a5, 16
882 ; RV64I-NEXT: slli a6, a6, 24
883 ; RV64I-NEXT: or a4, a6, a5
884 ; RV64I-NEXT: or a3, a4, a3
885 ; RV64I-NEXT: lbu a4, 5(a0)
886 ; RV64I-NEXT: lbu a5, 4(a0)
887 ; RV64I-NEXT: lbu a6, 6(a0)
888 ; RV64I-NEXT: lbu a7, 7(a0)
889 ; RV64I-NEXT: slli a4, a4, 8
890 ; RV64I-NEXT: or a4, a4, a5
891 ; RV64I-NEXT: slli a6, a6, 16
892 ; RV64I-NEXT: slli a7, a7, 24
893 ; RV64I-NEXT: or a5, a7, a6
894 ; RV64I-NEXT: or a4, a5, a4
895 ; RV64I-NEXT: slli a4, a4, 32
896 ; RV64I-NEXT: or a3, a4, a3
897 ; RV64I-NEXT: lbu a4, 1(a1)
898 ; RV64I-NEXT: lbu a5, 0(a1)
899 ; RV64I-NEXT: lbu a6, 2(a1)
900 ; RV64I-NEXT: lbu a7, 3(a1)
901 ; RV64I-NEXT: slli a4, a4, 8
902 ; RV64I-NEXT: or a4, a4, a5
903 ; RV64I-NEXT: slli a6, a6, 16
904 ; RV64I-NEXT: slli a7, a7, 24
905 ; RV64I-NEXT: or a5, a7, a6
906 ; RV64I-NEXT: or a4, a5, a4
907 ; RV64I-NEXT: lbu a5, 5(a1)
908 ; RV64I-NEXT: lbu a6, 4(a1)
909 ; RV64I-NEXT: lbu a7, 6(a1)
910 ; RV64I-NEXT: lbu a1, 7(a1)
911 ; RV64I-NEXT: slli a5, a5, 8
912 ; RV64I-NEXT: or a5, a5, a6
913 ; RV64I-NEXT: slli a7, a7, 16
914 ; RV64I-NEXT: slli a1, a1, 24
915 ; RV64I-NEXT: or a1, a1, a7
916 ; RV64I-NEXT: or a1, a1, a5
917 ; RV64I-NEXT: slli a1, a1, 32
918 ; RV64I-NEXT: or a5, a1, a4
919 ; RV64I-NEXT: addi a4, a5, -64
920 ; RV64I-NEXT: sll a1, a3, a5
921 ; RV64I-NEXT: bltz a4, .LBB7_2
922 ; RV64I-NEXT: # %bb.1:
923 ; RV64I-NEXT: mv a0, a1
924 ; RV64I-NEXT: j .LBB7_3
925 ; RV64I-NEXT: .LBB7_2:
926 ; RV64I-NEXT: lbu a6, 9(a0)
927 ; RV64I-NEXT: lbu a7, 8(a0)
928 ; RV64I-NEXT: lbu t0, 10(a0)
929 ; RV64I-NEXT: lbu t1, 11(a0)
930 ; RV64I-NEXT: slli a6, a6, 8
931 ; RV64I-NEXT: or a6, a6, a7
932 ; RV64I-NEXT: slli t0, t0, 16
933 ; RV64I-NEXT: slli t1, t1, 24
934 ; RV64I-NEXT: or a7, t1, t0
935 ; RV64I-NEXT: or a6, a7, a6
936 ; RV64I-NEXT: lbu a7, 13(a0)
937 ; RV64I-NEXT: lbu t0, 12(a0)
938 ; RV64I-NEXT: lbu t1, 14(a0)
939 ; RV64I-NEXT: lbu a0, 15(a0)
940 ; RV64I-NEXT: slli a7, a7, 8
941 ; RV64I-NEXT: or a7, a7, t0
942 ; RV64I-NEXT: slli t1, t1, 16
943 ; RV64I-NEXT: slli a0, a0, 24
944 ; RV64I-NEXT: or a0, a0, t1
945 ; RV64I-NEXT: or a0, a0, a7
946 ; RV64I-NEXT: slli a0, a0, 32
947 ; RV64I-NEXT: or a0, a0, a6
948 ; RV64I-NEXT: sll a0, a0, a5
949 ; RV64I-NEXT: not a5, a5
950 ; RV64I-NEXT: srli a3, a3, 1
951 ; RV64I-NEXT: srl a3, a3, a5
952 ; RV64I-NEXT: or a0, a0, a3
953 ; RV64I-NEXT: .LBB7_3:
954 ; RV64I-NEXT: srai a4, a4, 63
955 ; RV64I-NEXT: and a1, a4, a1
956 ; RV64I-NEXT: sb a1, 0(a2)
957 ; RV64I-NEXT: sb a0, 8(a2)
958 ; RV64I-NEXT: srli a3, a1, 56
959 ; RV64I-NEXT: sb a3, 7(a2)
960 ; RV64I-NEXT: srli a3, a1, 48
961 ; RV64I-NEXT: sb a3, 6(a2)
962 ; RV64I-NEXT: srli a3, a1, 40
963 ; RV64I-NEXT: sb a3, 5(a2)
964 ; RV64I-NEXT: srli a3, a1, 32
965 ; RV64I-NEXT: sb a3, 4(a2)
966 ; RV64I-NEXT: srli a3, a1, 24
967 ; RV64I-NEXT: sb a3, 3(a2)
968 ; RV64I-NEXT: srli a3, a1, 16
969 ; RV64I-NEXT: sb a3, 2(a2)
970 ; RV64I-NEXT: srli a1, a1, 8
971 ; RV64I-NEXT: sb a1, 1(a2)
972 ; RV64I-NEXT: srli a1, a0, 56
973 ; RV64I-NEXT: sb a1, 15(a2)
974 ; RV64I-NEXT: srli a1, a0, 48
975 ; RV64I-NEXT: sb a1, 14(a2)
976 ; RV64I-NEXT: srli a1, a0, 40
977 ; RV64I-NEXT: sb a1, 13(a2)
978 ; RV64I-NEXT: srli a1, a0, 32
979 ; RV64I-NEXT: sb a1, 12(a2)
980 ; RV64I-NEXT: srli a1, a0, 24
981 ; RV64I-NEXT: sb a1, 11(a2)
982 ; RV64I-NEXT: srli a1, a0, 16
983 ; RV64I-NEXT: sb a1, 10(a2)
984 ; RV64I-NEXT: srli a0, a0, 8
985 ; RV64I-NEXT: sb a0, 9(a2)
988 ; RV32I-LABEL: shl_16bytes:
990 ; RV32I-NEXT: addi sp, sp, -64
991 ; RV32I-NEXT: sw s0, 60(sp) # 4-byte Folded Spill
992 ; RV32I-NEXT: sw s1, 56(sp) # 4-byte Folded Spill
993 ; RV32I-NEXT: sw s2, 52(sp) # 4-byte Folded Spill
994 ; RV32I-NEXT: sw s3, 48(sp) # 4-byte Folded Spill
995 ; RV32I-NEXT: sw s4, 44(sp) # 4-byte Folded Spill
996 ; RV32I-NEXT: lbu a3, 0(a0)
997 ; RV32I-NEXT: lbu a4, 1(a0)
998 ; RV32I-NEXT: lbu a5, 2(a0)
999 ; RV32I-NEXT: lbu a6, 3(a0)
1000 ; RV32I-NEXT: lbu a7, 4(a0)
1001 ; RV32I-NEXT: lbu t0, 5(a0)
1002 ; RV32I-NEXT: lbu t1, 6(a0)
1003 ; RV32I-NEXT: lbu t2, 7(a0)
1004 ; RV32I-NEXT: lbu t3, 8(a0)
1005 ; RV32I-NEXT: lbu t4, 9(a0)
1006 ; RV32I-NEXT: lbu t5, 10(a0)
1007 ; RV32I-NEXT: lbu t6, 11(a0)
1008 ; RV32I-NEXT: lbu s0, 1(a1)
1009 ; RV32I-NEXT: lbu s1, 0(a1)
1010 ; RV32I-NEXT: lbu s2, 12(a0)
1011 ; RV32I-NEXT: lbu s3, 13(a0)
1012 ; RV32I-NEXT: slli s0, s0, 8
1013 ; RV32I-NEXT: or s0, s0, s1
1014 ; RV32I-NEXT: lbu s1, 2(a1)
1015 ; RV32I-NEXT: lbu a1, 3(a1)
1016 ; RV32I-NEXT: lbu s4, 14(a0)
1017 ; RV32I-NEXT: lbu a0, 15(a0)
1018 ; RV32I-NEXT: slli s1, s1, 16
1019 ; RV32I-NEXT: slli a1, a1, 24
1020 ; RV32I-NEXT: or a1, a1, s1
1021 ; RV32I-NEXT: or a1, a1, s0
1022 ; RV32I-NEXT: sb zero, 27(sp)
1023 ; RV32I-NEXT: sb zero, 26(sp)
1024 ; RV32I-NEXT: sb zero, 25(sp)
1025 ; RV32I-NEXT: sb zero, 24(sp)
1026 ; RV32I-NEXT: sb zero, 23(sp)
1027 ; RV32I-NEXT: sb zero, 22(sp)
1028 ; RV32I-NEXT: sb zero, 21(sp)
1029 ; RV32I-NEXT: sb zero, 20(sp)
1030 ; RV32I-NEXT: sb zero, 19(sp)
1031 ; RV32I-NEXT: sb zero, 18(sp)
1032 ; RV32I-NEXT: sb zero, 17(sp)
1033 ; RV32I-NEXT: sb zero, 16(sp)
1034 ; RV32I-NEXT: sb zero, 15(sp)
1035 ; RV32I-NEXT: sb zero, 14(sp)
1036 ; RV32I-NEXT: sb zero, 13(sp)
1037 ; RV32I-NEXT: sb zero, 12(sp)
1038 ; RV32I-NEXT: sb a0, 43(sp)
1039 ; RV32I-NEXT: sb s4, 42(sp)
1040 ; RV32I-NEXT: sb s3, 41(sp)
1041 ; RV32I-NEXT: sb s2, 40(sp)
1042 ; RV32I-NEXT: sb t6, 39(sp)
1043 ; RV32I-NEXT: sb t5, 38(sp)
1044 ; RV32I-NEXT: sb t4, 37(sp)
1045 ; RV32I-NEXT: sb t3, 36(sp)
1046 ; RV32I-NEXT: sb t2, 35(sp)
1047 ; RV32I-NEXT: sb t1, 34(sp)
1048 ; RV32I-NEXT: sb t0, 33(sp)
1049 ; RV32I-NEXT: sb a7, 32(sp)
1050 ; RV32I-NEXT: sb a6, 31(sp)
1051 ; RV32I-NEXT: sb a5, 30(sp)
1052 ; RV32I-NEXT: sb a4, 29(sp)
1053 ; RV32I-NEXT: sb a3, 28(sp)
1054 ; RV32I-NEXT: slli a0, a1, 25
1055 ; RV32I-NEXT: srli a0, a0, 28
1056 ; RV32I-NEXT: addi a3, sp, 28
1057 ; RV32I-NEXT: sub a3, a3, a0
1058 ; RV32I-NEXT: lbu a0, 5(a3)
1059 ; RV32I-NEXT: lbu a4, 4(a3)
1060 ; RV32I-NEXT: lbu a5, 6(a3)
1061 ; RV32I-NEXT: lbu a6, 7(a3)
1062 ; RV32I-NEXT: slli a0, a0, 8
1063 ; RV32I-NEXT: or a0, a0, a4
1064 ; RV32I-NEXT: slli a5, a5, 16
1065 ; RV32I-NEXT: slli a6, a6, 24
1066 ; RV32I-NEXT: or a4, a6, a5
1067 ; RV32I-NEXT: or a5, a4, a0
1068 ; RV32I-NEXT: andi a4, a1, 7
1069 ; RV32I-NEXT: sll a0, a5, a4
1070 ; RV32I-NEXT: lbu a1, 1(a3)
1071 ; RV32I-NEXT: lbu a6, 0(a3)
1072 ; RV32I-NEXT: lbu a7, 2(a3)
1073 ; RV32I-NEXT: lbu t0, 3(a3)
1074 ; RV32I-NEXT: slli a1, a1, 8
1075 ; RV32I-NEXT: or a1, a1, a6
1076 ; RV32I-NEXT: slli a7, a7, 16
1077 ; RV32I-NEXT: slli t0, t0, 24
1078 ; RV32I-NEXT: or a6, t0, a7
1079 ; RV32I-NEXT: or a6, a6, a1
1080 ; RV32I-NEXT: srli a1, a6, 1
1081 ; RV32I-NEXT: xori a7, a4, 31
1082 ; RV32I-NEXT: srl a1, a1, a7
1083 ; RV32I-NEXT: or a1, a0, a1
1084 ; RV32I-NEXT: lbu t0, 13(a3)
1085 ; RV32I-NEXT: lbu t1, 12(a3)
1086 ; RV32I-NEXT: lbu t2, 14(a3)
1087 ; RV32I-NEXT: lbu t3, 15(a3)
1088 ; RV32I-NEXT: slli t0, t0, 8
1089 ; RV32I-NEXT: or t0, t0, t1
1090 ; RV32I-NEXT: slli t2, t2, 16
1091 ; RV32I-NEXT: slli t3, t3, 24
1092 ; RV32I-NEXT: or t1, t3, t2
1093 ; RV32I-NEXT: or t0, t1, t0
1094 ; RV32I-NEXT: sll t0, t0, a4
1095 ; RV32I-NEXT: lbu t1, 9(a3)
1096 ; RV32I-NEXT: lbu t2, 8(a3)
1097 ; RV32I-NEXT: lbu t3, 10(a3)
1098 ; RV32I-NEXT: lbu a3, 11(a3)
1099 ; RV32I-NEXT: slli t1, t1, 8
1100 ; RV32I-NEXT: or t1, t1, t2
1101 ; RV32I-NEXT: slli t3, t3, 16
1102 ; RV32I-NEXT: slli a3, a3, 24
1103 ; RV32I-NEXT: or a3, a3, t3
1104 ; RV32I-NEXT: or a3, a3, t1
1105 ; RV32I-NEXT: srli t1, a3, 1
1106 ; RV32I-NEXT: srl a7, t1, a7
1107 ; RV32I-NEXT: or a7, t0, a7
1108 ; RV32I-NEXT: sll a3, a3, a4
1109 ; RV32I-NEXT: srli a5, a5, 1
1110 ; RV32I-NEXT: not t1, a4
1111 ; RV32I-NEXT: srl a5, a5, t1
1112 ; RV32I-NEXT: or a5, a3, a5
1113 ; RV32I-NEXT: sll a4, a6, a4
1114 ; RV32I-NEXT: sb a4, 0(a2)
1115 ; RV32I-NEXT: srli a6, a3, 16
1116 ; RV32I-NEXT: sb a6, 10(a2)
1117 ; RV32I-NEXT: srli a6, a3, 24
1118 ; RV32I-NEXT: sb a6, 11(a2)
1119 ; RV32I-NEXT: srli a3, a3, 8
1120 ; RV32I-NEXT: sb a3, 9(a2)
1121 ; RV32I-NEXT: srli a3, t0, 16
1122 ; RV32I-NEXT: sb a3, 14(a2)
1123 ; RV32I-NEXT: srli a3, t0, 24
1124 ; RV32I-NEXT: sb a3, 15(a2)
1125 ; RV32I-NEXT: srli a3, t0, 8
1126 ; RV32I-NEXT: sb a3, 13(a2)
1127 ; RV32I-NEXT: srli a3, a4, 16
1128 ; RV32I-NEXT: sb a3, 2(a2)
1129 ; RV32I-NEXT: srli a3, a4, 24
1130 ; RV32I-NEXT: sb a3, 3(a2)
1131 ; RV32I-NEXT: srli a4, a4, 8
1132 ; RV32I-NEXT: sb a4, 1(a2)
1133 ; RV32I-NEXT: srli a3, a0, 16
1134 ; RV32I-NEXT: sb a3, 6(a2)
1135 ; RV32I-NEXT: srli a3, a0, 24
1136 ; RV32I-NEXT: sb a3, 7(a2)
1137 ; RV32I-NEXT: srli a0, a0, 8
1138 ; RV32I-NEXT: sb a0, 5(a2)
1139 ; RV32I-NEXT: sb a5, 8(a2)
1140 ; RV32I-NEXT: sb a7, 12(a2)
1141 ; RV32I-NEXT: sb a1, 4(a2)
1142 ; RV32I-NEXT: lw s0, 60(sp) # 4-byte Folded Reload
1143 ; RV32I-NEXT: lw s1, 56(sp) # 4-byte Folded Reload
1144 ; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload
1145 ; RV32I-NEXT: lw s3, 48(sp) # 4-byte Folded Reload
1146 ; RV32I-NEXT: lw s4, 44(sp) # 4-byte Folded Reload
1147 ; RV32I-NEXT: addi sp, sp, 64
1149 %src = load i128, ptr %src.ptr, align 1
1150 %bitOff = load i128, ptr %bitOff.ptr, align 1
1151 %res = shl i128 %src, %bitOff
1152 store i128 %res, ptr %dst, align 1
; ashr_16bytes: loads a 128-bit value and a 128-bit shift amount through
; byte-aligned (align 1) pointers, arithmetic-shifts the value right by that
; amount, and stores the 128-bit result through %dst, again with align 1.
;
; riscv64 expectations: the upper 8 source bytes and the low 8 bytes of the
; shift amount are assembled bytewise. A branch on (amount - 64) selects the
; path: .LBB8_2 is taken when that difference is negative and additionally
; loads the lower 8 source bytes; the fall-through path fills the upper half
; from the sign of the top source word. The result is stored one byte at a time.
;
; riscv32 expectations: the 16 source bytes are copied to sp+8..sp+23 and
; sign-fill bytes derived from source byte 15 are written to sp+24..sp+39.
; Four 32-bit words are then loaded bytewise starting at sp+8 plus an offset
; taken from bits 3..6 of the shift amount, and combined using a bit shift of
; the low 3 bits of the amount before being stored one byte at a time.
1155 define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
1156 ; RV64I-LABEL: ashr_16bytes:
1158 ; RV64I-NEXT: lbu a3, 9(a0)
1159 ; RV64I-NEXT: lbu a4, 8(a0)
1160 ; RV64I-NEXT: lbu a5, 10(a0)
1161 ; RV64I-NEXT: lbu a6, 11(a0)
1162 ; RV64I-NEXT: slli a3, a3, 8
1163 ; RV64I-NEXT: or a3, a3, a4
1164 ; RV64I-NEXT: slli a5, a5, 16
1165 ; RV64I-NEXT: slli a6, a6, 24
1166 ; RV64I-NEXT: or a4, a6, a5
1167 ; RV64I-NEXT: or a3, a4, a3
1168 ; RV64I-NEXT: lbu a4, 13(a0)
1169 ; RV64I-NEXT: lbu a5, 12(a0)
1170 ; RV64I-NEXT: lbu a6, 14(a0)
1171 ; RV64I-NEXT: lbu a7, 15(a0)
1172 ; RV64I-NEXT: slli a4, a4, 8
1173 ; RV64I-NEXT: or a4, a4, a5
1174 ; RV64I-NEXT: slli a6, a6, 16
1175 ; RV64I-NEXT: slli a7, a7, 24
1176 ; RV64I-NEXT: or a5, a7, a6
1177 ; RV64I-NEXT: or a4, a5, a4
1178 ; RV64I-NEXT: slli a5, a4, 32
1179 ; RV64I-NEXT: or a3, a5, a3
1180 ; RV64I-NEXT: lbu a5, 1(a1)
1181 ; RV64I-NEXT: lbu a6, 0(a1)
1182 ; RV64I-NEXT: lbu a7, 2(a1)
1183 ; RV64I-NEXT: lbu t0, 3(a1)
1184 ; RV64I-NEXT: slli a5, a5, 8
1185 ; RV64I-NEXT: or a5, a5, a6
1186 ; RV64I-NEXT: slli a7, a7, 16
1187 ; RV64I-NEXT: slli t0, t0, 24
1188 ; RV64I-NEXT: or a6, t0, a7
1189 ; RV64I-NEXT: or a5, a6, a5
1190 ; RV64I-NEXT: lbu a6, 5(a1)
1191 ; RV64I-NEXT: lbu a7, 4(a1)
1192 ; RV64I-NEXT: lbu t0, 6(a1)
1193 ; RV64I-NEXT: lbu a1, 7(a1)
1194 ; RV64I-NEXT: slli a6, a6, 8
1195 ; RV64I-NEXT: or a6, a6, a7
1196 ; RV64I-NEXT: slli t0, t0, 16
1197 ; RV64I-NEXT: slli a1, a1, 24
1198 ; RV64I-NEXT: or a1, a1, t0
1199 ; RV64I-NEXT: or a1, a1, a6
1200 ; RV64I-NEXT: slli a1, a1, 32
1201 ; RV64I-NEXT: or a5, a1, a5
1202 ; RV64I-NEXT: addi a6, a5, -64
1203 ; RV64I-NEXT: sra a1, a3, a5
1204 ; RV64I-NEXT: bltz a6, .LBB8_2
1205 ; RV64I-NEXT: # %bb.1:
1206 ; RV64I-NEXT: sraiw a3, a4, 31
1207 ; RV64I-NEXT: mv a0, a1
1208 ; RV64I-NEXT: mv a1, a3
1209 ; RV64I-NEXT: j .LBB8_3
1210 ; RV64I-NEXT: .LBB8_2:
1211 ; RV64I-NEXT: lbu a4, 1(a0)
1212 ; RV64I-NEXT: lbu a6, 0(a0)
1213 ; RV64I-NEXT: lbu a7, 2(a0)
1214 ; RV64I-NEXT: lbu t0, 3(a0)
1215 ; RV64I-NEXT: slli a4, a4, 8
1216 ; RV64I-NEXT: or a4, a4, a6
1217 ; RV64I-NEXT: slli a7, a7, 16
1218 ; RV64I-NEXT: slli t0, t0, 24
1219 ; RV64I-NEXT: or a6, t0, a7
1220 ; RV64I-NEXT: or a4, a6, a4
1221 ; RV64I-NEXT: lbu a6, 5(a0)
1222 ; RV64I-NEXT: lbu a7, 4(a0)
1223 ; RV64I-NEXT: lbu t0, 6(a0)
1224 ; RV64I-NEXT: lbu a0, 7(a0)
1225 ; RV64I-NEXT: slli a6, a6, 8
1226 ; RV64I-NEXT: or a6, a6, a7
1227 ; RV64I-NEXT: slli t0, t0, 16
1228 ; RV64I-NEXT: slli a0, a0, 24
1229 ; RV64I-NEXT: or a0, a0, t0
1230 ; RV64I-NEXT: or a0, a0, a6
1231 ; RV64I-NEXT: slli a0, a0, 32
1232 ; RV64I-NEXT: or a0, a0, a4
1233 ; RV64I-NEXT: srl a0, a0, a5
1234 ; RV64I-NEXT: not a4, a5
1235 ; RV64I-NEXT: slli a3, a3, 1
1236 ; RV64I-NEXT: sll a3, a3, a4
1237 ; RV64I-NEXT: or a0, a0, a3
1238 ; RV64I-NEXT: .LBB8_3:
1239 ; RV64I-NEXT: sb a1, 8(a2)
1240 ; RV64I-NEXT: srli a3, a1, 56
1241 ; RV64I-NEXT: sb a3, 15(a2)
1242 ; RV64I-NEXT: srli a3, a1, 48
1243 ; RV64I-NEXT: sb a3, 14(a2)
1244 ; RV64I-NEXT: srli a3, a1, 40
1245 ; RV64I-NEXT: sb a3, 13(a2)
1246 ; RV64I-NEXT: srli a3, a1, 32
1247 ; RV64I-NEXT: sb a3, 12(a2)
1248 ; RV64I-NEXT: srli a3, a1, 24
1249 ; RV64I-NEXT: sb a3, 11(a2)
1250 ; RV64I-NEXT: srli a3, a1, 16
1251 ; RV64I-NEXT: sb a3, 10(a2)
1252 ; RV64I-NEXT: srli a1, a1, 8
1253 ; RV64I-NEXT: sb a1, 9(a2)
1254 ; RV64I-NEXT: sb a0, 0(a2)
1255 ; RV64I-NEXT: srli a1, a0, 56
1256 ; RV64I-NEXT: sb a1, 7(a2)
1257 ; RV64I-NEXT: srli a1, a0, 48
1258 ; RV64I-NEXT: sb a1, 6(a2)
1259 ; RV64I-NEXT: srli a1, a0, 40
1260 ; RV64I-NEXT: sb a1, 5(a2)
1261 ; RV64I-NEXT: srli a1, a0, 32
1262 ; RV64I-NEXT: sb a1, 4(a2)
1263 ; RV64I-NEXT: srli a1, a0, 24
1264 ; RV64I-NEXT: sb a1, 3(a2)
1265 ; RV64I-NEXT: srli a1, a0, 16
1266 ; RV64I-NEXT: sb a1, 2(a2)
1267 ; RV64I-NEXT: srli a0, a0, 8
1268 ; RV64I-NEXT: sb a0, 1(a2)
1271 ; RV32I-LABEL: ashr_16bytes:
1273 ; RV32I-NEXT: addi sp, sp, -64
1274 ; RV32I-NEXT: sw s0, 60(sp) # 4-byte Folded Spill
1275 ; RV32I-NEXT: sw s1, 56(sp) # 4-byte Folded Spill
1276 ; RV32I-NEXT: sw s2, 52(sp) # 4-byte Folded Spill
1277 ; RV32I-NEXT: sw s3, 48(sp) # 4-byte Folded Spill
1278 ; RV32I-NEXT: sw s4, 44(sp) # 4-byte Folded Spill
1279 ; RV32I-NEXT: sw s5, 40(sp) # 4-byte Folded Spill
1280 ; RV32I-NEXT: lbu a3, 15(a0)
1281 ; RV32I-NEXT: slli a4, a3, 24
1282 ; RV32I-NEXT: lbu a5, 0(a0)
1283 ; RV32I-NEXT: lbu a6, 1(a0)
1284 ; RV32I-NEXT: lbu a7, 2(a0)
1285 ; RV32I-NEXT: lbu t0, 3(a0)
1286 ; RV32I-NEXT: lbu t1, 4(a0)
1287 ; RV32I-NEXT: lbu t2, 5(a0)
1288 ; RV32I-NEXT: lbu t3, 6(a0)
1289 ; RV32I-NEXT: lbu t4, 7(a0)
1290 ; RV32I-NEXT: lbu t5, 8(a0)
1291 ; RV32I-NEXT: lbu t6, 9(a0)
1292 ; RV32I-NEXT: lbu s0, 10(a0)
1293 ; RV32I-NEXT: lbu s1, 1(a1)
1294 ; RV32I-NEXT: lbu s2, 0(a1)
1295 ; RV32I-NEXT: lbu s3, 11(a0)
1296 ; RV32I-NEXT: lbu s4, 12(a0)
1297 ; RV32I-NEXT: slli s1, s1, 8
1298 ; RV32I-NEXT: or s1, s1, s2
1299 ; RV32I-NEXT: lbu s2, 2(a1)
1300 ; RV32I-NEXT: lbu a1, 3(a1)
1301 ; RV32I-NEXT: lbu s5, 13(a0)
1302 ; RV32I-NEXT: lbu a0, 14(a0)
1303 ; RV32I-NEXT: slli s2, s2, 16
1304 ; RV32I-NEXT: slli a1, a1, 24
1305 ; RV32I-NEXT: or a1, a1, s2
1306 ; RV32I-NEXT: or a1, a1, s1
1307 ; RV32I-NEXT: sb a3, 23(sp)
1308 ; RV32I-NEXT: sb a0, 22(sp)
1309 ; RV32I-NEXT: sb s5, 21(sp)
1310 ; RV32I-NEXT: sb s4, 20(sp)
1311 ; RV32I-NEXT: sb s3, 19(sp)
1312 ; RV32I-NEXT: sb s0, 18(sp)
1313 ; RV32I-NEXT: sb t6, 17(sp)
1314 ; RV32I-NEXT: sb t5, 16(sp)
1315 ; RV32I-NEXT: sb t4, 15(sp)
1316 ; RV32I-NEXT: sb t3, 14(sp)
1317 ; RV32I-NEXT: sb t2, 13(sp)
1318 ; RV32I-NEXT: sb t1, 12(sp)
1319 ; RV32I-NEXT: sb t0, 11(sp)
1320 ; RV32I-NEXT: sb a7, 10(sp)
1321 ; RV32I-NEXT: sb a6, 9(sp)
1322 ; RV32I-NEXT: sb a5, 8(sp)
1323 ; RV32I-NEXT: srai a4, a4, 31
1324 ; RV32I-NEXT: sb a4, 36(sp)
1325 ; RV32I-NEXT: sb a4, 32(sp)
1326 ; RV32I-NEXT: sb a4, 28(sp)
1327 ; RV32I-NEXT: sb a4, 24(sp)
1328 ; RV32I-NEXT: srli a0, a4, 24
1329 ; RV32I-NEXT: sb a0, 39(sp)
1330 ; RV32I-NEXT: srli a3, a4, 16
1331 ; RV32I-NEXT: sb a3, 38(sp)
1332 ; RV32I-NEXT: srli a4, a4, 8
1333 ; RV32I-NEXT: sb a4, 37(sp)
1334 ; RV32I-NEXT: sb a0, 35(sp)
1335 ; RV32I-NEXT: sb a3, 34(sp)
1336 ; RV32I-NEXT: sb a4, 33(sp)
1337 ; RV32I-NEXT: sb a0, 31(sp)
1338 ; RV32I-NEXT: sb a3, 30(sp)
1339 ; RV32I-NEXT: sb a4, 29(sp)
1340 ; RV32I-NEXT: sb a0, 27(sp)
1341 ; RV32I-NEXT: sb a3, 26(sp)
1342 ; RV32I-NEXT: sb a4, 25(sp)
1343 ; RV32I-NEXT: slli a0, a1, 25
1344 ; RV32I-NEXT: srli a0, a0, 28
1345 ; RV32I-NEXT: addi a3, sp, 8
1346 ; RV32I-NEXT: add a3, a3, a0
1347 ; RV32I-NEXT: lbu a0, 5(a3)
1348 ; RV32I-NEXT: lbu a4, 4(a3)
1349 ; RV32I-NEXT: lbu a5, 6(a3)
1350 ; RV32I-NEXT: lbu a6, 7(a3)
1351 ; RV32I-NEXT: slli a0, a0, 8
1352 ; RV32I-NEXT: or a0, a0, a4
1353 ; RV32I-NEXT: slli a5, a5, 16
1354 ; RV32I-NEXT: slli a6, a6, 24
1355 ; RV32I-NEXT: or a4, a6, a5
1356 ; RV32I-NEXT: or a5, a4, a0
1357 ; RV32I-NEXT: andi a4, a1, 7
1358 ; RV32I-NEXT: srl a0, a5, a4
1359 ; RV32I-NEXT: lbu a1, 9(a3)
1360 ; RV32I-NEXT: lbu a6, 8(a3)
1361 ; RV32I-NEXT: lbu a7, 10(a3)
1362 ; RV32I-NEXT: lbu t0, 11(a3)
1363 ; RV32I-NEXT: slli a1, a1, 8
1364 ; RV32I-NEXT: or a1, a1, a6
1365 ; RV32I-NEXT: slli a7, a7, 16
1366 ; RV32I-NEXT: slli t0, t0, 24
1367 ; RV32I-NEXT: or a6, t0, a7
1368 ; RV32I-NEXT: or a6, a6, a1
1369 ; RV32I-NEXT: slli a1, a6, 1
1370 ; RV32I-NEXT: not a7, a4
1371 ; RV32I-NEXT: sll a1, a1, a7
1372 ; RV32I-NEXT: or a1, a0, a1
1373 ; RV32I-NEXT: lbu a7, 1(a3)
1374 ; RV32I-NEXT: lbu t0, 0(a3)
1375 ; RV32I-NEXT: lbu t1, 2(a3)
1376 ; RV32I-NEXT: lbu t2, 3(a3)
1377 ; RV32I-NEXT: slli a7, a7, 8
1378 ; RV32I-NEXT: or a7, a7, t0
1379 ; RV32I-NEXT: slli t1, t1, 16
1380 ; RV32I-NEXT: slli t2, t2, 24
1381 ; RV32I-NEXT: or t0, t2, t1
1382 ; RV32I-NEXT: or a7, t0, a7
1383 ; RV32I-NEXT: srl a7, a7, a4
1384 ; RV32I-NEXT: slli a5, a5, 1
1385 ; RV32I-NEXT: xori t0, a4, 31
1386 ; RV32I-NEXT: sll a5, a5, t0
1387 ; RV32I-NEXT: or a5, a7, a5
1388 ; RV32I-NEXT: srl a6, a6, a4
1389 ; RV32I-NEXT: lbu t1, 13(a3)
1390 ; RV32I-NEXT: lbu t2, 12(a3)
1391 ; RV32I-NEXT: lbu t3, 14(a3)
1392 ; RV32I-NEXT: lbu a3, 15(a3)
1393 ; RV32I-NEXT: slli t1, t1, 8
1394 ; RV32I-NEXT: or t1, t1, t2
1395 ; RV32I-NEXT: slli t3, t3, 16
1396 ; RV32I-NEXT: slli a3, a3, 24
1397 ; RV32I-NEXT: or a3, a3, t3
1398 ; RV32I-NEXT: or a3, a3, t1
1399 ; RV32I-NEXT: slli t1, a3, 1
1400 ; RV32I-NEXT: sll t0, t1, t0
1401 ; RV32I-NEXT: or t0, a6, t0
1402 ; RV32I-NEXT: sra a3, a3, a4
1403 ; RV32I-NEXT: sb a6, 8(a2)
1404 ; RV32I-NEXT: sb a3, 12(a2)
1405 ; RV32I-NEXT: sb a7, 0(a2)
1406 ; RV32I-NEXT: sb a0, 4(a2)
1407 ; RV32I-NEXT: srli a4, a6, 16
1408 ; RV32I-NEXT: sb a4, 10(a2)
1409 ; RV32I-NEXT: srli a4, a6, 8
1410 ; RV32I-NEXT: sb a4, 9(a2)
1411 ; RV32I-NEXT: srli a4, a3, 16
1412 ; RV32I-NEXT: sb a4, 14(a2)
1413 ; RV32I-NEXT: srli a4, a3, 24
1414 ; RV32I-NEXT: sb a4, 15(a2)
1415 ; RV32I-NEXT: srli a3, a3, 8
1416 ; RV32I-NEXT: sb a3, 13(a2)
1417 ; RV32I-NEXT: srli a3, a7, 16
1418 ; RV32I-NEXT: sb a3, 2(a2)
1419 ; RV32I-NEXT: srli a3, a7, 8
1420 ; RV32I-NEXT: sb a3, 1(a2)
1421 ; RV32I-NEXT: srli a3, a0, 16
1422 ; RV32I-NEXT: sb a3, 6(a2)
1423 ; RV32I-NEXT: srli a0, a0, 8
1424 ; RV32I-NEXT: sb a0, 5(a2)
1425 ; RV32I-NEXT: srli a0, t0, 24
1426 ; RV32I-NEXT: sb a0, 11(a2)
1427 ; RV32I-NEXT: srli a5, a5, 24
1428 ; RV32I-NEXT: sb a5, 3(a2)
1429 ; RV32I-NEXT: srli a1, a1, 24
1430 ; RV32I-NEXT: sb a1, 7(a2)
1431 ; RV32I-NEXT: lw s0, 60(sp) # 4-byte Folded Reload
1432 ; RV32I-NEXT: lw s1, 56(sp) # 4-byte Folded Reload
1433 ; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload
1434 ; RV32I-NEXT: lw s3, 48(sp) # 4-byte Folded Reload
1435 ; RV32I-NEXT: lw s4, 44(sp) # 4-byte Folded Reload
1436 ; RV32I-NEXT: lw s5, 40(sp) # 4-byte Folded Reload
1437 ; RV32I-NEXT: addi sp, sp, 64
; IR under test: both operands are loaded as i128 with align 1, shifted with
; ashr, and the i128 result is stored with align 1.
1439 %src = load i128, ptr %src.ptr, align 1
1440 %bitOff = load i128, ptr %bitOff.ptr, align 1
1441 %res = ashr i128 %src, %bitOff
1442 store i128 %res, ptr %dst, align 1
1446 define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
1447 ; RV64I-LABEL: lshr_32bytes:
1449 ; RV64I-NEXT: addi sp, sp, -224
1450 ; RV64I-NEXT: sd ra, 216(sp) # 8-byte Folded Spill
1451 ; RV64I-NEXT: sd s0, 208(sp) # 8-byte Folded Spill
1452 ; RV64I-NEXT: sd s1, 200(sp) # 8-byte Folded Spill
1453 ; RV64I-NEXT: sd s2, 192(sp) # 8-byte Folded Spill
1454 ; RV64I-NEXT: sd s3, 184(sp) # 8-byte Folded Spill
1455 ; RV64I-NEXT: sd s4, 176(sp) # 8-byte Folded Spill
1456 ; RV64I-NEXT: sd s5, 168(sp) # 8-byte Folded Spill
1457 ; RV64I-NEXT: sd s6, 160(sp) # 8-byte Folded Spill
1458 ; RV64I-NEXT: sd s7, 152(sp) # 8-byte Folded Spill
1459 ; RV64I-NEXT: sd s8, 144(sp) # 8-byte Folded Spill
1460 ; RV64I-NEXT: sd s9, 136(sp) # 8-byte Folded Spill
1461 ; RV64I-NEXT: sd s10, 128(sp) # 8-byte Folded Spill
1462 ; RV64I-NEXT: sd s11, 120(sp) # 8-byte Folded Spill
1463 ; RV64I-NEXT: lbu a3, 0(a0)
1464 ; RV64I-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
1465 ; RV64I-NEXT: lbu a3, 1(a0)
1466 ; RV64I-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
1467 ; RV64I-NEXT: lbu a3, 2(a0)
1468 ; RV64I-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
1469 ; RV64I-NEXT: lbu a3, 3(a0)
1470 ; RV64I-NEXT: sd a3, 24(sp) # 8-byte Folded Spill
1471 ; RV64I-NEXT: lbu a3, 4(a0)
1472 ; RV64I-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
1473 ; RV64I-NEXT: lbu a3, 5(a0)
1474 ; RV64I-NEXT: sd a3, 8(sp) # 8-byte Folded Spill
1475 ; RV64I-NEXT: lbu t1, 6(a0)
1476 ; RV64I-NEXT: lbu t2, 7(a0)
1477 ; RV64I-NEXT: lbu t3, 8(a0)
1478 ; RV64I-NEXT: lbu t4, 9(a0)
1479 ; RV64I-NEXT: lbu t5, 10(a0)
1480 ; RV64I-NEXT: lbu t6, 11(a0)
1481 ; RV64I-NEXT: lbu s0, 12(a0)
1482 ; RV64I-NEXT: lbu s1, 13(a0)
1483 ; RV64I-NEXT: lbu s2, 14(a0)
1484 ; RV64I-NEXT: lbu s3, 15(a0)
1485 ; RV64I-NEXT: lbu s4, 16(a0)
1486 ; RV64I-NEXT: lbu s5, 17(a0)
1487 ; RV64I-NEXT: lbu s6, 18(a0)
1488 ; RV64I-NEXT: lbu s7, 19(a0)
1489 ; RV64I-NEXT: lbu s8, 20(a0)
1490 ; RV64I-NEXT: lbu s9, 1(a1)
1491 ; RV64I-NEXT: lbu s10, 0(a1)
1492 ; RV64I-NEXT: lbu s11, 2(a1)
1493 ; RV64I-NEXT: lbu ra, 3(a1)
1494 ; RV64I-NEXT: slli s9, s9, 8
1495 ; RV64I-NEXT: or s9, s9, s10
1496 ; RV64I-NEXT: slli s11, s11, 16
1497 ; RV64I-NEXT: slli ra, ra, 24
1498 ; RV64I-NEXT: lbu s10, 5(a1)
1499 ; RV64I-NEXT: or s11, ra, s11
1500 ; RV64I-NEXT: or s11, s11, s9
1501 ; RV64I-NEXT: lbu s9, 4(a1)
1502 ; RV64I-NEXT: slli s10, s10, 8
1503 ; RV64I-NEXT: lbu ra, 6(a1)
1504 ; RV64I-NEXT: lbu a1, 7(a1)
1505 ; RV64I-NEXT: or s10, s10, s9
1506 ; RV64I-NEXT: lbu s9, 21(a0)
1507 ; RV64I-NEXT: slli ra, ra, 16
1508 ; RV64I-NEXT: slli a1, a1, 24
1509 ; RV64I-NEXT: or a1, a1, ra
1510 ; RV64I-NEXT: lbu ra, 22(a0)
1511 ; RV64I-NEXT: or a1, a1, s10
1512 ; RV64I-NEXT: lbu s10, 23(a0)
1513 ; RV64I-NEXT: slli a1, a1, 32
1514 ; RV64I-NEXT: or t0, a1, s11
1515 ; RV64I-NEXT: lbu s11, 24(a0)
1516 ; RV64I-NEXT: lbu a7, 25(a0)
1517 ; RV64I-NEXT: lbu a6, 26(a0)
1518 ; RV64I-NEXT: lbu a5, 27(a0)
1519 ; RV64I-NEXT: lbu a1, 31(a0)
1520 ; RV64I-NEXT: lbu a3, 30(a0)
1521 ; RV64I-NEXT: lbu a4, 29(a0)
1522 ; RV64I-NEXT: lbu a0, 28(a0)
1523 ; RV64I-NEXT: sb a1, 87(sp)
1524 ; RV64I-NEXT: sb a3, 86(sp)
1525 ; RV64I-NEXT: sb a4, 85(sp)
1526 ; RV64I-NEXT: sb a0, 84(sp)
1527 ; RV64I-NEXT: sb a5, 83(sp)
1528 ; RV64I-NEXT: sb a6, 82(sp)
1529 ; RV64I-NEXT: sb a7, 81(sp)
1530 ; RV64I-NEXT: sb s11, 80(sp)
1531 ; RV64I-NEXT: sb s10, 79(sp)
1532 ; RV64I-NEXT: sb ra, 78(sp)
1533 ; RV64I-NEXT: sb s9, 77(sp)
1534 ; RV64I-NEXT: sb s8, 76(sp)
1535 ; RV64I-NEXT: sb s7, 75(sp)
1536 ; RV64I-NEXT: sb s6, 74(sp)
1537 ; RV64I-NEXT: sb s5, 73(sp)
1538 ; RV64I-NEXT: sb s4, 72(sp)
1539 ; RV64I-NEXT: sb s3, 71(sp)
1540 ; RV64I-NEXT: sb s2, 70(sp)
1541 ; RV64I-NEXT: sb s1, 69(sp)
1542 ; RV64I-NEXT: sb s0, 68(sp)
1543 ; RV64I-NEXT: sb t6, 67(sp)
1544 ; RV64I-NEXT: sb t5, 66(sp)
1545 ; RV64I-NEXT: sb t4, 65(sp)
1546 ; RV64I-NEXT: sb zero, 119(sp)
1547 ; RV64I-NEXT: sb zero, 118(sp)
1548 ; RV64I-NEXT: sb zero, 117(sp)
1549 ; RV64I-NEXT: sb zero, 116(sp)
1550 ; RV64I-NEXT: sb zero, 115(sp)
1551 ; RV64I-NEXT: sb zero, 114(sp)
1552 ; RV64I-NEXT: sb zero, 113(sp)
1553 ; RV64I-NEXT: sb zero, 112(sp)
1554 ; RV64I-NEXT: sb zero, 111(sp)
1555 ; RV64I-NEXT: sb zero, 110(sp)
1556 ; RV64I-NEXT: sb zero, 109(sp)
1557 ; RV64I-NEXT: sb zero, 108(sp)
1558 ; RV64I-NEXT: sb zero, 107(sp)
1559 ; RV64I-NEXT: sb zero, 106(sp)
1560 ; RV64I-NEXT: sb zero, 105(sp)
1561 ; RV64I-NEXT: sb zero, 104(sp)
1562 ; RV64I-NEXT: sb zero, 103(sp)
1563 ; RV64I-NEXT: sb zero, 102(sp)
1564 ; RV64I-NEXT: sb zero, 101(sp)
1565 ; RV64I-NEXT: sb zero, 100(sp)
1566 ; RV64I-NEXT: sb zero, 99(sp)
1567 ; RV64I-NEXT: sb zero, 98(sp)
1568 ; RV64I-NEXT: sb zero, 97(sp)
1569 ; RV64I-NEXT: sb zero, 96(sp)
1570 ; RV64I-NEXT: sb zero, 95(sp)
1571 ; RV64I-NEXT: sb zero, 94(sp)
1572 ; RV64I-NEXT: sb zero, 93(sp)
1573 ; RV64I-NEXT: sb zero, 92(sp)
1574 ; RV64I-NEXT: sb zero, 91(sp)
1575 ; RV64I-NEXT: sb zero, 90(sp)
1576 ; RV64I-NEXT: sb zero, 89(sp)
1577 ; RV64I-NEXT: sb zero, 88(sp)
1578 ; RV64I-NEXT: sb t3, 64(sp)
1579 ; RV64I-NEXT: sb t2, 63(sp)
1580 ; RV64I-NEXT: sb t1, 62(sp)
1581 ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
1582 ; RV64I-NEXT: sb a0, 61(sp)
1583 ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
1584 ; RV64I-NEXT: sb a0, 60(sp)
1585 ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
1586 ; RV64I-NEXT: sb a0, 59(sp)
1587 ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
1588 ; RV64I-NEXT: sb a0, 58(sp)
1589 ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
1590 ; RV64I-NEXT: sb a0, 57(sp)
1591 ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
1592 ; RV64I-NEXT: sb a0, 56(sp)
1593 ; RV64I-NEXT: slli a0, t0, 56
1594 ; RV64I-NEXT: srli a0, a0, 59
1595 ; RV64I-NEXT: addi a3, sp, 56
1596 ; RV64I-NEXT: add a3, a3, a0
1597 ; RV64I-NEXT: lbu a0, 9(a3)
1598 ; RV64I-NEXT: lbu a1, 8(a3)
1599 ; RV64I-NEXT: lbu a4, 10(a3)
1600 ; RV64I-NEXT: lbu a5, 11(a3)
1601 ; RV64I-NEXT: slli a0, a0, 8
1602 ; RV64I-NEXT: or a0, a0, a1
1603 ; RV64I-NEXT: slli a4, a4, 16
1604 ; RV64I-NEXT: slli a5, a5, 24
1605 ; RV64I-NEXT: or a4, a5, a4
1606 ; RV64I-NEXT: or a0, a4, a0
1607 ; RV64I-NEXT: lbu a1, 13(a3)
1608 ; RV64I-NEXT: lbu a4, 12(a3)
1609 ; RV64I-NEXT: lbu a5, 14(a3)
1610 ; RV64I-NEXT: lbu a6, 15(a3)
1611 ; RV64I-NEXT: slli a1, a1, 8
1612 ; RV64I-NEXT: or a1, a1, a4
1613 ; RV64I-NEXT: slli a5, a5, 16
1614 ; RV64I-NEXT: slli a6, a6, 24
1615 ; RV64I-NEXT: or a4, a6, a5
1616 ; RV64I-NEXT: or a1, a4, a1
1617 ; RV64I-NEXT: slli a1, a1, 32
1618 ; RV64I-NEXT: or a4, a1, a0
1619 ; RV64I-NEXT: andi a1, t0, 7
1620 ; RV64I-NEXT: lbu a0, 17(a3)
1621 ; RV64I-NEXT: lbu a5, 16(a3)
1622 ; RV64I-NEXT: lbu a6, 18(a3)
1623 ; RV64I-NEXT: lbu a7, 19(a3)
1624 ; RV64I-NEXT: slli a0, a0, 8
1625 ; RV64I-NEXT: or a0, a0, a5
1626 ; RV64I-NEXT: slli a6, a6, 16
1627 ; RV64I-NEXT: slli a7, a7, 24
1628 ; RV64I-NEXT: or a5, a7, a6
1629 ; RV64I-NEXT: or a0, a5, a0
1630 ; RV64I-NEXT: lbu a5, 21(a3)
1631 ; RV64I-NEXT: lbu a6, 20(a3)
1632 ; RV64I-NEXT: lbu a7, 22(a3)
1633 ; RV64I-NEXT: lbu t0, 23(a3)
1634 ; RV64I-NEXT: slli a5, a5, 8
1635 ; RV64I-NEXT: or a5, a5, a6
1636 ; RV64I-NEXT: slli a7, a7, 16
1637 ; RV64I-NEXT: slli t0, t0, 24
1638 ; RV64I-NEXT: or a6, t0, a7
1639 ; RV64I-NEXT: or a5, a6, a5
1640 ; RV64I-NEXT: slli a5, a5, 32
1641 ; RV64I-NEXT: or a5, a5, a0
1642 ; RV64I-NEXT: slli a0, a5, 1
1643 ; RV64I-NEXT: not a6, a1
1644 ; RV64I-NEXT: sll a0, a0, a6
1645 ; RV64I-NEXT: lbu a6, 1(a3)
1646 ; RV64I-NEXT: lbu a7, 0(a3)
1647 ; RV64I-NEXT: lbu t0, 2(a3)
1648 ; RV64I-NEXT: lbu t1, 3(a3)
1649 ; RV64I-NEXT: slli a6, a6, 8
1650 ; RV64I-NEXT: or a6, a6, a7
1651 ; RV64I-NEXT: slli t0, t0, 16
1652 ; RV64I-NEXT: slli t1, t1, 24
1653 ; RV64I-NEXT: or a7, t1, t0
1654 ; RV64I-NEXT: or a6, a7, a6
1655 ; RV64I-NEXT: lbu a7, 5(a3)
1656 ; RV64I-NEXT: lbu t0, 4(a3)
1657 ; RV64I-NEXT: lbu t1, 6(a3)
1658 ; RV64I-NEXT: lbu t2, 7(a3)
1659 ; RV64I-NEXT: slli a7, a7, 8
1660 ; RV64I-NEXT: or a7, a7, t0
1661 ; RV64I-NEXT: slli t1, t1, 16
1662 ; RV64I-NEXT: slli t2, t2, 24
1663 ; RV64I-NEXT: or t0, t2, t1
1664 ; RV64I-NEXT: or a7, t0, a7
1665 ; RV64I-NEXT: slli a7, a7, 32
1666 ; RV64I-NEXT: or a6, a7, a6
1667 ; RV64I-NEXT: lbu a7, 25(a3)
1668 ; RV64I-NEXT: lbu t0, 24(a3)
1669 ; RV64I-NEXT: lbu t1, 26(a3)
1670 ; RV64I-NEXT: lbu t2, 27(a3)
1671 ; RV64I-NEXT: slli a7, a7, 8
1672 ; RV64I-NEXT: or a7, a7, t0
1673 ; RV64I-NEXT: slli t1, t1, 16
1674 ; RV64I-NEXT: slli t2, t2, 24
1675 ; RV64I-NEXT: or t0, t2, t1
1676 ; RV64I-NEXT: or a7, t0, a7
1677 ; RV64I-NEXT: lbu t0, 29(a3)
1678 ; RV64I-NEXT: lbu t1, 28(a3)
1679 ; RV64I-NEXT: lbu t2, 30(a3)
1680 ; RV64I-NEXT: lbu a3, 31(a3)
1681 ; RV64I-NEXT: slli t0, t0, 8
1682 ; RV64I-NEXT: or t0, t0, t1
1683 ; RV64I-NEXT: slli t2, t2, 16
1684 ; RV64I-NEXT: slli a3, a3, 24
1685 ; RV64I-NEXT: or a3, a3, t2
1686 ; RV64I-NEXT: slli t1, a4, 1
1687 ; RV64I-NEXT: or a3, a3, t0
1688 ; RV64I-NEXT: xori t0, a1, 63
1689 ; RV64I-NEXT: sll t1, t1, t0
1690 ; RV64I-NEXT: slli a3, a3, 32
1691 ; RV64I-NEXT: or a7, a3, a7
1692 ; RV64I-NEXT: slli a3, a7, 1
1693 ; RV64I-NEXT: sll t0, a3, t0
1694 ; RV64I-NEXT: srl a3, a4, a1
1695 ; RV64I-NEXT: srl a4, a6, a1
1696 ; RV64I-NEXT: srl a5, a5, a1
1697 ; RV64I-NEXT: srl a1, a7, a1
1698 ; RV64I-NEXT: srli a6, a5, 48
1699 ; RV64I-NEXT: sb a6, 22(a2)
1700 ; RV64I-NEXT: srli a6, a5, 40
1701 ; RV64I-NEXT: sb a6, 21(a2)
1702 ; RV64I-NEXT: srli a6, a5, 32
1703 ; RV64I-NEXT: sb a6, 20(a2)
1704 ; RV64I-NEXT: srli a6, a5, 24
1705 ; RV64I-NEXT: sb a6, 19(a2)
1706 ; RV64I-NEXT: srli a6, a5, 16
1707 ; RV64I-NEXT: sb a6, 18(a2)
1708 ; RV64I-NEXT: or a6, a5, t0
1709 ; RV64I-NEXT: sb a5, 16(a2)
1710 ; RV64I-NEXT: srli a5, a5, 8
1711 ; RV64I-NEXT: sb a5, 17(a2)
1712 ; RV64I-NEXT: srli a5, a1, 56
1713 ; RV64I-NEXT: sb a5, 31(a2)
1714 ; RV64I-NEXT: srli a5, a1, 48
1715 ; RV64I-NEXT: sb a5, 30(a2)
1716 ; RV64I-NEXT: srli a5, a1, 40
1717 ; RV64I-NEXT: sb a5, 29(a2)
1718 ; RV64I-NEXT: srli a5, a1, 32
1719 ; RV64I-NEXT: sb a5, 28(a2)
1720 ; RV64I-NEXT: srli a5, a1, 24
1721 ; RV64I-NEXT: sb a5, 27(a2)
1722 ; RV64I-NEXT: srli a5, a1, 16
1723 ; RV64I-NEXT: sb a5, 26(a2)
1724 ; RV64I-NEXT: sb a1, 24(a2)
1725 ; RV64I-NEXT: srli a1, a1, 8
1726 ; RV64I-NEXT: sb a1, 25(a2)
1727 ; RV64I-NEXT: srli a1, a4, 48
1728 ; RV64I-NEXT: sb a1, 6(a2)
1729 ; RV64I-NEXT: srli a1, a4, 40
1730 ; RV64I-NEXT: sb a1, 5(a2)
1731 ; RV64I-NEXT: srli a1, a4, 32
1732 ; RV64I-NEXT: sb a1, 4(a2)
1733 ; RV64I-NEXT: srli a1, a4, 24
1734 ; RV64I-NEXT: sb a1, 3(a2)
1735 ; RV64I-NEXT: srli a1, a4, 16
1736 ; RV64I-NEXT: sb a1, 2(a2)
1737 ; RV64I-NEXT: or a1, a4, t1
1738 ; RV64I-NEXT: sb a4, 0(a2)
1739 ; RV64I-NEXT: srli a4, a4, 8
1740 ; RV64I-NEXT: sb a4, 1(a2)
1741 ; RV64I-NEXT: srli a4, a3, 48
1742 ; RV64I-NEXT: sb a4, 14(a2)
1743 ; RV64I-NEXT: srli a4, a3, 40
1744 ; RV64I-NEXT: sb a4, 13(a2)
1745 ; RV64I-NEXT: srli a4, a3, 32
1746 ; RV64I-NEXT: sb a4, 12(a2)
1747 ; RV64I-NEXT: srli a4, a3, 24
1748 ; RV64I-NEXT: sb a4, 11(a2)
1749 ; RV64I-NEXT: srli a4, a3, 16
1750 ; RV64I-NEXT: sb a4, 10(a2)
1751 ; RV64I-NEXT: or a0, a3, a0
1752 ; RV64I-NEXT: sb a3, 8(a2)
1753 ; RV64I-NEXT: srli a3, a3, 8
1754 ; RV64I-NEXT: sb a3, 9(a2)
1755 ; RV64I-NEXT: srli a3, a6, 56
1756 ; RV64I-NEXT: sb a3, 23(a2)
1757 ; RV64I-NEXT: srli a1, a1, 56
1758 ; RV64I-NEXT: sb a1, 7(a2)
1759 ; RV64I-NEXT: srli a0, a0, 56
1760 ; RV64I-NEXT: sb a0, 15(a2)
1761 ; RV64I-NEXT: ld ra, 216(sp) # 8-byte Folded Reload
1762 ; RV64I-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
1763 ; RV64I-NEXT: ld s1, 200(sp) # 8-byte Folded Reload
1764 ; RV64I-NEXT: ld s2, 192(sp) # 8-byte Folded Reload
1765 ; RV64I-NEXT: ld s3, 184(sp) # 8-byte Folded Reload
1766 ; RV64I-NEXT: ld s4, 176(sp) # 8-byte Folded Reload
1767 ; RV64I-NEXT: ld s5, 168(sp) # 8-byte Folded Reload
1768 ; RV64I-NEXT: ld s6, 160(sp) # 8-byte Folded Reload
1769 ; RV64I-NEXT: ld s7, 152(sp) # 8-byte Folded Reload
1770 ; RV64I-NEXT: ld s8, 144(sp) # 8-byte Folded Reload
1771 ; RV64I-NEXT: ld s9, 136(sp) # 8-byte Folded Reload
1772 ; RV64I-NEXT: ld s10, 128(sp) # 8-byte Folded Reload
1773 ; RV64I-NEXT: ld s11, 120(sp) # 8-byte Folded Reload
1774 ; RV64I-NEXT: addi sp, sp, 224
1777 ; RV32I-LABEL: lshr_32bytes:
1779 ; RV32I-NEXT: addi sp, sp, -144
1780 ; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
1781 ; RV32I-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
1782 ; RV32I-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
1783 ; RV32I-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
1784 ; RV32I-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
1785 ; RV32I-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
1786 ; RV32I-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
1787 ; RV32I-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
1788 ; RV32I-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
1789 ; RV32I-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
1790 ; RV32I-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
1791 ; RV32I-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
1792 ; RV32I-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
1793 ; RV32I-NEXT: lbu a3, 0(a0)
1794 ; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill
1795 ; RV32I-NEXT: lbu a3, 1(a0)
1796 ; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill
1797 ; RV32I-NEXT: lbu a3, 2(a0)
1798 ; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill
1799 ; RV32I-NEXT: lbu a3, 3(a0)
1800 ; RV32I-NEXT: sw a3, 12(sp) # 4-byte Folded Spill
1801 ; RV32I-NEXT: lbu a3, 4(a0)
1802 ; RV32I-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
1803 ; RV32I-NEXT: lbu a3, 5(a0)
1804 ; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
1805 ; RV32I-NEXT: lbu t1, 6(a0)
1806 ; RV32I-NEXT: lbu t2, 7(a0)
1807 ; RV32I-NEXT: lbu t3, 8(a0)
1808 ; RV32I-NEXT: lbu t4, 9(a0)
1809 ; RV32I-NEXT: lbu t5, 10(a0)
1810 ; RV32I-NEXT: lbu t6, 11(a0)
1811 ; RV32I-NEXT: lbu s0, 12(a0)
1812 ; RV32I-NEXT: lbu s1, 13(a0)
1813 ; RV32I-NEXT: lbu s2, 14(a0)
1814 ; RV32I-NEXT: lbu s3, 15(a0)
1815 ; RV32I-NEXT: lbu s4, 16(a0)
1816 ; RV32I-NEXT: lbu s5, 17(a0)
1817 ; RV32I-NEXT: lbu s6, 18(a0)
1818 ; RV32I-NEXT: lbu s7, 19(a0)
1819 ; RV32I-NEXT: lbu s10, 1(a1)
1820 ; RV32I-NEXT: lbu s8, 20(a0)
1821 ; RV32I-NEXT: lbu s9, 21(a0)
1822 ; RV32I-NEXT: lbu s11, 0(a1)
1823 ; RV32I-NEXT: slli s10, s10, 8
1824 ; RV32I-NEXT: lbu ra, 2(a1)
1825 ; RV32I-NEXT: lbu a1, 3(a1)
1826 ; RV32I-NEXT: or s10, s10, s11
1827 ; RV32I-NEXT: lbu s11, 22(a0)
1828 ; RV32I-NEXT: slli ra, ra, 16
1829 ; RV32I-NEXT: slli a1, a1, 24
1830 ; RV32I-NEXT: or a1, a1, ra
1831 ; RV32I-NEXT: lbu ra, 23(a0)
1832 ; RV32I-NEXT: or t0, a1, s10
1833 ; RV32I-NEXT: lbu s10, 24(a0)
1834 ; RV32I-NEXT: lbu a7, 25(a0)
1835 ; RV32I-NEXT: lbu a6, 26(a0)
1836 ; RV32I-NEXT: lbu a5, 27(a0)
1837 ; RV32I-NEXT: lbu a1, 31(a0)
1838 ; RV32I-NEXT: lbu a3, 30(a0)
1839 ; RV32I-NEXT: lbu a4, 29(a0)
1840 ; RV32I-NEXT: lbu a0, 28(a0)
1841 ; RV32I-NEXT: sb a1, 59(sp)
1842 ; RV32I-NEXT: sb a3, 58(sp)
1843 ; RV32I-NEXT: sb a4, 57(sp)
1844 ; RV32I-NEXT: sb a0, 56(sp)
1845 ; RV32I-NEXT: sb a5, 55(sp)
1846 ; RV32I-NEXT: sb a6, 54(sp)
1847 ; RV32I-NEXT: sb a7, 53(sp)
1848 ; RV32I-NEXT: sb s10, 52(sp)
1849 ; RV32I-NEXT: sb ra, 51(sp)
1850 ; RV32I-NEXT: sb s11, 50(sp)
1851 ; RV32I-NEXT: sb s9, 49(sp)
1852 ; RV32I-NEXT: sb s8, 48(sp)
1853 ; RV32I-NEXT: sb s7, 47(sp)
1854 ; RV32I-NEXT: sb s6, 46(sp)
1855 ; RV32I-NEXT: sb s5, 45(sp)
1856 ; RV32I-NEXT: sb s4, 44(sp)
1857 ; RV32I-NEXT: sb zero, 91(sp)
1858 ; RV32I-NEXT: sb zero, 90(sp)
1859 ; RV32I-NEXT: sb zero, 89(sp)
1860 ; RV32I-NEXT: sb zero, 88(sp)
1861 ; RV32I-NEXT: sb zero, 87(sp)
1862 ; RV32I-NEXT: sb zero, 86(sp)
1863 ; RV32I-NEXT: sb zero, 85(sp)
1864 ; RV32I-NEXT: sb zero, 84(sp)
1865 ; RV32I-NEXT: sb zero, 83(sp)
1866 ; RV32I-NEXT: sb zero, 82(sp)
1867 ; RV32I-NEXT: sb zero, 81(sp)
1868 ; RV32I-NEXT: sb zero, 80(sp)
1869 ; RV32I-NEXT: sb zero, 79(sp)
1870 ; RV32I-NEXT: sb zero, 78(sp)
1871 ; RV32I-NEXT: sb zero, 77(sp)
1872 ; RV32I-NEXT: sb zero, 76(sp)
1873 ; RV32I-NEXT: sb zero, 75(sp)
1874 ; RV32I-NEXT: sb zero, 74(sp)
1875 ; RV32I-NEXT: sb zero, 73(sp)
1876 ; RV32I-NEXT: sb zero, 72(sp)
1877 ; RV32I-NEXT: sb zero, 71(sp)
1878 ; RV32I-NEXT: sb zero, 70(sp)
1879 ; RV32I-NEXT: sb zero, 69(sp)
1880 ; RV32I-NEXT: sb zero, 68(sp)
1881 ; RV32I-NEXT: sb zero, 67(sp)
1882 ; RV32I-NEXT: sb zero, 66(sp)
1883 ; RV32I-NEXT: sb zero, 65(sp)
1884 ; RV32I-NEXT: sb zero, 64(sp)
1885 ; RV32I-NEXT: sb zero, 63(sp)
1886 ; RV32I-NEXT: sb zero, 62(sp)
1887 ; RV32I-NEXT: sb zero, 61(sp)
1888 ; RV32I-NEXT: sb zero, 60(sp)
1889 ; RV32I-NEXT: sb s3, 43(sp)
1890 ; RV32I-NEXT: sb s2, 42(sp)
1891 ; RV32I-NEXT: sb s1, 41(sp)
1892 ; RV32I-NEXT: sb s0, 40(sp)
1893 ; RV32I-NEXT: sb t6, 39(sp)
1894 ; RV32I-NEXT: sb t5, 38(sp)
1895 ; RV32I-NEXT: sb t4, 37(sp)
1896 ; RV32I-NEXT: sb t3, 36(sp)
1897 ; RV32I-NEXT: sb t2, 35(sp)
1898 ; RV32I-NEXT: sb t1, 34(sp)
1899 ; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
1900 ; RV32I-NEXT: sb a0, 33(sp)
1901 ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
1902 ; RV32I-NEXT: sb a0, 32(sp)
1903 ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
1904 ; RV32I-NEXT: sb a0, 31(sp)
1905 ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
1906 ; RV32I-NEXT: sb a0, 30(sp)
1907 ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
1908 ; RV32I-NEXT: sb a0, 29(sp)
1909 ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
1910 ; RV32I-NEXT: sb a0, 28(sp)
1911 ; RV32I-NEXT: slli a0, t0, 24
1912 ; RV32I-NEXT: srli a0, a0, 27
1913 ; RV32I-NEXT: addi a4, sp, 28
1914 ; RV32I-NEXT: add a4, a4, a0
1915 ; RV32I-NEXT: lbu a0, 5(a4)
1916 ; RV32I-NEXT: lbu a1, 4(a4)
1917 ; RV32I-NEXT: lbu a3, 6(a4)
1918 ; RV32I-NEXT: lbu a5, 7(a4)
1919 ; RV32I-NEXT: slli a0, a0, 8
1920 ; RV32I-NEXT: or a0, a0, a1
1921 ; RV32I-NEXT: slli a3, a3, 16
1922 ; RV32I-NEXT: slli a5, a5, 24
1923 ; RV32I-NEXT: or a3, a5, a3
1924 ; RV32I-NEXT: or t5, a3, a0
1925 ; RV32I-NEXT: andi a3, t0, 7
1926 ; RV32I-NEXT: lbu a0, 9(a4)
1927 ; RV32I-NEXT: lbu a1, 8(a4)
1928 ; RV32I-NEXT: lbu a5, 10(a4)
1929 ; RV32I-NEXT: lbu a6, 11(a4)
1930 ; RV32I-NEXT: slli a0, a0, 8
1931 ; RV32I-NEXT: or a0, a0, a1
1932 ; RV32I-NEXT: slli a5, a5, 16
1933 ; RV32I-NEXT: slli a6, a6, 24
1934 ; RV32I-NEXT: or a1, a6, a5
1935 ; RV32I-NEXT: or a6, a1, a0
1936 ; RV32I-NEXT: slli a0, a6, 1
1937 ; RV32I-NEXT: not t1, a3
1938 ; RV32I-NEXT: sll a0, a0, t1
1939 ; RV32I-NEXT: lbu a1, 1(a4)
1940 ; RV32I-NEXT: lbu a5, 0(a4)
1941 ; RV32I-NEXT: lbu a7, 2(a4)
1942 ; RV32I-NEXT: lbu t0, 3(a4)
1943 ; RV32I-NEXT: slli a1, a1, 8
1944 ; RV32I-NEXT: or a1, a1, a5
1945 ; RV32I-NEXT: slli a7, a7, 16
1946 ; RV32I-NEXT: slli t0, t0, 24
1947 ; RV32I-NEXT: or a5, t0, a7
1948 ; RV32I-NEXT: or t0, a5, a1
1949 ; RV32I-NEXT: slli a1, t5, 1
1950 ; RV32I-NEXT: xori t2, a3, 31
1951 ; RV32I-NEXT: sll a1, a1, t2
1952 ; RV32I-NEXT: lbu a5, 13(a4)
1953 ; RV32I-NEXT: lbu a7, 12(a4)
1954 ; RV32I-NEXT: lbu t3, 14(a4)
1955 ; RV32I-NEXT: lbu t4, 15(a4)
1956 ; RV32I-NEXT: slli a5, a5, 8
1957 ; RV32I-NEXT: or a5, a5, a7
1958 ; RV32I-NEXT: slli t3, t3, 16
1959 ; RV32I-NEXT: slli t4, t4, 24
1960 ; RV32I-NEXT: or a7, t4, t3
1961 ; RV32I-NEXT: or t3, a7, a5
1962 ; RV32I-NEXT: lbu a5, 17(a4)
1963 ; RV32I-NEXT: lbu a7, 16(a4)
1964 ; RV32I-NEXT: lbu t4, 18(a4)
1965 ; RV32I-NEXT: lbu t6, 19(a4)
1966 ; RV32I-NEXT: slli a5, a5, 8
1967 ; RV32I-NEXT: or a5, a5, a7
1968 ; RV32I-NEXT: slli t4, t4, 16
1969 ; RV32I-NEXT: slli t6, t6, 24
1970 ; RV32I-NEXT: or a7, t6, t4
1971 ; RV32I-NEXT: or t4, a7, a5
1972 ; RV32I-NEXT: slli a5, t4, 1
1973 ; RV32I-NEXT: sll a7, a5, t1
1974 ; RV32I-NEXT: lbu a5, 21(a4)
1975 ; RV32I-NEXT: lbu t6, 20(a4)
1976 ; RV32I-NEXT: lbu s0, 22(a4)
1977 ; RV32I-NEXT: lbu s1, 23(a4)
1978 ; RV32I-NEXT: slli a5, a5, 8
1979 ; RV32I-NEXT: or a5, a5, t6
1980 ; RV32I-NEXT: slli s0, s0, 16
1981 ; RV32I-NEXT: slli s1, s1, 24
1982 ; RV32I-NEXT: or s0, s1, s0
1983 ; RV32I-NEXT: or s0, s0, a5
1984 ; RV32I-NEXT: lbu a5, 25(a4)
1985 ; RV32I-NEXT: lbu t6, 24(a4)
1986 ; RV32I-NEXT: lbu s1, 26(a4)
1987 ; RV32I-NEXT: lbu s2, 27(a4)
1988 ; RV32I-NEXT: slli a5, a5, 8
1989 ; RV32I-NEXT: or a5, a5, t6
1990 ; RV32I-NEXT: slli s1, s1, 16
1991 ; RV32I-NEXT: slli s2, s2, 24
1992 ; RV32I-NEXT: or t6, s2, s1
1993 ; RV32I-NEXT: or t6, t6, a5
1994 ; RV32I-NEXT: lbu a5, 29(a4)
1995 ; RV32I-NEXT: lbu s1, 28(a4)
1996 ; RV32I-NEXT: slli s2, t6, 1
1997 ; RV32I-NEXT: sll t1, s2, t1
1998 ; RV32I-NEXT: slli a5, a5, 8
1999 ; RV32I-NEXT: or a5, a5, s1
2000 ; RV32I-NEXT: lbu s1, 30(a4)
2001 ; RV32I-NEXT: lbu a4, 31(a4)
2002 ; RV32I-NEXT: slli s2, t3, 1
2003 ; RV32I-NEXT: sll s2, s2, t2
2004 ; RV32I-NEXT: slli s1, s1, 16
2005 ; RV32I-NEXT: slli a4, a4, 24
2006 ; RV32I-NEXT: or a4, a4, s1
2007 ; RV32I-NEXT: slli s1, s0, 1
2008 ; RV32I-NEXT: sll s1, s1, t2
2009 ; RV32I-NEXT: or s3, a4, a5
2010 ; RV32I-NEXT: slli a4, s3, 1
2011 ; RV32I-NEXT: sll t2, a4, t2
2012 ; RV32I-NEXT: srl a4, t5, a3
2013 ; RV32I-NEXT: srl a5, t0, a3
2014 ; RV32I-NEXT: srl t0, t3, a3
2015 ; RV32I-NEXT: srl a6, a6, a3
2016 ; RV32I-NEXT: srl t3, s0, a3
2017 ; RV32I-NEXT: srl t4, t4, a3
2018 ; RV32I-NEXT: srl t5, t6, a3
2019 ; RV32I-NEXT: srl a3, s3, a3
2020 ; RV32I-NEXT: srli t6, t5, 16
2021 ; RV32I-NEXT: sb t6, 26(a2)
2022 ; RV32I-NEXT: or t2, t5, t2
2023 ; RV32I-NEXT: sb t5, 24(a2)
2024 ; RV32I-NEXT: srli t5, t5, 8
2025 ; RV32I-NEXT: sb t5, 25(a2)
2026 ; RV32I-NEXT: srli t5, a3, 24
2027 ; RV32I-NEXT: sb t5, 31(a2)
2028 ; RV32I-NEXT: srli t5, a3, 16
2029 ; RV32I-NEXT: sb t5, 30(a2)
2030 ; RV32I-NEXT: sb a3, 28(a2)
2031 ; RV32I-NEXT: srli a3, a3, 8
2032 ; RV32I-NEXT: sb a3, 29(a2)
2033 ; RV32I-NEXT: srli a3, t4, 16
2034 ; RV32I-NEXT: sb a3, 18(a2)
2035 ; RV32I-NEXT: or a3, t4, s1
2036 ; RV32I-NEXT: sb t4, 16(a2)
2037 ; RV32I-NEXT: srli t4, t4, 8
2038 ; RV32I-NEXT: sb t4, 17(a2)
2039 ; RV32I-NEXT: srli t4, t3, 16
2040 ; RV32I-NEXT: sb t4, 22(a2)
2041 ; RV32I-NEXT: or t1, t3, t1
2042 ; RV32I-NEXT: sb t3, 20(a2)
2043 ; RV32I-NEXT: srli t3, t3, 8
2044 ; RV32I-NEXT: sb t3, 21(a2)
2045 ; RV32I-NEXT: srli t3, a6, 16
2046 ; RV32I-NEXT: sb t3, 10(a2)
2047 ; RV32I-NEXT: or t3, a6, s2
2048 ; RV32I-NEXT: sb a6, 8(a2)
2049 ; RV32I-NEXT: srli a6, a6, 8
2050 ; RV32I-NEXT: sb a6, 9(a2)
2051 ; RV32I-NEXT: srli a6, t0, 16
2052 ; RV32I-NEXT: sb a6, 14(a2)
2053 ; RV32I-NEXT: or a6, t0, a7
2054 ; RV32I-NEXT: sb t0, 12(a2)
2055 ; RV32I-NEXT: srli a7, t0, 8
2056 ; RV32I-NEXT: sb a7, 13(a2)
2057 ; RV32I-NEXT: srli a7, a5, 16
2058 ; RV32I-NEXT: sb a7, 2(a2)
2059 ; RV32I-NEXT: or a1, a5, a1
2060 ; RV32I-NEXT: sb a5, 0(a2)
2061 ; RV32I-NEXT: srli a5, a5, 8
2062 ; RV32I-NEXT: sb a5, 1(a2)
2063 ; RV32I-NEXT: srli a5, a4, 16
2064 ; RV32I-NEXT: sb a5, 6(a2)
2065 ; RV32I-NEXT: or a0, a4, a0
2066 ; RV32I-NEXT: sb a4, 4(a2)
2067 ; RV32I-NEXT: srli a4, a4, 8
2068 ; RV32I-NEXT: sb a4, 5(a2)
2069 ; RV32I-NEXT: srli a4, t2, 24
2070 ; RV32I-NEXT: sb a4, 27(a2)
2071 ; RV32I-NEXT: srli a3, a3, 24
2072 ; RV32I-NEXT: sb a3, 19(a2)
2073 ; RV32I-NEXT: srli a3, t1, 24
2074 ; RV32I-NEXT: sb a3, 23(a2)
2075 ; RV32I-NEXT: srli a3, t3, 24
2076 ; RV32I-NEXT: sb a3, 11(a2)
2077 ; RV32I-NEXT: srli a3, a6, 24
2078 ; RV32I-NEXT: sb a3, 15(a2)
2079 ; RV32I-NEXT: srli a1, a1, 24
2080 ; RV32I-NEXT: sb a1, 3(a2)
2081 ; RV32I-NEXT: srli a0, a0, 24
2082 ; RV32I-NEXT: sb a0, 7(a2)
2083 ; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
2084 ; RV32I-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
2085 ; RV32I-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
2086 ; RV32I-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
2087 ; RV32I-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
2088 ; RV32I-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
2089 ; RV32I-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
2090 ; RV32I-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
2091 ; RV32I-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
2092 ; RV32I-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
2093 ; RV32I-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
2094 ; RV32I-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
2095 ; RV32I-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
2096 ; RV32I-NEXT: addi sp, sp, 144
2098 %src = load i256, ptr %src.ptr, align 1
2099 %bitOff = load i256, ptr %bitOff.ptr, align 1
2100 %res = lshr i256 %src, %bitOff
2101 store i256 %res, ptr %dst, align 1
; shl_32bytes: loads a 256-bit value from %src.ptr and a 256-bit shift amount
; from %bitOff.ptr (both with align 1), shifts the value left by that amount,
; and stores the 256-bit result to %dst (align 1).
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script rather than editing them by hand.
2104 define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
2105 ; RV64I-LABEL: shl_32bytes:
2107 ; RV64I-NEXT: addi sp, sp, -224
2108 ; RV64I-NEXT: sd ra, 216(sp) # 8-byte Folded Spill
2109 ; RV64I-NEXT: sd s0, 208(sp) # 8-byte Folded Spill
2110 ; RV64I-NEXT: sd s1, 200(sp) # 8-byte Folded Spill
2111 ; RV64I-NEXT: sd s2, 192(sp) # 8-byte Folded Spill
2112 ; RV64I-NEXT: sd s3, 184(sp) # 8-byte Folded Spill
2113 ; RV64I-NEXT: sd s4, 176(sp) # 8-byte Folded Spill
2114 ; RV64I-NEXT: sd s5, 168(sp) # 8-byte Folded Spill
2115 ; RV64I-NEXT: sd s6, 160(sp) # 8-byte Folded Spill
2116 ; RV64I-NEXT: sd s7, 152(sp) # 8-byte Folded Spill
2117 ; RV64I-NEXT: sd s8, 144(sp) # 8-byte Folded Spill
2118 ; RV64I-NEXT: sd s9, 136(sp) # 8-byte Folded Spill
2119 ; RV64I-NEXT: sd s10, 128(sp) # 8-byte Folded Spill
2120 ; RV64I-NEXT: sd s11, 120(sp) # 8-byte Folded Spill
; riscv64 expectations: the 32 bytes at a0 are loaded one at a time (bytes 0-5
; pass through a3 and are spilled); bytes 0-7 at a1 are combined into t0.
2121 ; RV64I-NEXT: lbu a3, 0(a0)
2122 ; RV64I-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
2123 ; RV64I-NEXT: lbu a3, 1(a0)
2124 ; RV64I-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
2125 ; RV64I-NEXT: lbu a3, 2(a0)
2126 ; RV64I-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
2127 ; RV64I-NEXT: lbu a3, 3(a0)
2128 ; RV64I-NEXT: sd a3, 24(sp) # 8-byte Folded Spill
2129 ; RV64I-NEXT: lbu a3, 4(a0)
2130 ; RV64I-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
2131 ; RV64I-NEXT: lbu a3, 5(a0)
2132 ; RV64I-NEXT: sd a3, 8(sp) # 8-byte Folded Spill
2133 ; RV64I-NEXT: lbu t1, 6(a0)
2134 ; RV64I-NEXT: lbu t2, 7(a0)
2135 ; RV64I-NEXT: lbu t3, 8(a0)
2136 ; RV64I-NEXT: lbu t4, 9(a0)
2137 ; RV64I-NEXT: lbu t5, 10(a0)
2138 ; RV64I-NEXT: lbu t6, 11(a0)
2139 ; RV64I-NEXT: lbu s0, 12(a0)
2140 ; RV64I-NEXT: lbu s1, 13(a0)
2141 ; RV64I-NEXT: lbu s2, 14(a0)
2142 ; RV64I-NEXT: lbu s3, 15(a0)
2143 ; RV64I-NEXT: lbu s4, 16(a0)
2144 ; RV64I-NEXT: lbu s5, 17(a0)
2145 ; RV64I-NEXT: lbu s6, 18(a0)
2146 ; RV64I-NEXT: lbu s7, 19(a0)
2147 ; RV64I-NEXT: lbu s8, 20(a0)
2148 ; RV64I-NEXT: lbu s9, 1(a1)
2149 ; RV64I-NEXT: lbu s10, 0(a1)
2150 ; RV64I-NEXT: lbu s11, 2(a1)
2151 ; RV64I-NEXT: lbu ra, 3(a1)
2152 ; RV64I-NEXT: slli s9, s9, 8
2153 ; RV64I-NEXT: or s9, s9, s10
2154 ; RV64I-NEXT: slli s11, s11, 16
2155 ; RV64I-NEXT: slli ra, ra, 24
2156 ; RV64I-NEXT: lbu s10, 5(a1)
2157 ; RV64I-NEXT: or s11, ra, s11
2158 ; RV64I-NEXT: or s11, s11, s9
2159 ; RV64I-NEXT: lbu s9, 4(a1)
2160 ; RV64I-NEXT: slli s10, s10, 8
2161 ; RV64I-NEXT: lbu ra, 6(a1)
2162 ; RV64I-NEXT: lbu a1, 7(a1)
2163 ; RV64I-NEXT: or s10, s10, s9
2164 ; RV64I-NEXT: lbu s9, 21(a0)
2165 ; RV64I-NEXT: slli ra, ra, 16
2166 ; RV64I-NEXT: slli a1, a1, 24
2167 ; RV64I-NEXT: or a1, a1, ra
2168 ; RV64I-NEXT: lbu ra, 22(a0)
2169 ; RV64I-NEXT: or a1, a1, s10
2170 ; RV64I-NEXT: lbu s10, 23(a0)
2171 ; RV64I-NEXT: slli a1, a1, 32
2172 ; RV64I-NEXT: or t0, a1, s11
2173 ; RV64I-NEXT: lbu s11, 24(a0)
2174 ; RV64I-NEXT: lbu a7, 25(a0)
2175 ; RV64I-NEXT: lbu a6, 26(a0)
2176 ; RV64I-NEXT: lbu a5, 27(a0)
2177 ; RV64I-NEXT: lbu a1, 31(a0)
2178 ; RV64I-NEXT: lbu a3, 30(a0)
2179 ; RV64I-NEXT: lbu a4, 29(a0)
2180 ; RV64I-NEXT: lbu a0, 28(a0)
; The loaded bytes are copied to 88..119(sp) and 56..87(sp), directly below
; them, is filled with zeros.
2181 ; RV64I-NEXT: sb a1, 119(sp)
2182 ; RV64I-NEXT: sb a3, 118(sp)
2183 ; RV64I-NEXT: sb a4, 117(sp)
2184 ; RV64I-NEXT: sb a0, 116(sp)
2185 ; RV64I-NEXT: sb a5, 115(sp)
2186 ; RV64I-NEXT: sb a6, 114(sp)
2187 ; RV64I-NEXT: sb a7, 113(sp)
2188 ; RV64I-NEXT: sb s11, 112(sp)
2189 ; RV64I-NEXT: sb s10, 111(sp)
2190 ; RV64I-NEXT: sb ra, 110(sp)
2191 ; RV64I-NEXT: sb s9, 109(sp)
2192 ; RV64I-NEXT: sb s8, 108(sp)
2193 ; RV64I-NEXT: sb s7, 107(sp)
2194 ; RV64I-NEXT: sb s6, 106(sp)
2195 ; RV64I-NEXT: sb s5, 105(sp)
2196 ; RV64I-NEXT: sb s4, 104(sp)
2197 ; RV64I-NEXT: sb s3, 103(sp)
2198 ; RV64I-NEXT: sb s2, 102(sp)
2199 ; RV64I-NEXT: sb s1, 101(sp)
2200 ; RV64I-NEXT: sb s0, 100(sp)
2201 ; RV64I-NEXT: sb t6, 99(sp)
2202 ; RV64I-NEXT: sb t5, 98(sp)
2203 ; RV64I-NEXT: sb t4, 97(sp)
2204 ; RV64I-NEXT: sb t3, 96(sp)
2205 ; RV64I-NEXT: sb zero, 87(sp)
2206 ; RV64I-NEXT: sb zero, 86(sp)
2207 ; RV64I-NEXT: sb zero, 85(sp)
2208 ; RV64I-NEXT: sb zero, 84(sp)
2209 ; RV64I-NEXT: sb zero, 83(sp)
2210 ; RV64I-NEXT: sb zero, 82(sp)
2211 ; RV64I-NEXT: sb zero, 81(sp)
2212 ; RV64I-NEXT: sb zero, 80(sp)
2213 ; RV64I-NEXT: sb zero, 79(sp)
2214 ; RV64I-NEXT: sb zero, 78(sp)
2215 ; RV64I-NEXT: sb zero, 77(sp)
2216 ; RV64I-NEXT: sb zero, 76(sp)
2217 ; RV64I-NEXT: sb zero, 75(sp)
2218 ; RV64I-NEXT: sb zero, 74(sp)
2219 ; RV64I-NEXT: sb zero, 73(sp)
2220 ; RV64I-NEXT: sb zero, 72(sp)
2221 ; RV64I-NEXT: sb zero, 71(sp)
2222 ; RV64I-NEXT: sb zero, 70(sp)
2223 ; RV64I-NEXT: sb zero, 69(sp)
2224 ; RV64I-NEXT: sb zero, 68(sp)
2225 ; RV64I-NEXT: sb zero, 67(sp)
2226 ; RV64I-NEXT: sb zero, 66(sp)
2227 ; RV64I-NEXT: sb zero, 65(sp)
2228 ; RV64I-NEXT: sb zero, 64(sp)
2229 ; RV64I-NEXT: sb zero, 63(sp)
2230 ; RV64I-NEXT: sb zero, 62(sp)
2231 ; RV64I-NEXT: sb zero, 61(sp)
2232 ; RV64I-NEXT: sb zero, 60(sp)
2233 ; RV64I-NEXT: sb zero, 59(sp)
2234 ; RV64I-NEXT: sb zero, 58(sp)
2235 ; RV64I-NEXT: sb zero, 57(sp)
2236 ; RV64I-NEXT: sb zero, 56(sp)
2237 ; RV64I-NEXT: sb t2, 95(sp)
2238 ; RV64I-NEXT: sb t1, 94(sp)
2239 ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
2240 ; RV64I-NEXT: sb a0, 93(sp)
2241 ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
2242 ; RV64I-NEXT: sb a0, 92(sp)
2243 ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
2244 ; RV64I-NEXT: sb a0, 91(sp)
2245 ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
2246 ; RV64I-NEXT: sb a0, 90(sp)
2247 ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
2248 ; RV64I-NEXT: sb a0, 89(sp)
2249 ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
2250 ; RV64I-NEXT: sb a0, 88(sp)
; a0 = sp + 88 - (bits [7:3] of t0): start of the 32-byte window that is
; re-read below.
2251 ; RV64I-NEXT: slli a0, t0, 56
2252 ; RV64I-NEXT: srli a0, a0, 59
2253 ; RV64I-NEXT: addi a1, sp, 88
2254 ; RV64I-NEXT: sub a0, a1, a0
2255 ; RV64I-NEXT: lbu a1, 9(a0)
2256 ; RV64I-NEXT: lbu a3, 8(a0)
2257 ; RV64I-NEXT: lbu a4, 10(a0)
2258 ; RV64I-NEXT: lbu a5, 11(a0)
2259 ; RV64I-NEXT: slli a1, a1, 8
2260 ; RV64I-NEXT: or a1, a1, a3
2261 ; RV64I-NEXT: slli a4, a4, 16
2262 ; RV64I-NEXT: slli a5, a5, 24
2263 ; RV64I-NEXT: or a4, a5, a4
2264 ; RV64I-NEXT: or a1, a4, a1
2265 ; RV64I-NEXT: lbu a3, 13(a0)
2266 ; RV64I-NEXT: lbu a4, 12(a0)
2267 ; RV64I-NEXT: lbu a5, 14(a0)
2268 ; RV64I-NEXT: lbu a6, 15(a0)
2269 ; RV64I-NEXT: slli a3, a3, 8
2270 ; RV64I-NEXT: or a3, a3, a4
2271 ; RV64I-NEXT: slli a5, a5, 16
2272 ; RV64I-NEXT: slli a6, a6, 24
2273 ; RV64I-NEXT: or a4, a6, a5
2274 ; RV64I-NEXT: or a3, a4, a3
2275 ; RV64I-NEXT: slli a3, a3, 32
2276 ; RV64I-NEXT: or a3, a3, a1
; a1 = low three bits of t0, used as the shift count for the sll instructions
; further down.
2277 ; RV64I-NEXT: andi a1, t0, 7
2278 ; RV64I-NEXT: lbu a4, 1(a0)
2279 ; RV64I-NEXT: lbu a5, 0(a0)
2280 ; RV64I-NEXT: lbu a6, 2(a0)
2281 ; RV64I-NEXT: lbu a7, 3(a0)
2282 ; RV64I-NEXT: slli a4, a4, 8
2283 ; RV64I-NEXT: or a4, a4, a5
2284 ; RV64I-NEXT: slli a6, a6, 16
2285 ; RV64I-NEXT: slli a7, a7, 24
2286 ; RV64I-NEXT: or a5, a7, a6
2287 ; RV64I-NEXT: or a4, a5, a4
2288 ; RV64I-NEXT: lbu a5, 5(a0)
2289 ; RV64I-NEXT: lbu a6, 4(a0)
2290 ; RV64I-NEXT: lbu a7, 6(a0)
2291 ; RV64I-NEXT: lbu t0, 7(a0)
2292 ; RV64I-NEXT: slli a5, a5, 8
2293 ; RV64I-NEXT: or a5, a5, a6
2294 ; RV64I-NEXT: slli a7, a7, 16
2295 ; RV64I-NEXT: slli t0, t0, 24
2296 ; RV64I-NEXT: or a6, t0, a7
2297 ; RV64I-NEXT: or a5, a6, a5
2298 ; RV64I-NEXT: slli a5, a5, 32
2299 ; RV64I-NEXT: or a4, a5, a4
2300 ; RV64I-NEXT: lbu a5, 25(a0)
2301 ; RV64I-NEXT: lbu a6, 24(a0)
2302 ; RV64I-NEXT: lbu a7, 26(a0)
2303 ; RV64I-NEXT: lbu t0, 27(a0)
2304 ; RV64I-NEXT: slli a5, a5, 8
2305 ; RV64I-NEXT: or a5, a5, a6
2306 ; RV64I-NEXT: slli a7, a7, 16
2307 ; RV64I-NEXT: slli t0, t0, 24
2308 ; RV64I-NEXT: or a6, t0, a7
2309 ; RV64I-NEXT: or a5, a6, a5
2310 ; RV64I-NEXT: lbu a6, 29(a0)
2311 ; RV64I-NEXT: lbu a7, 28(a0)
2312 ; RV64I-NEXT: lbu t0, 30(a0)
2313 ; RV64I-NEXT: lbu t1, 31(a0)
2314 ; RV64I-NEXT: slli a6, a6, 8
2315 ; RV64I-NEXT: or a6, a6, a7
2316 ; RV64I-NEXT: slli t0, t0, 16
2317 ; RV64I-NEXT: slli t1, t1, 24
2318 ; RV64I-NEXT: or a7, t1, t0
2319 ; RV64I-NEXT: or a6, a7, a6
2320 ; RV64I-NEXT: slli a6, a6, 32
2321 ; RV64I-NEXT: or a5, a6, a5
2322 ; RV64I-NEXT: lbu a6, 17(a0)
2323 ; RV64I-NEXT: lbu a7, 16(a0)
2324 ; RV64I-NEXT: lbu t0, 18(a0)
2325 ; RV64I-NEXT: lbu t1, 19(a0)
2326 ; RV64I-NEXT: slli a6, a6, 8
2327 ; RV64I-NEXT: or a6, a6, a7
2328 ; RV64I-NEXT: slli t0, t0, 16
2329 ; RV64I-NEXT: slli t1, t1, 24
2330 ; RV64I-NEXT: lbu a7, 21(a0)
2331 ; RV64I-NEXT: or t0, t1, t0
2332 ; RV64I-NEXT: or a6, t0, a6
2333 ; RV64I-NEXT: lbu t0, 20(a0)
2334 ; RV64I-NEXT: slli a7, a7, 8
2335 ; RV64I-NEXT: lbu t1, 22(a0)
2336 ; RV64I-NEXT: lbu a0, 23(a0)
2337 ; RV64I-NEXT: or a7, a7, t0
2338 ; RV64I-NEXT: srli t0, a4, 1
2339 ; RV64I-NEXT: slli t1, t1, 16
2340 ; RV64I-NEXT: slli a0, a0, 24
2341 ; RV64I-NEXT: or t1, a0, t1
2342 ; RV64I-NEXT: xori t2, a1, 63
2343 ; RV64I-NEXT: srl a0, t0, t2
2344 ; RV64I-NEXT: or a7, t1, a7
2345 ; RV64I-NEXT: slli a7, a7, 32
2346 ; RV64I-NEXT: or a6, a7, a6
2347 ; RV64I-NEXT: srli a7, a6, 1
2348 ; RV64I-NEXT: srl a7, a7, t2
2349 ; RV64I-NEXT: srli t0, a3, 1
2350 ; RV64I-NEXT: not t1, a1
2351 ; RV64I-NEXT: srl t0, t0, t1
2352 ; RV64I-NEXT: sll a3, a3, a1
2353 ; RV64I-NEXT: sll a5, a5, a1
2354 ; RV64I-NEXT: sll a6, a6, a1
2355 ; RV64I-NEXT: sll a1, a4, a1
; The result is written to the 32 bytes at a2 one byte at a time.
2356 ; RV64I-NEXT: srli a4, a6, 56
2357 ; RV64I-NEXT: sb a4, 23(a2)
2358 ; RV64I-NEXT: srli a4, a6, 48
2359 ; RV64I-NEXT: sb a4, 22(a2)
2360 ; RV64I-NEXT: srli a4, a6, 40
2361 ; RV64I-NEXT: sb a4, 21(a2)
2362 ; RV64I-NEXT: srli a4, a6, 32
2363 ; RV64I-NEXT: sb a4, 20(a2)
2364 ; RV64I-NEXT: srli a4, a6, 24
2365 ; RV64I-NEXT: sb a4, 19(a2)
2366 ; RV64I-NEXT: srli a4, a6, 16
2367 ; RV64I-NEXT: sb a4, 18(a2)
2368 ; RV64I-NEXT: or a4, a6, t0
2369 ; RV64I-NEXT: srli a6, a6, 8
2370 ; RV64I-NEXT: sb a6, 17(a2)
2371 ; RV64I-NEXT: srli a6, a5, 56
2372 ; RV64I-NEXT: sb a6, 31(a2)
2373 ; RV64I-NEXT: srli a6, a5, 48
2374 ; RV64I-NEXT: sb a6, 30(a2)
2375 ; RV64I-NEXT: srli a6, a5, 40
2376 ; RV64I-NEXT: sb a6, 29(a2)
2377 ; RV64I-NEXT: srli a6, a5, 32
2378 ; RV64I-NEXT: sb a6, 28(a2)
2379 ; RV64I-NEXT: srli a6, a5, 24
2380 ; RV64I-NEXT: sb a6, 27(a2)
2381 ; RV64I-NEXT: srli a6, a5, 16
2382 ; RV64I-NEXT: sb a6, 26(a2)
2383 ; RV64I-NEXT: or a6, a5, a7
2384 ; RV64I-NEXT: srli a5, a5, 8
2385 ; RV64I-NEXT: sb a5, 25(a2)
2386 ; RV64I-NEXT: srli a5, a1, 56
2387 ; RV64I-NEXT: sb a5, 7(a2)
2388 ; RV64I-NEXT: srli a5, a1, 48
2389 ; RV64I-NEXT: sb a5, 6(a2)
2390 ; RV64I-NEXT: srli a5, a1, 40
2391 ; RV64I-NEXT: sb a5, 5(a2)
2392 ; RV64I-NEXT: srli a5, a1, 32
2393 ; RV64I-NEXT: sb a5, 4(a2)
2394 ; RV64I-NEXT: srli a5, a1, 24
2395 ; RV64I-NEXT: sb a5, 3(a2)
2396 ; RV64I-NEXT: srli a5, a1, 16
2397 ; RV64I-NEXT: sb a5, 2(a2)
2398 ; RV64I-NEXT: sb a1, 0(a2)
2399 ; RV64I-NEXT: srli a1, a1, 8
2400 ; RV64I-NEXT: sb a1, 1(a2)
2401 ; RV64I-NEXT: srli a1, a3, 56
2402 ; RV64I-NEXT: sb a1, 15(a2)
2403 ; RV64I-NEXT: srli a1, a3, 48
2404 ; RV64I-NEXT: sb a1, 14(a2)
2405 ; RV64I-NEXT: srli a1, a3, 40
2406 ; RV64I-NEXT: sb a1, 13(a2)
2407 ; RV64I-NEXT: srli a1, a3, 32
2408 ; RV64I-NEXT: sb a1, 12(a2)
2409 ; RV64I-NEXT: srli a1, a3, 24
2410 ; RV64I-NEXT: sb a1, 11(a2)
2411 ; RV64I-NEXT: srli a1, a3, 16
2412 ; RV64I-NEXT: sb a1, 10(a2)
2413 ; RV64I-NEXT: or a0, a3, a0
2414 ; RV64I-NEXT: srli a3, a3, 8
2415 ; RV64I-NEXT: sb a3, 9(a2)
2416 ; RV64I-NEXT: sb a4, 16(a2)
2417 ; RV64I-NEXT: sb a6, 24(a2)
2418 ; RV64I-NEXT: sb a0, 8(a2)
2419 ; RV64I-NEXT: ld ra, 216(sp) # 8-byte Folded Reload
2420 ; RV64I-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
2421 ; RV64I-NEXT: ld s1, 200(sp) # 8-byte Folded Reload
2422 ; RV64I-NEXT: ld s2, 192(sp) # 8-byte Folded Reload
2423 ; RV64I-NEXT: ld s3, 184(sp) # 8-byte Folded Reload
2424 ; RV64I-NEXT: ld s4, 176(sp) # 8-byte Folded Reload
2425 ; RV64I-NEXT: ld s5, 168(sp) # 8-byte Folded Reload
2426 ; RV64I-NEXT: ld s6, 160(sp) # 8-byte Folded Reload
2427 ; RV64I-NEXT: ld s7, 152(sp) # 8-byte Folded Reload
2428 ; RV64I-NEXT: ld s8, 144(sp) # 8-byte Folded Reload
2429 ; RV64I-NEXT: ld s9, 136(sp) # 8-byte Folded Reload
2430 ; RV64I-NEXT: ld s10, 128(sp) # 8-byte Folded Reload
2431 ; RV64I-NEXT: ld s11, 120(sp) # 8-byte Folded Reload
2432 ; RV64I-NEXT: addi sp, sp, 224
; riscv32 expectations: the 32 bytes at a0 are loaded one at a time (bytes 0-5
; pass through a3 and are spilled); only bytes 0-3 at a1 are read, into t0.
2435 ; RV32I-LABEL: shl_32bytes:
2437 ; RV32I-NEXT: addi sp, sp, -144
2438 ; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
2439 ; RV32I-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
2440 ; RV32I-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
2441 ; RV32I-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
2442 ; RV32I-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
2443 ; RV32I-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
2444 ; RV32I-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
2445 ; RV32I-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
2446 ; RV32I-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
2447 ; RV32I-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
2448 ; RV32I-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
2449 ; RV32I-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
2450 ; RV32I-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
2451 ; RV32I-NEXT: lbu a3, 0(a0)
2452 ; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill
2453 ; RV32I-NEXT: lbu a3, 1(a0)
2454 ; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill
2455 ; RV32I-NEXT: lbu a3, 2(a0)
2456 ; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill
2457 ; RV32I-NEXT: lbu a3, 3(a0)
2458 ; RV32I-NEXT: sw a3, 12(sp) # 4-byte Folded Spill
2459 ; RV32I-NEXT: lbu a3, 4(a0)
2460 ; RV32I-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
2461 ; RV32I-NEXT: lbu a3, 5(a0)
2462 ; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
2463 ; RV32I-NEXT: lbu t1, 6(a0)
2464 ; RV32I-NEXT: lbu t2, 7(a0)
2465 ; RV32I-NEXT: lbu t3, 8(a0)
2466 ; RV32I-NEXT: lbu t4, 9(a0)
2467 ; RV32I-NEXT: lbu t5, 10(a0)
2468 ; RV32I-NEXT: lbu t6, 11(a0)
2469 ; RV32I-NEXT: lbu s0, 12(a0)
2470 ; RV32I-NEXT: lbu s1, 13(a0)
2471 ; RV32I-NEXT: lbu s2, 14(a0)
2472 ; RV32I-NEXT: lbu s3, 15(a0)
2473 ; RV32I-NEXT: lbu s4, 16(a0)
2474 ; RV32I-NEXT: lbu s5, 17(a0)
2475 ; RV32I-NEXT: lbu s6, 18(a0)
2476 ; RV32I-NEXT: lbu s7, 19(a0)
2477 ; RV32I-NEXT: lbu s10, 1(a1)
2478 ; RV32I-NEXT: lbu s8, 20(a0)
2479 ; RV32I-NEXT: lbu s9, 21(a0)
2480 ; RV32I-NEXT: lbu s11, 0(a1)
2481 ; RV32I-NEXT: slli s10, s10, 8
2482 ; RV32I-NEXT: lbu ra, 2(a1)
2483 ; RV32I-NEXT: lbu a1, 3(a1)
2484 ; RV32I-NEXT: or s10, s10, s11
2485 ; RV32I-NEXT: lbu s11, 22(a0)
2486 ; RV32I-NEXT: slli ra, ra, 16
2487 ; RV32I-NEXT: slli a1, a1, 24
2488 ; RV32I-NEXT: or a1, a1, ra
2489 ; RV32I-NEXT: lbu ra, 23(a0)
2490 ; RV32I-NEXT: or t0, a1, s10
2491 ; RV32I-NEXT: lbu s10, 24(a0)
2492 ; RV32I-NEXT: lbu a7, 25(a0)
2493 ; RV32I-NEXT: lbu a6, 26(a0)
2494 ; RV32I-NEXT: lbu a5, 27(a0)
2495 ; RV32I-NEXT: lbu a1, 31(a0)
2496 ; RV32I-NEXT: lbu a3, 30(a0)
2497 ; RV32I-NEXT: lbu a4, 29(a0)
2498 ; RV32I-NEXT: lbu a0, 28(a0)
; The loaded bytes are copied to 60..91(sp) and 28..59(sp), directly below
; them, is filled with zeros.
2499 ; RV32I-NEXT: sb a1, 91(sp)
2500 ; RV32I-NEXT: sb a3, 90(sp)
2501 ; RV32I-NEXT: sb a4, 89(sp)
2502 ; RV32I-NEXT: sb a0, 88(sp)
2503 ; RV32I-NEXT: sb a5, 87(sp)
2504 ; RV32I-NEXT: sb a6, 86(sp)
2505 ; RV32I-NEXT: sb a7, 85(sp)
2506 ; RV32I-NEXT: sb s10, 84(sp)
2507 ; RV32I-NEXT: sb ra, 83(sp)
2508 ; RV32I-NEXT: sb s11, 82(sp)
2509 ; RV32I-NEXT: sb s9, 81(sp)
2510 ; RV32I-NEXT: sb s8, 80(sp)
2511 ; RV32I-NEXT: sb s7, 79(sp)
2512 ; RV32I-NEXT: sb s6, 78(sp)
2513 ; RV32I-NEXT: sb s5, 77(sp)
2514 ; RV32I-NEXT: sb s4, 76(sp)
2515 ; RV32I-NEXT: sb zero, 59(sp)
2516 ; RV32I-NEXT: sb zero, 58(sp)
2517 ; RV32I-NEXT: sb zero, 57(sp)
2518 ; RV32I-NEXT: sb zero, 56(sp)
2519 ; RV32I-NEXT: sb zero, 55(sp)
2520 ; RV32I-NEXT: sb zero, 54(sp)
2521 ; RV32I-NEXT: sb zero, 53(sp)
2522 ; RV32I-NEXT: sb zero, 52(sp)
2523 ; RV32I-NEXT: sb zero, 51(sp)
2524 ; RV32I-NEXT: sb zero, 50(sp)
2525 ; RV32I-NEXT: sb zero, 49(sp)
2526 ; RV32I-NEXT: sb zero, 48(sp)
2527 ; RV32I-NEXT: sb zero, 47(sp)
2528 ; RV32I-NEXT: sb zero, 46(sp)
2529 ; RV32I-NEXT: sb zero, 45(sp)
2530 ; RV32I-NEXT: sb zero, 44(sp)
2531 ; RV32I-NEXT: sb zero, 43(sp)
2532 ; RV32I-NEXT: sb zero, 42(sp)
2533 ; RV32I-NEXT: sb zero, 41(sp)
2534 ; RV32I-NEXT: sb zero, 40(sp)
2535 ; RV32I-NEXT: sb zero, 39(sp)
2536 ; RV32I-NEXT: sb zero, 38(sp)
2537 ; RV32I-NEXT: sb zero, 37(sp)
2538 ; RV32I-NEXT: sb zero, 36(sp)
2539 ; RV32I-NEXT: sb zero, 35(sp)
2540 ; RV32I-NEXT: sb zero, 34(sp)
2541 ; RV32I-NEXT: sb zero, 33(sp)
2542 ; RV32I-NEXT: sb zero, 32(sp)
2543 ; RV32I-NEXT: sb zero, 31(sp)
2544 ; RV32I-NEXT: sb zero, 30(sp)
2545 ; RV32I-NEXT: sb zero, 29(sp)
2546 ; RV32I-NEXT: sb zero, 28(sp)
2547 ; RV32I-NEXT: sb s3, 75(sp)
2548 ; RV32I-NEXT: sb s2, 74(sp)
2549 ; RV32I-NEXT: sb s1, 73(sp)
2550 ; RV32I-NEXT: sb s0, 72(sp)
2551 ; RV32I-NEXT: sb t6, 71(sp)
2552 ; RV32I-NEXT: sb t5, 70(sp)
2553 ; RV32I-NEXT: sb t4, 69(sp)
2554 ; RV32I-NEXT: sb t3, 68(sp)
2555 ; RV32I-NEXT: sb t2, 67(sp)
2556 ; RV32I-NEXT: sb t1, 66(sp)
2557 ; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
2558 ; RV32I-NEXT: sb a0, 65(sp)
2559 ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
2560 ; RV32I-NEXT: sb a0, 64(sp)
2561 ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
2562 ; RV32I-NEXT: sb a0, 63(sp)
2563 ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
2564 ; RV32I-NEXT: sb a0, 62(sp)
2565 ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
2566 ; RV32I-NEXT: sb a0, 61(sp)
2567 ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
2568 ; RV32I-NEXT: sb a0, 60(sp)
; a4 = sp + 60 - (bits [7:3] of t0): start of the 32-byte window that is
; re-read below.
2569 ; RV32I-NEXT: slli a0, t0, 24
2570 ; RV32I-NEXT: srli a0, a0, 27
2571 ; RV32I-NEXT: addi a4, sp, 60
2572 ; RV32I-NEXT: sub a4, a4, a0
2573 ; RV32I-NEXT: lbu a0, 5(a4)
2574 ; RV32I-NEXT: lbu a1, 4(a4)
2575 ; RV32I-NEXT: lbu a3, 6(a4)
2576 ; RV32I-NEXT: lbu a5, 7(a4)
2577 ; RV32I-NEXT: slli a0, a0, 8
2578 ; RV32I-NEXT: or a0, a0, a1
2579 ; RV32I-NEXT: slli a3, a3, 16
2580 ; RV32I-NEXT: slli a5, a5, 24
2581 ; RV32I-NEXT: or a3, a5, a3
2582 ; RV32I-NEXT: or t5, a3, a0
; a1 = low three bits of t0, used as the shift count for the sll instructions
; further down.
2583 ; RV32I-NEXT: andi a1, t0, 7
2584 ; RV32I-NEXT: lbu a0, 1(a4)
2585 ; RV32I-NEXT: lbu a3, 0(a4)
2586 ; RV32I-NEXT: lbu a5, 2(a4)
2587 ; RV32I-NEXT: lbu a6, 3(a4)
2588 ; RV32I-NEXT: slli a0, a0, 8
2589 ; RV32I-NEXT: or a0, a0, a3
2590 ; RV32I-NEXT: slli a5, a5, 16
2591 ; RV32I-NEXT: slli a6, a6, 24
2592 ; RV32I-NEXT: or a3, a6, a5
2593 ; RV32I-NEXT: or a6, a3, a0
2594 ; RV32I-NEXT: srli a0, a6, 1
2595 ; RV32I-NEXT: xori a7, a1, 31
2596 ; RV32I-NEXT: srl a0, a0, a7
2597 ; RV32I-NEXT: lbu a3, 13(a4)
2598 ; RV32I-NEXT: lbu a5, 12(a4)
2599 ; RV32I-NEXT: lbu t0, 14(a4)
2600 ; RV32I-NEXT: lbu t1, 15(a4)
2601 ; RV32I-NEXT: slli a3, a3, 8
2602 ; RV32I-NEXT: or a3, a3, a5
2603 ; RV32I-NEXT: slli t0, t0, 16
2604 ; RV32I-NEXT: slli t1, t1, 24
2605 ; RV32I-NEXT: or a5, t1, t0
2606 ; RV32I-NEXT: or t0, a5, a3
2607 ; RV32I-NEXT: lbu a3, 9(a4)
2608 ; RV32I-NEXT: lbu a5, 8(a4)
2609 ; RV32I-NEXT: lbu t1, 10(a4)
2610 ; RV32I-NEXT: lbu t2, 11(a4)
2611 ; RV32I-NEXT: slli a3, a3, 8
2612 ; RV32I-NEXT: or a3, a3, a5
2613 ; RV32I-NEXT: slli t1, t1, 16
2614 ; RV32I-NEXT: slli t2, t2, 24
2615 ; RV32I-NEXT: or a5, t2, t1
2616 ; RV32I-NEXT: or t1, a5, a3
2617 ; RV32I-NEXT: srli a3, t1, 1
2618 ; RV32I-NEXT: srl a5, a3, a7
2619 ; RV32I-NEXT: srli t4, t5, 1
2620 ; RV32I-NEXT: not t2, a1
2621 ; RV32I-NEXT: lbu a3, 21(a4)
2622 ; RV32I-NEXT: lbu t3, 20(a4)
2623 ; RV32I-NEXT: lbu t6, 22(a4)
2624 ; RV32I-NEXT: lbu s0, 23(a4)
2625 ; RV32I-NEXT: slli a3, a3, 8
2626 ; RV32I-NEXT: or a3, a3, t3
2627 ; RV32I-NEXT: slli t6, t6, 16
2628 ; RV32I-NEXT: slli s0, s0, 24
2629 ; RV32I-NEXT: or t3, s0, t6
2630 ; RV32I-NEXT: or t3, t3, a3
2631 ; RV32I-NEXT: lbu a3, 17(a4)
2632 ; RV32I-NEXT: lbu t6, 16(a4)
2633 ; RV32I-NEXT: lbu s0, 18(a4)
2634 ; RV32I-NEXT: lbu s1, 19(a4)
2635 ; RV32I-NEXT: slli a3, a3, 8
2636 ; RV32I-NEXT: or a3, a3, t6
2637 ; RV32I-NEXT: slli s0, s0, 16
2638 ; RV32I-NEXT: slli s1, s1, 24
2639 ; RV32I-NEXT: or s0, s1, s0
2640 ; RV32I-NEXT: or s0, s0, a3
2641 ; RV32I-NEXT: lbu a3, 29(a4)
2642 ; RV32I-NEXT: lbu t6, 28(a4)
2643 ; RV32I-NEXT: lbu s1, 30(a4)
2644 ; RV32I-NEXT: lbu s2, 31(a4)
2645 ; RV32I-NEXT: slli a3, a3, 8
2646 ; RV32I-NEXT: or a3, a3, t6
2647 ; RV32I-NEXT: slli s1, s1, 16
2648 ; RV32I-NEXT: slli s2, s2, 24
2649 ; RV32I-NEXT: or t6, s2, s1
2650 ; RV32I-NEXT: lbu s1, 25(a4)
2651 ; RV32I-NEXT: lbu s2, 24(a4)
2652 ; RV32I-NEXT: srl t4, t4, t2
2653 ; RV32I-NEXT: or t6, t6, a3
2654 ; RV32I-NEXT: slli s1, s1, 8
2655 ; RV32I-NEXT: or a3, s1, s2
2656 ; RV32I-NEXT: lbu s1, 26(a4)
2657 ; RV32I-NEXT: lbu a4, 27(a4)
2658 ; RV32I-NEXT: srli s2, s0, 1
2659 ; RV32I-NEXT: srl s2, s2, a7
2660 ; RV32I-NEXT: slli s1, s1, 16
2661 ; RV32I-NEXT: slli a4, a4, 24
2662 ; RV32I-NEXT: or a4, a4, s1
2663 ; RV32I-NEXT: srli s1, t0, 1
2664 ; RV32I-NEXT: srl s1, s1, t2
2665 ; RV32I-NEXT: or a4, a4, a3
2666 ; RV32I-NEXT: srli a3, a4, 1
2667 ; RV32I-NEXT: srl a7, a3, a7
2668 ; RV32I-NEXT: srli a3, t3, 1
2669 ; RV32I-NEXT: srl t2, a3, t2
2670 ; RV32I-NEXT: sll a3, t5, a1
2671 ; RV32I-NEXT: sll t0, t0, a1
2672 ; RV32I-NEXT: sll t1, t1, a1
2673 ; RV32I-NEXT: sll t3, t3, a1
2674 ; RV32I-NEXT: sll t5, s0, a1
2675 ; RV32I-NEXT: sll t6, t6, a1
2676 ; RV32I-NEXT: sll a4, a4, a1
2677 ; RV32I-NEXT: sll a1, a6, a1
; The result is written to the 32 bytes at a2 one byte at a time.
2678 ; RV32I-NEXT: srli a6, a4, 24
2679 ; RV32I-NEXT: sb a6, 27(a2)
2680 ; RV32I-NEXT: srli a6, a4, 16
2681 ; RV32I-NEXT: sb a6, 26(a2)
2682 ; RV32I-NEXT: or a6, a4, t2
2683 ; RV32I-NEXT: srli a4, a4, 8
2684 ; RV32I-NEXT: sb a4, 25(a2)
2685 ; RV32I-NEXT: srli a4, t6, 24
2686 ; RV32I-NEXT: sb a4, 31(a2)
2687 ; RV32I-NEXT: srli a4, t6, 16
2688 ; RV32I-NEXT: sb a4, 30(a2)
2689 ; RV32I-NEXT: or a4, t6, a7
2690 ; RV32I-NEXT: srli a7, t6, 8
2691 ; RV32I-NEXT: sb a7, 29(a2)
2692 ; RV32I-NEXT: srli a7, t5, 24
2693 ; RV32I-NEXT: sb a7, 19(a2)
2694 ; RV32I-NEXT: srli a7, t5, 16
2695 ; RV32I-NEXT: sb a7, 18(a2)
2696 ; RV32I-NEXT: or a7, t5, s1
2697 ; RV32I-NEXT: srli t2, t5, 8
2698 ; RV32I-NEXT: sb t2, 17(a2)
2699 ; RV32I-NEXT: srli t2, t3, 24
2700 ; RV32I-NEXT: sb t2, 23(a2)
2701 ; RV32I-NEXT: srli t2, t3, 16
2702 ; RV32I-NEXT: sb t2, 22(a2)
2703 ; RV32I-NEXT: or t2, t3, s2
2704 ; RV32I-NEXT: srli t3, t3, 8
2705 ; RV32I-NEXT: sb t3, 21(a2)
2706 ; RV32I-NEXT: srli t3, t1, 24
2707 ; RV32I-NEXT: sb t3, 11(a2)
2708 ; RV32I-NEXT: srli t3, t1, 16
2709 ; RV32I-NEXT: sb t3, 10(a2)
2710 ; RV32I-NEXT: or t3, t1, t4
2711 ; RV32I-NEXT: srli t1, t1, 8
2712 ; RV32I-NEXT: sb t1, 9(a2)
2713 ; RV32I-NEXT: srli t1, t0, 24
2714 ; RV32I-NEXT: sb t1, 15(a2)
2715 ; RV32I-NEXT: srli t1, t0, 16
2716 ; RV32I-NEXT: sb t1, 14(a2)
2717 ; RV32I-NEXT: or a5, t0, a5
2718 ; RV32I-NEXT: srli t0, t0, 8
2719 ; RV32I-NEXT: sb t0, 13(a2)
2720 ; RV32I-NEXT: srli t0, a1, 24
2721 ; RV32I-NEXT: sb t0, 3(a2)
2722 ; RV32I-NEXT: srli t0, a1, 16
2723 ; RV32I-NEXT: sb t0, 2(a2)
2724 ; RV32I-NEXT: sb a1, 0(a2)
2725 ; RV32I-NEXT: srli a1, a1, 8
2726 ; RV32I-NEXT: sb a1, 1(a2)
2727 ; RV32I-NEXT: srli a1, a3, 24
2728 ; RV32I-NEXT: sb a1, 7(a2)
2729 ; RV32I-NEXT: srli a1, a3, 16
2730 ; RV32I-NEXT: sb a1, 6(a2)
2731 ; RV32I-NEXT: or a0, a3, a0
2732 ; RV32I-NEXT: srli a3, a3, 8
2733 ; RV32I-NEXT: sb a3, 5(a2)
2734 ; RV32I-NEXT: sb a6, 24(a2)
2735 ; RV32I-NEXT: sb a4, 28(a2)
2736 ; RV32I-NEXT: sb a7, 16(a2)
2737 ; RV32I-NEXT: sb t2, 20(a2)
2738 ; RV32I-NEXT: sb t3, 8(a2)
2739 ; RV32I-NEXT: sb a5, 12(a2)
2740 ; RV32I-NEXT: sb a0, 4(a2)
2741 ; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
2742 ; RV32I-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
2743 ; RV32I-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
2744 ; RV32I-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
2745 ; RV32I-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
2746 ; RV32I-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
2747 ; RV32I-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
2748 ; RV32I-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
2749 ; RV32I-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
2750 ; RV32I-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
2751 ; RV32I-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
2752 ; RV32I-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
2753 ; RV32I-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
2754 ; RV32I-NEXT: addi sp, sp, 144
; IR under test: byte-aligned i256 loads of the value and the shift amount,
; a single shl, and a byte-aligned i256 store of the result.
2756 %src = load i256, ptr %src.ptr, align 1
2757 %bitOff = load i256, ptr %bitOff.ptr, align 1
2758 %res = shl i256 %src, %bitOff
2759 store i256 %res, ptr %dst, align 1
; ashr_32bytes: arithmetic right shift of a 256-bit (32-byte) value held in memory.
;   %src.ptr    - address of the i256 value to shift (loaded with align 1)
;   %bitOff.ptr - address of the i256 shift amount in bits (loaded with align 1)
;   %dst        - address the i256 result is stored to (align 1)
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; Shape of the expected code on both targets: the 32 source bytes are copied
; into a stack buffer and followed by 32 bytes of sign fill derived from source
; byte 31 (slli + srai), the buffer is re-read starting at byte offset
; (bitOff >> 3) & 31, and the remaining (bitOff & 7) bits are applied with
; srl/sll pairs plus a final sra on the most significant word.
2762 define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
2763 ; RV64I-LABEL: ashr_32bytes:
2765 ; RV64I-NEXT: addi sp, sp, -224
2766 ; RV64I-NEXT: sd ra, 216(sp) # 8-byte Folded Spill
2767 ; RV64I-NEXT: sd s0, 208(sp) # 8-byte Folded Spill
2768 ; RV64I-NEXT: sd s1, 200(sp) # 8-byte Folded Spill
2769 ; RV64I-NEXT: sd s2, 192(sp) # 8-byte Folded Spill
2770 ; RV64I-NEXT: sd s3, 184(sp) # 8-byte Folded Spill
2771 ; RV64I-NEXT: sd s4, 176(sp) # 8-byte Folded Spill
2772 ; RV64I-NEXT: sd s5, 168(sp) # 8-byte Folded Spill
2773 ; RV64I-NEXT: sd s6, 160(sp) # 8-byte Folded Spill
2774 ; RV64I-NEXT: sd s7, 152(sp) # 8-byte Folded Spill
2775 ; RV64I-NEXT: sd s8, 144(sp) # 8-byte Folded Spill
2776 ; RV64I-NEXT: sd s9, 136(sp) # 8-byte Folded Spill
2777 ; RV64I-NEXT: sd s10, 128(sp) # 8-byte Folded Spill
2778 ; RV64I-NEXT: sd s11, 120(sp) # 8-byte Folded Spill
2779 ; RV64I-NEXT: lbu t1, 31(a0)
2780 ; RV64I-NEXT: lbu a3, 0(a0)
2781 ; RV64I-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
2782 ; RV64I-NEXT: lbu a3, 1(a0)
2783 ; RV64I-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
2784 ; RV64I-NEXT: lbu a3, 2(a0)
2785 ; RV64I-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
2786 ; RV64I-NEXT: lbu a3, 3(a0)
2787 ; RV64I-NEXT: sd a3, 24(sp) # 8-byte Folded Spill
2788 ; RV64I-NEXT: lbu a3, 4(a0)
2789 ; RV64I-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
2790 ; RV64I-NEXT: lbu a3, 5(a0)
2791 ; RV64I-NEXT: sd a3, 8(sp) # 8-byte Folded Spill
2792 ; RV64I-NEXT: lbu t3, 6(a0)
2793 ; RV64I-NEXT: lbu t4, 7(a0)
2794 ; RV64I-NEXT: lbu t5, 8(a0)
2795 ; RV64I-NEXT: lbu t6, 9(a0)
2796 ; RV64I-NEXT: lbu s0, 10(a0)
2797 ; RV64I-NEXT: lbu s1, 11(a0)
2798 ; RV64I-NEXT: lbu s2, 12(a0)
2799 ; RV64I-NEXT: lbu s3, 13(a0)
2800 ; RV64I-NEXT: lbu s4, 14(a0)
2801 ; RV64I-NEXT: lbu s5, 15(a0)
2802 ; RV64I-NEXT: lbu s6, 16(a0)
2803 ; RV64I-NEXT: lbu s7, 17(a0)
2804 ; RV64I-NEXT: lbu s8, 18(a0)
2805 ; RV64I-NEXT: lbu s9, 19(a0)
2806 ; RV64I-NEXT: lbu a3, 1(a1)
2807 ; RV64I-NEXT: lbu s10, 0(a1)
2808 ; RV64I-NEXT: lbu s11, 2(a1)
2809 ; RV64I-NEXT: lbu ra, 3(a1)
2810 ; RV64I-NEXT: slli a3, a3, 8
2811 ; RV64I-NEXT: or a3, a3, s10
2812 ; RV64I-NEXT: slli s11, s11, 16
2813 ; RV64I-NEXT: slli ra, ra, 24
2814 ; RV64I-NEXT: lbu s10, 5(a1)
2815 ; RV64I-NEXT: or s11, ra, s11
2816 ; RV64I-NEXT: or a3, s11, a3
2817 ; RV64I-NEXT: lbu s11, 4(a1)
2818 ; RV64I-NEXT: slli s10, s10, 8
2819 ; RV64I-NEXT: lbu ra, 6(a1)
2820 ; RV64I-NEXT: lbu a1, 7(a1)
2821 ; RV64I-NEXT: or s10, s10, s11
2822 ; RV64I-NEXT: lbu s11, 20(a0)
2823 ; RV64I-NEXT: slli ra, ra, 16
2824 ; RV64I-NEXT: slli a1, a1, 24
2825 ; RV64I-NEXT: or a1, a1, ra
2826 ; RV64I-NEXT: lbu ra, 21(a0)
2827 ; RV64I-NEXT: or a1, a1, s10
2828 ; RV64I-NEXT: lbu s10, 22(a0)
2829 ; RV64I-NEXT: slli a1, a1, 32
2830 ; RV64I-NEXT: or t2, a1, a3
2831 ; RV64I-NEXT: lbu t0, 23(a0)
2832 ; RV64I-NEXT: lbu a7, 24(a0)
2833 ; RV64I-NEXT: lbu a6, 25(a0)
2834 ; RV64I-NEXT: lbu a5, 26(a0)
2835 ; RV64I-NEXT: lbu a1, 30(a0)
2836 ; RV64I-NEXT: lbu a3, 29(a0)
2837 ; RV64I-NEXT: lbu a4, 28(a0)
2838 ; RV64I-NEXT: lbu a0, 27(a0)
2839 ; RV64I-NEXT: sb a1, 86(sp)
2840 ; RV64I-NEXT: sb a3, 85(sp)
2841 ; RV64I-NEXT: sb a4, 84(sp)
2842 ; RV64I-NEXT: sb a0, 83(sp)
2843 ; RV64I-NEXT: sb a5, 82(sp)
2844 ; RV64I-NEXT: sb a6, 81(sp)
2845 ; RV64I-NEXT: sb a7, 80(sp)
2846 ; RV64I-NEXT: sb t0, 79(sp)
2847 ; RV64I-NEXT: sb s10, 78(sp)
2848 ; RV64I-NEXT: sb ra, 77(sp)
2849 ; RV64I-NEXT: sb s11, 76(sp)
2850 ; RV64I-NEXT: sb s9, 75(sp)
2851 ; RV64I-NEXT: sb s8, 74(sp)
2852 ; RV64I-NEXT: sb s7, 73(sp)
2853 ; RV64I-NEXT: sb s6, 72(sp)
2854 ; RV64I-NEXT: sb s5, 71(sp)
2855 ; RV64I-NEXT: sb s4, 70(sp)
2856 ; RV64I-NEXT: sb s3, 69(sp)
2857 ; RV64I-NEXT: sb s2, 68(sp)
2858 ; RV64I-NEXT: sb s1, 67(sp)
2859 ; RV64I-NEXT: sb s0, 66(sp)
2860 ; RV64I-NEXT: sb t6, 65(sp)
2861 ; RV64I-NEXT: sb t5, 64(sp)
2862 ; RV64I-NEXT: sb t1, 87(sp)
2863 ; RV64I-NEXT: slli t1, t1, 56
2864 ; RV64I-NEXT: sb t4, 63(sp)
2865 ; RV64I-NEXT: sb t3, 62(sp)
2866 ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
2867 ; RV64I-NEXT: sb a0, 61(sp)
2868 ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
2869 ; RV64I-NEXT: sb a0, 60(sp)
2870 ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
2871 ; RV64I-NEXT: sb a0, 59(sp)
2872 ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
2873 ; RV64I-NEXT: sb a0, 58(sp)
2874 ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
2875 ; RV64I-NEXT: sb a0, 57(sp)
2876 ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
2877 ; RV64I-NEXT: sb a0, 56(sp)
2878 ; RV64I-NEXT: srai a0, t1, 63
2879 ; RV64I-NEXT: sb a0, 112(sp)
2880 ; RV64I-NEXT: sb a0, 104(sp)
2881 ; RV64I-NEXT: sb a0, 96(sp)
2882 ; RV64I-NEXT: sb a0, 88(sp)
2883 ; RV64I-NEXT: srli a1, a0, 56
2884 ; RV64I-NEXT: sb a1, 119(sp)
2885 ; RV64I-NEXT: srli a3, a0, 48
2886 ; RV64I-NEXT: sb a3, 118(sp)
2887 ; RV64I-NEXT: srli a4, a0, 40
2888 ; RV64I-NEXT: sb a4, 117(sp)
2889 ; RV64I-NEXT: srli a5, a0, 32
2890 ; RV64I-NEXT: sb a5, 116(sp)
2891 ; RV64I-NEXT: srli a6, a0, 24
2892 ; RV64I-NEXT: sb a6, 115(sp)
2893 ; RV64I-NEXT: srli a7, a0, 16
2894 ; RV64I-NEXT: sb a7, 114(sp)
2895 ; RV64I-NEXT: srli a0, a0, 8
2896 ; RV64I-NEXT: sb a0, 113(sp)
2897 ; RV64I-NEXT: sb a1, 111(sp)
2898 ; RV64I-NEXT: sb a3, 110(sp)
2899 ; RV64I-NEXT: sb a4, 109(sp)
2900 ; RV64I-NEXT: sb a5, 108(sp)
2901 ; RV64I-NEXT: sb a6, 107(sp)
2902 ; RV64I-NEXT: sb a7, 106(sp)
2903 ; RV64I-NEXT: sb a0, 105(sp)
2904 ; RV64I-NEXT: sb a1, 103(sp)
2905 ; RV64I-NEXT: sb a3, 102(sp)
2906 ; RV64I-NEXT: sb a4, 101(sp)
2907 ; RV64I-NEXT: sb a5, 100(sp)
2908 ; RV64I-NEXT: sb a6, 99(sp)
2909 ; RV64I-NEXT: sb a7, 98(sp)
2910 ; RV64I-NEXT: sb a0, 97(sp)
2911 ; RV64I-NEXT: sb a1, 95(sp)
2912 ; RV64I-NEXT: sb a3, 94(sp)
2913 ; RV64I-NEXT: sb a4, 93(sp)
2914 ; RV64I-NEXT: sb a5, 92(sp)
2915 ; RV64I-NEXT: sb a6, 91(sp)
2916 ; RV64I-NEXT: sb a7, 90(sp)
2917 ; RV64I-NEXT: sb a0, 89(sp)
2918 ; RV64I-NEXT: slli a0, t2, 56
2919 ; RV64I-NEXT: srli a0, a0, 59
2920 ; RV64I-NEXT: addi a1, sp, 56
2921 ; RV64I-NEXT: add a1, a1, a0
2922 ; RV64I-NEXT: lbu a0, 9(a1)
2923 ; RV64I-NEXT: lbu a3, 8(a1)
2924 ; RV64I-NEXT: lbu a4, 10(a1)
2925 ; RV64I-NEXT: lbu a5, 11(a1)
2926 ; RV64I-NEXT: slli a0, a0, 8
2927 ; RV64I-NEXT: or a0, a0, a3
2928 ; RV64I-NEXT: slli a4, a4, 16
2929 ; RV64I-NEXT: slli a5, a5, 24
2930 ; RV64I-NEXT: or a4, a5, a4
2931 ; RV64I-NEXT: or a0, a4, a0
2932 ; RV64I-NEXT: lbu a3, 13(a1)
2933 ; RV64I-NEXT: lbu a4, 12(a1)
2934 ; RV64I-NEXT: lbu a5, 14(a1)
2935 ; RV64I-NEXT: lbu a6, 15(a1)
2936 ; RV64I-NEXT: slli a3, a3, 8
2937 ; RV64I-NEXT: or a3, a3, a4
2938 ; RV64I-NEXT: slli a5, a5, 16
2939 ; RV64I-NEXT: slli a6, a6, 24
2940 ; RV64I-NEXT: or a4, a6, a5
2941 ; RV64I-NEXT: or a3, a4, a3
2942 ; RV64I-NEXT: slli a3, a3, 32
2943 ; RV64I-NEXT: or a4, a3, a0
2944 ; RV64I-NEXT: andi a3, t2, 7
2945 ; RV64I-NEXT: lbu a0, 17(a1)
2946 ; RV64I-NEXT: lbu a5, 16(a1)
2947 ; RV64I-NEXT: lbu a6, 18(a1)
2948 ; RV64I-NEXT: lbu a7, 19(a1)
2949 ; RV64I-NEXT: slli a0, a0, 8
2950 ; RV64I-NEXT: or a0, a0, a5
2951 ; RV64I-NEXT: slli a6, a6, 16
2952 ; RV64I-NEXT: slli a7, a7, 24
2953 ; RV64I-NEXT: or a5, a7, a6
2954 ; RV64I-NEXT: or a0, a5, a0
2955 ; RV64I-NEXT: lbu a5, 21(a1)
2956 ; RV64I-NEXT: lbu a6, 20(a1)
2957 ; RV64I-NEXT: lbu a7, 22(a1)
2958 ; RV64I-NEXT: lbu t0, 23(a1)
2959 ; RV64I-NEXT: slli a5, a5, 8
2960 ; RV64I-NEXT: or a5, a5, a6
2961 ; RV64I-NEXT: slli a7, a7, 16
2962 ; RV64I-NEXT: slli t0, t0, 24
2963 ; RV64I-NEXT: or a6, t0, a7
2964 ; RV64I-NEXT: or a5, a6, a5
2965 ; RV64I-NEXT: slli a5, a5, 32
2966 ; RV64I-NEXT: or a5, a5, a0
2967 ; RV64I-NEXT: slli a0, a5, 1
2968 ; RV64I-NEXT: not a6, a3
2969 ; RV64I-NEXT: sll a0, a0, a6
2970 ; RV64I-NEXT: lbu a6, 1(a1)
2971 ; RV64I-NEXT: lbu a7, 0(a1)
2972 ; RV64I-NEXT: lbu t0, 2(a1)
2973 ; RV64I-NEXT: lbu t1, 3(a1)
2974 ; RV64I-NEXT: slli a6, a6, 8
2975 ; RV64I-NEXT: or a6, a6, a7
2976 ; RV64I-NEXT: slli t0, t0, 16
2977 ; RV64I-NEXT: slli t1, t1, 24
2978 ; RV64I-NEXT: or a7, t1, t0
2979 ; RV64I-NEXT: or a6, a7, a6
2980 ; RV64I-NEXT: lbu a7, 5(a1)
2981 ; RV64I-NEXT: lbu t0, 4(a1)
2982 ; RV64I-NEXT: lbu t1, 6(a1)
2983 ; RV64I-NEXT: lbu t2, 7(a1)
2984 ; RV64I-NEXT: slli a7, a7, 8
2985 ; RV64I-NEXT: or a7, a7, t0
2986 ; RV64I-NEXT: slli t1, t1, 16
2987 ; RV64I-NEXT: slli t2, t2, 24
2988 ; RV64I-NEXT: or t0, t2, t1
2989 ; RV64I-NEXT: or a7, t0, a7
2990 ; RV64I-NEXT: slli a7, a7, 32
2991 ; RV64I-NEXT: or a6, a7, a6
2992 ; RV64I-NEXT: lbu a7, 25(a1)
2993 ; RV64I-NEXT: lbu t0, 24(a1)
2994 ; RV64I-NEXT: lbu t1, 26(a1)
2995 ; RV64I-NEXT: lbu t2, 27(a1)
2996 ; RV64I-NEXT: slli a7, a7, 8
2997 ; RV64I-NEXT: or a7, a7, t0
2998 ; RV64I-NEXT: slli t1, t1, 16
2999 ; RV64I-NEXT: slli t2, t2, 24
3000 ; RV64I-NEXT: or t0, t2, t1
3001 ; RV64I-NEXT: or a7, t0, a7
3002 ; RV64I-NEXT: lbu t0, 29(a1)
3003 ; RV64I-NEXT: lbu t1, 28(a1)
3004 ; RV64I-NEXT: lbu t2, 30(a1)
3005 ; RV64I-NEXT: lbu a1, 31(a1)
3006 ; RV64I-NEXT: slli t0, t0, 8
3007 ; RV64I-NEXT: or t0, t0, t1
3008 ; RV64I-NEXT: slli t2, t2, 16
3009 ; RV64I-NEXT: slli a1, a1, 24
3010 ; RV64I-NEXT: or a1, a1, t2
3011 ; RV64I-NEXT: slli t1, a4, 1
3012 ; RV64I-NEXT: or a1, a1, t0
3013 ; RV64I-NEXT: xori t0, a3, 63
3014 ; RV64I-NEXT: sll t1, t1, t0
3015 ; RV64I-NEXT: slli a1, a1, 32
3016 ; RV64I-NEXT: or a7, a1, a7
3017 ; RV64I-NEXT: slli a1, a7, 1
3018 ; RV64I-NEXT: sll t0, a1, t0
3019 ; RV64I-NEXT: srl a1, a4, a3
3020 ; RV64I-NEXT: srl a4, a6, a3
3021 ; RV64I-NEXT: srl a5, a5, a3
3022 ; RV64I-NEXT: sra a3, a7, a3
3023 ; RV64I-NEXT: srli a6, a5, 48
3024 ; RV64I-NEXT: sb a6, 22(a2)
3025 ; RV64I-NEXT: srli a6, a5, 40
3026 ; RV64I-NEXT: sb a6, 21(a2)
3027 ; RV64I-NEXT: srli a6, a5, 32
3028 ; RV64I-NEXT: sb a6, 20(a2)
3029 ; RV64I-NEXT: srli a6, a5, 24
3030 ; RV64I-NEXT: sb a6, 19(a2)
3031 ; RV64I-NEXT: srli a6, a5, 16
3032 ; RV64I-NEXT: sb a6, 18(a2)
3033 ; RV64I-NEXT: or a6, a5, t0
3034 ; RV64I-NEXT: sb a5, 16(a2)
3035 ; RV64I-NEXT: srli a5, a5, 8
3036 ; RV64I-NEXT: sb a5, 17(a2)
3037 ; RV64I-NEXT: srli a5, a3, 56
3038 ; RV64I-NEXT: sb a5, 31(a2)
3039 ; RV64I-NEXT: srli a5, a3, 48
3040 ; RV64I-NEXT: sb a5, 30(a2)
3041 ; RV64I-NEXT: srli a5, a3, 40
3042 ; RV64I-NEXT: sb a5, 29(a2)
3043 ; RV64I-NEXT: srli a5, a3, 32
3044 ; RV64I-NEXT: sb a5, 28(a2)
3045 ; RV64I-NEXT: srli a5, a3, 24
3046 ; RV64I-NEXT: sb a5, 27(a2)
3047 ; RV64I-NEXT: srli a5, a3, 16
3048 ; RV64I-NEXT: sb a5, 26(a2)
3049 ; RV64I-NEXT: sb a3, 24(a2)
3050 ; RV64I-NEXT: srli a3, a3, 8
3051 ; RV64I-NEXT: sb a3, 25(a2)
3052 ; RV64I-NEXT: srli a3, a4, 48
3053 ; RV64I-NEXT: sb a3, 6(a2)
3054 ; RV64I-NEXT: srli a3, a4, 40
3055 ; RV64I-NEXT: sb a3, 5(a2)
3056 ; RV64I-NEXT: srli a3, a4, 32
3057 ; RV64I-NEXT: sb a3, 4(a2)
3058 ; RV64I-NEXT: srli a3, a4, 24
3059 ; RV64I-NEXT: sb a3, 3(a2)
3060 ; RV64I-NEXT: srli a3, a4, 16
3061 ; RV64I-NEXT: sb a3, 2(a2)
3062 ; RV64I-NEXT: or a3, a4, t1
3063 ; RV64I-NEXT: sb a4, 0(a2)
3064 ; RV64I-NEXT: srli a4, a4, 8
3065 ; RV64I-NEXT: sb a4, 1(a2)
3066 ; RV64I-NEXT: srli a4, a1, 48
3067 ; RV64I-NEXT: sb a4, 14(a2)
3068 ; RV64I-NEXT: srli a4, a1, 40
3069 ; RV64I-NEXT: sb a4, 13(a2)
3070 ; RV64I-NEXT: srli a4, a1, 32
3071 ; RV64I-NEXT: sb a4, 12(a2)
3072 ; RV64I-NEXT: srli a4, a1, 24
3073 ; RV64I-NEXT: sb a4, 11(a2)
3074 ; RV64I-NEXT: srli a4, a1, 16
3075 ; RV64I-NEXT: sb a4, 10(a2)
3076 ; RV64I-NEXT: or a0, a1, a0
3077 ; RV64I-NEXT: sb a1, 8(a2)
3078 ; RV64I-NEXT: srli a1, a1, 8
3079 ; RV64I-NEXT: sb a1, 9(a2)
3080 ; RV64I-NEXT: srli a1, a6, 56
3081 ; RV64I-NEXT: sb a1, 23(a2)
3082 ; RV64I-NEXT: srli a3, a3, 56
3083 ; RV64I-NEXT: sb a3, 7(a2)
3084 ; RV64I-NEXT: srli a0, a0, 56
3085 ; RV64I-NEXT: sb a0, 15(a2)
3086 ; RV64I-NEXT: ld ra, 216(sp) # 8-byte Folded Reload
3087 ; RV64I-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
3088 ; RV64I-NEXT: ld s1, 200(sp) # 8-byte Folded Reload
3089 ; RV64I-NEXT: ld s2, 192(sp) # 8-byte Folded Reload
3090 ; RV64I-NEXT: ld s3, 184(sp) # 8-byte Folded Reload
3091 ; RV64I-NEXT: ld s4, 176(sp) # 8-byte Folded Reload
3092 ; RV64I-NEXT: ld s5, 168(sp) # 8-byte Folded Reload
3093 ; RV64I-NEXT: ld s6, 160(sp) # 8-byte Folded Reload
3094 ; RV64I-NEXT: ld s7, 152(sp) # 8-byte Folded Reload
3095 ; RV64I-NEXT: ld s8, 144(sp) # 8-byte Folded Reload
3096 ; RV64I-NEXT: ld s9, 136(sp) # 8-byte Folded Reload
3097 ; RV64I-NEXT: ld s10, 128(sp) # 8-byte Folded Reload
3098 ; RV64I-NEXT: ld s11, 120(sp) # 8-byte Folded Reload
3099 ; RV64I-NEXT: addi sp, sp, 224
3102 ; RV32I-LABEL: ashr_32bytes:
3104 ; RV32I-NEXT: addi sp, sp, -144
3105 ; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
3106 ; RV32I-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
3107 ; RV32I-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
3108 ; RV32I-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
3109 ; RV32I-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
3110 ; RV32I-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
3111 ; RV32I-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
3112 ; RV32I-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
3113 ; RV32I-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
3114 ; RV32I-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
3115 ; RV32I-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
3116 ; RV32I-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
3117 ; RV32I-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
3118 ; RV32I-NEXT: lbu t3, 31(a0)
3119 ; RV32I-NEXT: lbu a3, 0(a0)
3120 ; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill
3121 ; RV32I-NEXT: lbu a3, 1(a0)
3122 ; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill
3123 ; RV32I-NEXT: lbu a3, 2(a0)
3124 ; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill
3125 ; RV32I-NEXT: lbu a3, 3(a0)
3126 ; RV32I-NEXT: sw a3, 12(sp) # 4-byte Folded Spill
3127 ; RV32I-NEXT: lbu a3, 4(a0)
3128 ; RV32I-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
3129 ; RV32I-NEXT: lbu a3, 5(a0)
3130 ; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
3131 ; RV32I-NEXT: lbu t2, 6(a0)
3132 ; RV32I-NEXT: lbu t4, 7(a0)
3133 ; RV32I-NEXT: lbu t5, 8(a0)
3134 ; RV32I-NEXT: lbu t6, 9(a0)
3135 ; RV32I-NEXT: lbu s0, 10(a0)
3136 ; RV32I-NEXT: lbu s1, 11(a0)
3137 ; RV32I-NEXT: lbu s2, 12(a0)
3138 ; RV32I-NEXT: lbu s3, 13(a0)
3139 ; RV32I-NEXT: lbu s4, 14(a0)
3140 ; RV32I-NEXT: lbu s5, 15(a0)
3141 ; RV32I-NEXT: lbu s6, 16(a0)
3142 ; RV32I-NEXT: lbu s7, 17(a0)
3143 ; RV32I-NEXT: lbu s8, 18(a0)
3144 ; RV32I-NEXT: lbu a3, 1(a1)
3145 ; RV32I-NEXT: lbu s9, 19(a0)
3146 ; RV32I-NEXT: lbu s10, 20(a0)
3147 ; RV32I-NEXT: lbu s11, 0(a1)
3148 ; RV32I-NEXT: slli a3, a3, 8
3149 ; RV32I-NEXT: lbu ra, 2(a1)
3150 ; RV32I-NEXT: lbu a1, 3(a1)
3151 ; RV32I-NEXT: or a3, a3, s11
3152 ; RV32I-NEXT: lbu s11, 21(a0)
3153 ; RV32I-NEXT: slli ra, ra, 16
3154 ; RV32I-NEXT: slli a1, a1, 24
3155 ; RV32I-NEXT: or a1, a1, ra
3156 ; RV32I-NEXT: lbu ra, 22(a0)
3157 ; RV32I-NEXT: or t1, a1, a3
3158 ; RV32I-NEXT: lbu t0, 23(a0)
3159 ; RV32I-NEXT: lbu a7, 24(a0)
3160 ; RV32I-NEXT: lbu a6, 25(a0)
3161 ; RV32I-NEXT: lbu a5, 26(a0)
3162 ; RV32I-NEXT: lbu a1, 30(a0)
3163 ; RV32I-NEXT: lbu a3, 29(a0)
3164 ; RV32I-NEXT: lbu a4, 28(a0)
3165 ; RV32I-NEXT: lbu a0, 27(a0)
3166 ; RV32I-NEXT: sb a1, 58(sp)
3167 ; RV32I-NEXT: sb a3, 57(sp)
3168 ; RV32I-NEXT: sb a4, 56(sp)
3169 ; RV32I-NEXT: sb a0, 55(sp)
3170 ; RV32I-NEXT: sb a5, 54(sp)
3171 ; RV32I-NEXT: sb a6, 53(sp)
3172 ; RV32I-NEXT: sb a7, 52(sp)
3173 ; RV32I-NEXT: sb t0, 51(sp)
3174 ; RV32I-NEXT: sb ra, 50(sp)
3175 ; RV32I-NEXT: sb s11, 49(sp)
3176 ; RV32I-NEXT: sb s10, 48(sp)
3177 ; RV32I-NEXT: sb s9, 47(sp)
3178 ; RV32I-NEXT: sb s8, 46(sp)
3179 ; RV32I-NEXT: sb s7, 45(sp)
3180 ; RV32I-NEXT: sb s6, 44(sp)
3181 ; RV32I-NEXT: sb s5, 43(sp)
3182 ; RV32I-NEXT: sb t3, 59(sp)
3183 ; RV32I-NEXT: slli t3, t3, 24
3184 ; RV32I-NEXT: sb s4, 42(sp)
3185 ; RV32I-NEXT: sb s3, 41(sp)
3186 ; RV32I-NEXT: sb s2, 40(sp)
3187 ; RV32I-NEXT: sb s1, 39(sp)
3188 ; RV32I-NEXT: sb s0, 38(sp)
3189 ; RV32I-NEXT: sb t6, 37(sp)
3190 ; RV32I-NEXT: sb t5, 36(sp)
3191 ; RV32I-NEXT: sb t4, 35(sp)
3192 ; RV32I-NEXT: sb t2, 34(sp)
3193 ; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
3194 ; RV32I-NEXT: sb a0, 33(sp)
3195 ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
3196 ; RV32I-NEXT: sb a0, 32(sp)
3197 ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
3198 ; RV32I-NEXT: sb a0, 31(sp)
3199 ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
3200 ; RV32I-NEXT: sb a0, 30(sp)
3201 ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
3202 ; RV32I-NEXT: sb a0, 29(sp)
3203 ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
3204 ; RV32I-NEXT: sb a0, 28(sp)
3205 ; RV32I-NEXT: srai a0, t3, 31
3206 ; RV32I-NEXT: sb a0, 88(sp)
3207 ; RV32I-NEXT: sb a0, 84(sp)
3208 ; RV32I-NEXT: sb a0, 80(sp)
3209 ; RV32I-NEXT: sb a0, 76(sp)
3210 ; RV32I-NEXT: sb a0, 72(sp)
3211 ; RV32I-NEXT: sb a0, 68(sp)
3212 ; RV32I-NEXT: sb a0, 64(sp)
3213 ; RV32I-NEXT: sb a0, 60(sp)
3214 ; RV32I-NEXT: srli a1, a0, 24
3215 ; RV32I-NEXT: sb a1, 91(sp)
3216 ; RV32I-NEXT: srli a3, a0, 16
3217 ; RV32I-NEXT: sb a3, 90(sp)
3218 ; RV32I-NEXT: srli a0, a0, 8
3219 ; RV32I-NEXT: sb a0, 89(sp)
3220 ; RV32I-NEXT: sb a1, 87(sp)
3221 ; RV32I-NEXT: sb a3, 86(sp)
3222 ; RV32I-NEXT: sb a0, 85(sp)
3223 ; RV32I-NEXT: sb a1, 83(sp)
3224 ; RV32I-NEXT: sb a3, 82(sp)
3225 ; RV32I-NEXT: sb a0, 81(sp)
3226 ; RV32I-NEXT: sb a1, 79(sp)
3227 ; RV32I-NEXT: sb a3, 78(sp)
3228 ; RV32I-NEXT: sb a0, 77(sp)
3229 ; RV32I-NEXT: sb a1, 75(sp)
3230 ; RV32I-NEXT: sb a3, 74(sp)
3231 ; RV32I-NEXT: sb a0, 73(sp)
3232 ; RV32I-NEXT: sb a1, 71(sp)
3233 ; RV32I-NEXT: sb a3, 70(sp)
3234 ; RV32I-NEXT: sb a0, 69(sp)
3235 ; RV32I-NEXT: sb a1, 67(sp)
3236 ; RV32I-NEXT: sb a3, 66(sp)
3237 ; RV32I-NEXT: sb a0, 65(sp)
3238 ; RV32I-NEXT: sb a1, 63(sp)
3239 ; RV32I-NEXT: sb a3, 62(sp)
3240 ; RV32I-NEXT: sb a0, 61(sp)
3241 ; RV32I-NEXT: slli a0, t1, 24
3242 ; RV32I-NEXT: srli a0, a0, 27
3243 ; RV32I-NEXT: addi a4, sp, 28
3244 ; RV32I-NEXT: add a4, a4, a0
3245 ; RV32I-NEXT: lbu a0, 5(a4)
3246 ; RV32I-NEXT: lbu a1, 4(a4)
3247 ; RV32I-NEXT: lbu a3, 6(a4)
3248 ; RV32I-NEXT: lbu a5, 7(a4)
3249 ; RV32I-NEXT: slli a0, a0, 8
3250 ; RV32I-NEXT: or a0, a0, a1
3251 ; RV32I-NEXT: slli a3, a3, 16
3252 ; RV32I-NEXT: slli a5, a5, 24
3253 ; RV32I-NEXT: or a3, a5, a3
3254 ; RV32I-NEXT: or t5, a3, a0
3255 ; RV32I-NEXT: andi a3, t1, 7
3256 ; RV32I-NEXT: lbu a0, 9(a4)
3257 ; RV32I-NEXT: lbu a1, 8(a4)
3258 ; RV32I-NEXT: lbu a5, 10(a4)
3259 ; RV32I-NEXT: lbu a6, 11(a4)
3260 ; RV32I-NEXT: slli a0, a0, 8
3261 ; RV32I-NEXT: or a0, a0, a1
3262 ; RV32I-NEXT: slli a5, a5, 16
3263 ; RV32I-NEXT: slli a6, a6, 24
3264 ; RV32I-NEXT: or a1, a6, a5
3265 ; RV32I-NEXT: or a6, a1, a0
3266 ; RV32I-NEXT: slli a0, a6, 1
3267 ; RV32I-NEXT: not t1, a3
3268 ; RV32I-NEXT: sll a0, a0, t1
3269 ; RV32I-NEXT: lbu a1, 1(a4)
3270 ; RV32I-NEXT: lbu a5, 0(a4)
3271 ; RV32I-NEXT: lbu a7, 2(a4)
3272 ; RV32I-NEXT: lbu t0, 3(a4)
3273 ; RV32I-NEXT: slli a1, a1, 8
3274 ; RV32I-NEXT: or a1, a1, a5
3275 ; RV32I-NEXT: slli a7, a7, 16
3276 ; RV32I-NEXT: slli t0, t0, 24
3277 ; RV32I-NEXT: or a5, t0, a7
3278 ; RV32I-NEXT: or t0, a5, a1
3279 ; RV32I-NEXT: slli a1, t5, 1
3280 ; RV32I-NEXT: xori t2, a3, 31
3281 ; RV32I-NEXT: sll a1, a1, t2
3282 ; RV32I-NEXT: lbu a5, 13(a4)
3283 ; RV32I-NEXT: lbu a7, 12(a4)
3284 ; RV32I-NEXT: lbu t3, 14(a4)
3285 ; RV32I-NEXT: lbu t4, 15(a4)
3286 ; RV32I-NEXT: slli a5, a5, 8
3287 ; RV32I-NEXT: or a5, a5, a7
3288 ; RV32I-NEXT: slli t3, t3, 16
3289 ; RV32I-NEXT: slli t4, t4, 24
3290 ; RV32I-NEXT: or a7, t4, t3
3291 ; RV32I-NEXT: or t3, a7, a5
3292 ; RV32I-NEXT: lbu a5, 17(a4)
3293 ; RV32I-NEXT: lbu a7, 16(a4)
3294 ; RV32I-NEXT: lbu t4, 18(a4)
3295 ; RV32I-NEXT: lbu t6, 19(a4)
3296 ; RV32I-NEXT: slli a5, a5, 8
3297 ; RV32I-NEXT: or a5, a5, a7
3298 ; RV32I-NEXT: slli t4, t4, 16
3299 ; RV32I-NEXT: slli t6, t6, 24
3300 ; RV32I-NEXT: or a7, t6, t4
3301 ; RV32I-NEXT: or t4, a7, a5
3302 ; RV32I-NEXT: slli a5, t4, 1
3303 ; RV32I-NEXT: sll a7, a5, t1
3304 ; RV32I-NEXT: lbu a5, 21(a4)
3305 ; RV32I-NEXT: lbu t6, 20(a4)
3306 ; RV32I-NEXT: lbu s0, 22(a4)
3307 ; RV32I-NEXT: lbu s1, 23(a4)
3308 ; RV32I-NEXT: slli a5, a5, 8
3309 ; RV32I-NEXT: or a5, a5, t6
3310 ; RV32I-NEXT: slli s0, s0, 16
3311 ; RV32I-NEXT: slli s1, s1, 24
3312 ; RV32I-NEXT: or s0, s1, s0
3313 ; RV32I-NEXT: or s0, s0, a5
3314 ; RV32I-NEXT: lbu a5, 25(a4)
3315 ; RV32I-NEXT: lbu t6, 24(a4)
3316 ; RV32I-NEXT: lbu s1, 26(a4)
3317 ; RV32I-NEXT: lbu s2, 27(a4)
3318 ; RV32I-NEXT: slli a5, a5, 8
3319 ; RV32I-NEXT: or a5, a5, t6
3320 ; RV32I-NEXT: slli s1, s1, 16
3321 ; RV32I-NEXT: slli s2, s2, 24
3322 ; RV32I-NEXT: or t6, s2, s1
3323 ; RV32I-NEXT: or t6, t6, a5
3324 ; RV32I-NEXT: lbu a5, 29(a4)
3325 ; RV32I-NEXT: lbu s1, 28(a4)
3326 ; RV32I-NEXT: slli s2, t6, 1
3327 ; RV32I-NEXT: sll t1, s2, t1
3328 ; RV32I-NEXT: slli a5, a5, 8
3329 ; RV32I-NEXT: or a5, a5, s1
3330 ; RV32I-NEXT: lbu s1, 30(a4)
3331 ; RV32I-NEXT: lbu a4, 31(a4)
3332 ; RV32I-NEXT: slli s2, t3, 1
3333 ; RV32I-NEXT: sll s2, s2, t2
3334 ; RV32I-NEXT: slli s1, s1, 16
3335 ; RV32I-NEXT: slli a4, a4, 24
3336 ; RV32I-NEXT: or a4, a4, s1
3337 ; RV32I-NEXT: slli s1, s0, 1
3338 ; RV32I-NEXT: sll s1, s1, t2
3339 ; RV32I-NEXT: or s3, a4, a5
3340 ; RV32I-NEXT: slli a4, s3, 1
3341 ; RV32I-NEXT: sll t2, a4, t2
3342 ; RV32I-NEXT: srl a4, t5, a3
3343 ; RV32I-NEXT: srl a5, t0, a3
3344 ; RV32I-NEXT: srl t0, t3, a3
3345 ; RV32I-NEXT: srl a6, a6, a3
3346 ; RV32I-NEXT: srl t3, s0, a3
3347 ; RV32I-NEXT: srl t4, t4, a3
3348 ; RV32I-NEXT: srl t5, t6, a3
3349 ; RV32I-NEXT: sra a3, s3, a3
3350 ; RV32I-NEXT: srli t6, t5, 16
3351 ; RV32I-NEXT: sb t6, 26(a2)
3352 ; RV32I-NEXT: or t2, t5, t2
3353 ; RV32I-NEXT: sb t5, 24(a2)
3354 ; RV32I-NEXT: srli t5, t5, 8
3355 ; RV32I-NEXT: sb t5, 25(a2)
3356 ; RV32I-NEXT: srli t5, a3, 24
3357 ; RV32I-NEXT: sb t5, 31(a2)
3358 ; RV32I-NEXT: srli t5, a3, 16
3359 ; RV32I-NEXT: sb t5, 30(a2)
3360 ; RV32I-NEXT: sb a3, 28(a2)
3361 ; RV32I-NEXT: srli a3, a3, 8
3362 ; RV32I-NEXT: sb a3, 29(a2)
3363 ; RV32I-NEXT: srli a3, t4, 16
3364 ; RV32I-NEXT: sb a3, 18(a2)
3365 ; RV32I-NEXT: or a3, t4, s1
3366 ; RV32I-NEXT: sb t4, 16(a2)
3367 ; RV32I-NEXT: srli t4, t4, 8
3368 ; RV32I-NEXT: sb t4, 17(a2)
3369 ; RV32I-NEXT: srli t4, t3, 16
3370 ; RV32I-NEXT: sb t4, 22(a2)
3371 ; RV32I-NEXT: or t1, t3, t1
3372 ; RV32I-NEXT: sb t3, 20(a2)
3373 ; RV32I-NEXT: srli t3, t3, 8
3374 ; RV32I-NEXT: sb t3, 21(a2)
3375 ; RV32I-NEXT: srli t3, a6, 16
3376 ; RV32I-NEXT: sb t3, 10(a2)
3377 ; RV32I-NEXT: or t3, a6, s2
3378 ; RV32I-NEXT: sb a6, 8(a2)
3379 ; RV32I-NEXT: srli a6, a6, 8
3380 ; RV32I-NEXT: sb a6, 9(a2)
3381 ; RV32I-NEXT: srli a6, t0, 16
3382 ; RV32I-NEXT: sb a6, 14(a2)
3383 ; RV32I-NEXT: or a6, t0, a7
3384 ; RV32I-NEXT: sb t0, 12(a2)
3385 ; RV32I-NEXT: srli a7, t0, 8
3386 ; RV32I-NEXT: sb a7, 13(a2)
3387 ; RV32I-NEXT: srli a7, a5, 16
3388 ; RV32I-NEXT: sb a7, 2(a2)
3389 ; RV32I-NEXT: or a1, a5, a1
3390 ; RV32I-NEXT: sb a5, 0(a2)
3391 ; RV32I-NEXT: srli a5, a5, 8
3392 ; RV32I-NEXT: sb a5, 1(a2)
3393 ; RV32I-NEXT: srli a5, a4, 16
3394 ; RV32I-NEXT: sb a5, 6(a2)
3395 ; RV32I-NEXT: or a0, a4, a0
3396 ; RV32I-NEXT: sb a4, 4(a2)
3397 ; RV32I-NEXT: srli a4, a4, 8
3398 ; RV32I-NEXT: sb a4, 5(a2)
3399 ; RV32I-NEXT: srli a4, t2, 24
3400 ; RV32I-NEXT: sb a4, 27(a2)
3401 ; RV32I-NEXT: srli a3, a3, 24
3402 ; RV32I-NEXT: sb a3, 19(a2)
3403 ; RV32I-NEXT: srli a3, t1, 24
3404 ; RV32I-NEXT: sb a3, 23(a2)
3405 ; RV32I-NEXT: srli a3, t3, 24
3406 ; RV32I-NEXT: sb a3, 11(a2)
3407 ; RV32I-NEXT: srli a3, a6, 24
3408 ; RV32I-NEXT: sb a3, 15(a2)
3409 ; RV32I-NEXT: srli a1, a1, 24
3410 ; RV32I-NEXT: sb a1, 3(a2)
3411 ; RV32I-NEXT: srli a0, a0, 24
3412 ; RV32I-NEXT: sb a0, 7(a2)
3413 ; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
3414 ; RV32I-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
3415 ; RV32I-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
3416 ; RV32I-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
3417 ; RV32I-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
3418 ; RV32I-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
3419 ; RV32I-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
3420 ; RV32I-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
3421 ; RV32I-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
3422 ; RV32I-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
3423 ; RV32I-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
3424 ; RV32I-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
3425 ; RV32I-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
3426 ; RV32I-NEXT: addi sp, sp, 144
3428 %src = load i256, ptr %src.ptr, align 1
3429 %bitOff = load i256, ptr %bitOff.ptr, align 1
3430 %res = ashr i256 %src, %bitOff
3431 store i256 %res, ptr %dst, align 1