; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M
; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M
; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=RV32MV
; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=RV64MV

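; Equality test of srem by the odd constant 99 on the illegal type i29. The
; expansion should use a multiply and an unsigned compare rather than a
; remainder operation.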
define i1 @test_srem_odd(i29 %X) nounwind {
; RV32-LABEL: test_srem_odd:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    lui a1, 128424
; RV32-NEXT:    addi a1, a1, 331
; RV32-NEXT:    call __mulsi3
; RV32-NEXT:    lui a1, 662
; RV32-NEXT:    addi a1, a1, -83
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    lui a1, 1324
; RV32-NEXT:    addi a1, a1, -165
; RV32-NEXT:    sltu a0, a0, a1
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: test_srem_odd:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    lui a1, 128424
; RV64-NEXT:    addiw a1, a1, 331
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    lui a1, 662
; RV64-NEXT:    addi a1, a1, -83
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    slli a0, a0, 35
; RV64-NEXT:    srli a0, a0, 35
; RV64-NEXT:    lui a1, 1324
; RV64-NEXT:    addiw a1, a1, -165
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_srem_odd:
; RV32M:       # %bb.0:
; RV32M-NEXT:    lui a1, 128424
; RV32M-NEXT:    addi a1, a1, 331
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    lui a1, 662
; RV32M-NEXT:    addi a1, a1, -83
; RV32M-NEXT:    add a0, a0, a1
; RV32M-NEXT:    slli a0, a0, 3
; RV32M-NEXT:    srli a0, a0, 3
; RV32M-NEXT:    lui a1, 1324
; RV32M-NEXT:    addi a1, a1, -165
; RV32M-NEXT:    sltu a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_srem_odd:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lui a1, 128424
; RV64M-NEXT:    addi a1, a1, 331
; RV64M-NEXT:    mul a0, a0, a1
; RV64M-NEXT:    lui a1, 662
; RV64M-NEXT:    addi a1, a1, -83
; RV64M-NEXT:    add a0, a0, a1
; RV64M-NEXT:    slli a0, a0, 35
; RV64M-NEXT:    srli a0, a0, 35
; RV64M-NEXT:    lui a1, 1324
; RV64M-NEXT:    addiw a1, a1, -165
; RV64M-NEXT:    sltu a0, a0, a1
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_srem_odd:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    lui a1, 128424
; RV32MV-NEXT:    addi a1, a1, 331
; RV32MV-NEXT:    mul a0, a0, a1
; RV32MV-NEXT:    lui a1, 662
; RV32MV-NEXT:    addi a1, a1, -83
; RV32MV-NEXT:    add a0, a0, a1
; RV32MV-NEXT:    slli a0, a0, 3
; RV32MV-NEXT:    srli a0, a0, 3
; RV32MV-NEXT:    lui a1, 1324
; RV32MV-NEXT:    addi a1, a1, -165
; RV32MV-NEXT:    sltu a0, a0, a1
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_srem_odd:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    lui a1, 128424
; RV64MV-NEXT:    addi a1, a1, 331
; RV64MV-NEXT:    mul a0, a0, a1
; RV64MV-NEXT:    lui a1, 662
; RV64MV-NEXT:    addi a1, a1, -83
; RV64MV-NEXT:    add a0, a0, a1
; RV64MV-NEXT:    slli a0, a0, 35
; RV64MV-NEXT:    srli a0, a0, 35
; RV64MV-NEXT:    lui a1, 1324
; RV64MV-NEXT:    addiw a1, a1, -165
; RV64MV-NEXT:    sltu a0, a0, a1
; RV64MV-NEXT:    ret
  %srem = srem i29 %X, 99
  %cmp = icmp eq i29 %srem, 0
  ret i1 %cmp
}

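; srem by the even constant 6 on i4, compared against 1. Without the M
; extension this falls back to a __modsi3/__moddi3 libcall; with M the
; remainder is computed inline with a multiply.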
define i1 @test_srem_even(i4 %X) nounwind {
; RV32-LABEL: test_srem_even:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    slli a0, a0, 28
; RV32-NEXT:    srai a0, a0, 28
; RV32-NEXT:    li a1, 6
; RV32-NEXT:    call __modsi3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: test_srem_even:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    slli a0, a0, 60
; RV64-NEXT:    srai a0, a0, 60
; RV64-NEXT:    li a1, 6
; RV64-NEXT:    call __moddi3
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    seqz a0, a0
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_srem_even:
; RV32M:       # %bb.0:
; RV32M-NEXT:    slli a1, a0, 28
; RV32M-NEXT:    srai a1, a1, 28
; RV32M-NEXT:    slli a2, a1, 1
; RV32M-NEXT:    add a1, a2, a1
; RV32M-NEXT:    srli a2, a1, 4
; RV32M-NEXT:    slli a1, a1, 24
; RV32M-NEXT:    srli a1, a1, 31
; RV32M-NEXT:    add a1, a2, a1
; RV32M-NEXT:    li a2, 6
; RV32M-NEXT:    mul a1, a1, a2
; RV32M-NEXT:    sub a0, a0, a1
; RV32M-NEXT:    andi a0, a0, 15
; RV32M-NEXT:    addi a0, a0, -1
; RV32M-NEXT:    seqz a0, a0
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_srem_even:
; RV64M:       # %bb.0:
; RV64M-NEXT:    slli a1, a0, 60
; RV64M-NEXT:    srai a1, a1, 60
; RV64M-NEXT:    slli a2, a1, 1
; RV64M-NEXT:    add a1, a2, a1
; RV64M-NEXT:    srli a2, a1, 4
; RV64M-NEXT:    slli a1, a1, 56
; RV64M-NEXT:    srli a1, a1, 63
; RV64M-NEXT:    add a1, a2, a1
; RV64M-NEXT:    li a2, 6
; RV64M-NEXT:    mul a1, a1, a2
; RV64M-NEXT:    subw a0, a0, a1
; RV64M-NEXT:    andi a0, a0, 15
; RV64M-NEXT:    addi a0, a0, -1
; RV64M-NEXT:    seqz a0, a0
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_srem_even:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    slli a1, a0, 28
; RV32MV-NEXT:    srai a1, a1, 28
; RV32MV-NEXT:    slli a2, a1, 1
; RV32MV-NEXT:    add a1, a2, a1
; RV32MV-NEXT:    srli a2, a1, 4
; RV32MV-NEXT:    slli a1, a1, 24
; RV32MV-NEXT:    srli a1, a1, 31
; RV32MV-NEXT:    add a1, a2, a1
; RV32MV-NEXT:    li a2, 6
; RV32MV-NEXT:    mul a1, a1, a2
; RV32MV-NEXT:    sub a0, a0, a1
; RV32MV-NEXT:    andi a0, a0, 15
; RV32MV-NEXT:    addi a0, a0, -1
; RV32MV-NEXT:    seqz a0, a0
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_srem_even:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    slli a1, a0, 60
; RV64MV-NEXT:    srai a1, a1, 60
; RV64MV-NEXT:    slli a2, a1, 1
; RV64MV-NEXT:    add a1, a2, a1
; RV64MV-NEXT:    srli a2, a1, 4
; RV64MV-NEXT:    slli a1, a1, 56
; RV64MV-NEXT:    srli a1, a1, 63
; RV64MV-NEXT:    add a1, a2, a1
; RV64MV-NEXT:    li a2, 6
; RV64MV-NEXT:    mul a1, a1, a2
; RV64MV-NEXT:    subw a0, a0, a1
; RV64MV-NEXT:    andi a0, a0, 15
; RV64MV-NEXT:    addi a0, a0, -1
; RV64MV-NEXT:    seqz a0, a0
; RV64MV-NEXT:    ret
  %srem = srem i4 %X, 6
  %cmp = icmp eq i4 %srem, 1
  ret i1 %cmp
}

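; srem by the power-of-two constant 4 on i6 with a setne compare; every
; configuration lowers this with shifts and masks only.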
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32-LABEL: test_srem_pow2_setne:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a1, a0, 26
; RV32-NEXT:    srai a1, a1, 26
; RV32-NEXT:    slli a1, a1, 21
; RV32-NEXT:    srli a1, a1, 30
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    andi a1, a1, 60
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    andi a0, a0, 63
; RV32-NEXT:    snez a0, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: test_srem_pow2_setne:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a1, a0, 58
; RV64-NEXT:    srai a1, a1, 58
; RV64-NEXT:    slli a1, a1, 53
; RV64-NEXT:    srli a1, a1, 62
; RV64-NEXT:    add a1, a0, a1
; RV64-NEXT:    andi a1, a1, 60
; RV64-NEXT:    subw a0, a0, a1
; RV64-NEXT:    andi a0, a0, 63
; RV64-NEXT:    snez a0, a0
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_srem_pow2_setne:
; RV32M:       # %bb.0:
; RV32M-NEXT:    slli a1, a0, 26
; RV32M-NEXT:    srai a1, a1, 26
; RV32M-NEXT:    slli a1, a1, 21
; RV32M-NEXT:    srli a1, a1, 30
; RV32M-NEXT:    add a1, a0, a1
; RV32M-NEXT:    andi a1, a1, 60
; RV32M-NEXT:    sub a0, a0, a1
; RV32M-NEXT:    andi a0, a0, 63
; RV32M-NEXT:    snez a0, a0
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_srem_pow2_setne:
; RV64M:       # %bb.0:
; RV64M-NEXT:    slli a1, a0, 58
; RV64M-NEXT:    srai a1, a1, 58
; RV64M-NEXT:    slli a1, a1, 53
; RV64M-NEXT:    srli a1, a1, 62
; RV64M-NEXT:    add a1, a0, a1
; RV64M-NEXT:    andi a1, a1, 60
; RV64M-NEXT:    subw a0, a0, a1
; RV64M-NEXT:    andi a0, a0, 63
; RV64M-NEXT:    snez a0, a0
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_srem_pow2_setne:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    slli a1, a0, 26
; RV32MV-NEXT:    srai a1, a1, 26
; RV32MV-NEXT:    slli a1, a1, 21
; RV32MV-NEXT:    srli a1, a1, 30
; RV32MV-NEXT:    add a1, a0, a1
; RV32MV-NEXT:    andi a1, a1, 60
; RV32MV-NEXT:    sub a0, a0, a1
; RV32MV-NEXT:    andi a0, a0, 63
; RV32MV-NEXT:    snez a0, a0
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_srem_pow2_setne:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    slli a1, a0, 58
; RV64MV-NEXT:    srai a1, a1, 58
; RV64MV-NEXT:    slli a1, a1, 53
; RV64MV-NEXT:    srli a1, a1, 62
; RV64MV-NEXT:    add a1, a0, a1
; RV64MV-NEXT:    andi a1, a1, 60
; RV64MV-NEXT:    subw a0, a0, a1
; RV64MV-NEXT:    andi a0, a0, 63
; RV64MV-NEXT:    snez a0, a0
; RV64MV-NEXT:    ret
  %srem = srem i6 %X, 4
  %cmp = icmp ne i6 %srem, 0
  ret i1 %cmp
}

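; srem of a <3 x i33> vector by <6, 7, -5>, compared against <0, 1, 2> and
; stored back as a sign-extended mask. The scalar configurations use __moddi3
; libcalls or constant-pool multiply sequences; the +v configurations assemble
; the results with vector instructions.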
define void @test_srem_vec(ptr %X) nounwind {
; RV32-LABEL: test_srem_vec:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    lbu a0, 12(a0)
; RV32-NEXT:    lw a1, 8(s0)
; RV32-NEXT:    slli a2, a0, 30
; RV32-NEXT:    lw a3, 4(s0)
; RV32-NEXT:    srli s1, a1, 2
; RV32-NEXT:    or s1, s1, a2
; RV32-NEXT:    slli a2, a1, 31
; RV32-NEXT:    srli a4, a3, 1
; RV32-NEXT:    or s2, a4, a2
; RV32-NEXT:    srli a0, a0, 2
; RV32-NEXT:    slli a0, a0, 31
; RV32-NEXT:    srai s3, a0, 31
; RV32-NEXT:    srli a1, a1, 1
; RV32-NEXT:    slli a1, a1, 31
; RV32-NEXT:    lw a0, 0(s0)
; RV32-NEXT:    srai s4, a1, 31
; RV32-NEXT:    slli a1, a3, 31
; RV32-NEXT:    srai a1, a1, 31
; RV32-NEXT:    li a2, 6
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __moddi3
; RV32-NEXT:    mv s5, a0
; RV32-NEXT:    mv s6, a1
; RV32-NEXT:    li a2, 7
; RV32-NEXT:    mv a0, s2
; RV32-NEXT:    mv a1, s4
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __moddi3
; RV32-NEXT:    mv s2, a0
; RV32-NEXT:    mv s4, a1
; RV32-NEXT:    li a2, -5
; RV32-NEXT:    li a3, -1
; RV32-NEXT:    mv a0, s1
; RV32-NEXT:    mv a1, s3
; RV32-NEXT:    call __moddi3
; RV32-NEXT:    or a2, s5, s6
; RV32-NEXT:    snez a2, a2
; RV32-NEXT:    xori a0, a0, 2
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    xori a1, s2, 1
; RV32-NEXT:    or a1, a1, s4
; RV32-NEXT:    seqz a1, a1
; RV32-NEXT:    neg a3, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    sw a3, 0(s0)
; RV32-NEXT:    andi a3, a0, 7
; RV32-NEXT:    sb a3, 12(s0)
; RV32-NEXT:    slli a3, a1, 1
; RV32-NEXT:    or a2, a3, a2
; RV32-NEXT:    sw a2, 4(s0)
; RV32-NEXT:    srli a2, a1, 31
; RV32-NEXT:    andi a1, a1, 1
; RV32-NEXT:    slli a1, a1, 1
; RV32-NEXT:    slli a0, a0, 2
; RV32-NEXT:    or a0, a2, a0
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    sw a0, 8(s0)
; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: test_srem_vec:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -48
; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    mv s0, a0
; RV64-NEXT:    lbu a0, 12(a0)
; RV64-NEXT:    lwu a1, 8(s0)
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    ld a2, 0(s0)
; RV64-NEXT:    or a0, a1, a0
; RV64-NEXT:    slli a0, a0, 29
; RV64-NEXT:    srai s1, a0, 31
; RV64-NEXT:    srli a0, a2, 2
; RV64-NEXT:    slli a1, a1, 62
; RV64-NEXT:    or a0, a1, a0
; RV64-NEXT:    srai a0, a0, 31
; RV64-NEXT:    slli a2, a2, 31
; RV64-NEXT:    srai s2, a2, 31
; RV64-NEXT:    li a1, 7
; RV64-NEXT:    call __moddi3
; RV64-NEXT:    mv s3, a0
; RV64-NEXT:    li a1, -5
; RV64-NEXT:    mv a0, s1
; RV64-NEXT:    call __moddi3
; RV64-NEXT:    mv s1, a0
; RV64-NEXT:    lui a0, 699051
; RV64-NEXT:    addiw a1, a0, -1365
; RV64-NEXT:    slli a0, a1, 32
; RV64-NEXT:    add a1, a1, a0
; RV64-NEXT:    mv a0, s2
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    lui a1, %hi(.LCPI3_0)
; RV64-NEXT:    ld a1, %lo(.LCPI3_0)(a1)
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    slli a2, a0, 63
; RV64-NEXT:    srli a0, a0, 1
; RV64-NEXT:    or a0, a0, a2
; RV64-NEXT:    sltu a0, a1, a0
; RV64-NEXT:    addi s1, s1, -2
; RV64-NEXT:    seqz a1, s1
; RV64-NEXT:    addi s3, s3, -1
; RV64-NEXT:    seqz a2, s3
; RV64-NEXT:    neg a0, a0
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    slli a3, a1, 2
; RV64-NEXT:    slli a4, a2, 31
; RV64-NEXT:    srli a4, a4, 62
; RV64-NEXT:    or a3, a4, a3
; RV64-NEXT:    sw a3, 8(s0)
; RV64-NEXT:    slli a1, a1, 29
; RV64-NEXT:    srli a1, a1, 61
; RV64-NEXT:    sb a1, 12(s0)
; RV64-NEXT:    slli a0, a0, 31
; RV64-NEXT:    srli a0, a0, 31
; RV64-NEXT:    slli a2, a2, 33
; RV64-NEXT:    or a0, a0, a2
; RV64-NEXT:    sd a0, 0(s0)
; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 48
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_srem_vec:
; RV32M:       # %bb.0:
; RV32M-NEXT:    addi sp, sp, -32
; RV32M-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
; RV32M-NEXT:    mv s0, a0
; RV32M-NEXT:    lbu a0, 12(a0)
; RV32M-NEXT:    lw a1, 8(s0)
; RV32M-NEXT:    slli a2, a0, 30
; RV32M-NEXT:    lw a3, 4(s0)
; RV32M-NEXT:    srli s1, a1, 2
; RV32M-NEXT:    or s1, s1, a2
; RV32M-NEXT:    slli a2, a1, 31
; RV32M-NEXT:    srli a4, a3, 1
; RV32M-NEXT:    or s2, a4, a2
; RV32M-NEXT:    srli a0, a0, 2
; RV32M-NEXT:    slli a0, a0, 31
; RV32M-NEXT:    srai s3, a0, 31
; RV32M-NEXT:    srli a1, a1, 1
; RV32M-NEXT:    slli a1, a1, 31
; RV32M-NEXT:    lw a0, 0(s0)
; RV32M-NEXT:    srai s4, a1, 31
; RV32M-NEXT:    slli a1, a3, 31
; RV32M-NEXT:    srai a1, a1, 31
; RV32M-NEXT:    li a2, 6
; RV32M-NEXT:    li a3, 0
; RV32M-NEXT:    call __moddi3
; RV32M-NEXT:    mv s5, a0
; RV32M-NEXT:    mv s6, a1
; RV32M-NEXT:    li a2, 7
; RV32M-NEXT:    mv a0, s2
; RV32M-NEXT:    mv a1, s4
; RV32M-NEXT:    li a3, 0
; RV32M-NEXT:    call __moddi3
; RV32M-NEXT:    mv s2, a0
; RV32M-NEXT:    mv s4, a1
; RV32M-NEXT:    li a2, -5
; RV32M-NEXT:    li a3, -1
; RV32M-NEXT:    mv a0, s1
; RV32M-NEXT:    mv a1, s3
; RV32M-NEXT:    call __moddi3
; RV32M-NEXT:    or a2, s5, s6
; RV32M-NEXT:    snez a2, a2
; RV32M-NEXT:    xori a0, a0, 2
; RV32M-NEXT:    or a0, a0, a1
; RV32M-NEXT:    seqz a0, a0
; RV32M-NEXT:    xori a1, s2, 1
; RV32M-NEXT:    or a1, a1, s4
; RV32M-NEXT:    seqz a1, a1
; RV32M-NEXT:    neg a3, a2
; RV32M-NEXT:    addi a1, a1, -1
; RV32M-NEXT:    addi a0, a0, -1
; RV32M-NEXT:    sw a3, 0(s0)
; RV32M-NEXT:    andi a3, a0, 7
; RV32M-NEXT:    sb a3, 12(s0)
; RV32M-NEXT:    slli a3, a1, 1
; RV32M-NEXT:    or a2, a3, a2
; RV32M-NEXT:    sw a2, 4(s0)
; RV32M-NEXT:    srli a2, a1, 31
; RV32M-NEXT:    andi a1, a1, 1
; RV32M-NEXT:    slli a1, a1, 1
; RV32M-NEXT:    slli a0, a0, 2
; RV32M-NEXT:    or a0, a2, a0
; RV32M-NEXT:    or a0, a0, a1
; RV32M-NEXT:    sw a0, 8(s0)
; RV32M-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
; RV32M-NEXT:    addi sp, sp, 32
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_srem_vec:
; RV64M:       # %bb.0:
; RV64M-NEXT:    ld a1, 0(a0)
; RV64M-NEXT:    lwu a2, 8(a0)
; RV64M-NEXT:    srli a3, a1, 2
; RV64M-NEXT:    lbu a4, 12(a0)
; RV64M-NEXT:    slli a5, a2, 62
; RV64M-NEXT:    or a3, a5, a3
; RV64M-NEXT:    srai a3, a3, 31
; RV64M-NEXT:    slli a4, a4, 32
; RV64M-NEXT:    or a2, a2, a4
; RV64M-NEXT:    slli a2, a2, 29
; RV64M-NEXT:    lui a4, %hi(.LCPI3_0)
; RV64M-NEXT:    ld a4, %lo(.LCPI3_0)(a4)
; RV64M-NEXT:    srai a2, a2, 31
; RV64M-NEXT:    slli a1, a1, 31
; RV64M-NEXT:    srai a1, a1, 31
; RV64M-NEXT:    mulh a4, a2, a4
; RV64M-NEXT:    srli a5, a4, 63
; RV64M-NEXT:    srai a4, a4, 1
; RV64M-NEXT:    add a4, a4, a5
; RV64M-NEXT:    lui a5, %hi(.LCPI3_1)
; RV64M-NEXT:    ld a5, %lo(.LCPI3_1)(a5)
; RV64M-NEXT:    add a2, a2, a4
; RV64M-NEXT:    slli a4, a4, 2
; RV64M-NEXT:    add a2, a2, a4
; RV64M-NEXT:    mulh a4, a3, a5
; RV64M-NEXT:    srli a5, a4, 63
; RV64M-NEXT:    srai a4, a4, 1
; RV64M-NEXT:    add a4, a4, a5
; RV64M-NEXT:    slli a5, a4, 3
; RV64M-NEXT:    add a3, a3, a4
; RV64M-NEXT:    sub a3, a3, a5
; RV64M-NEXT:    addi a3, a3, -1
; RV64M-NEXT:    seqz a3, a3
; RV64M-NEXT:    lui a4, 699051
; RV64M-NEXT:    addiw a4, a4, -1365
; RV64M-NEXT:    slli a5, a4, 32
; RV64M-NEXT:    add a4, a4, a5
; RV64M-NEXT:    lui a5, %hi(.LCPI3_2)
; RV64M-NEXT:    ld a5, %lo(.LCPI3_2)(a5)
; RV64M-NEXT:    addi a2, a2, -2
; RV64M-NEXT:    seqz a2, a2
; RV64M-NEXT:    mul a1, a1, a4
; RV64M-NEXT:    add a1, a1, a5
; RV64M-NEXT:    slli a4, a1, 63
; RV64M-NEXT:    srli a1, a1, 1
; RV64M-NEXT:    or a1, a1, a4
; RV64M-NEXT:    sltu a1, a5, a1
; RV64M-NEXT:    addi a2, a2, -1
; RV64M-NEXT:    addi a3, a3, -1
; RV64M-NEXT:    neg a1, a1
; RV64M-NEXT:    slli a4, a3, 33
; RV64M-NEXT:    slli a1, a1, 31
; RV64M-NEXT:    srli a1, a1, 31
; RV64M-NEXT:    or a1, a1, a4
; RV64M-NEXT:    sd a1, 0(a0)
; RV64M-NEXT:    slli a1, a2, 2
; RV64M-NEXT:    slli a3, a3, 31
; RV64M-NEXT:    srli a3, a3, 62
; RV64M-NEXT:    or a1, a3, a1
; RV64M-NEXT:    sw a1, 8(a0)
; RV64M-NEXT:    slli a2, a2, 29
; RV64M-NEXT:    srli a2, a2, 61
; RV64M-NEXT:    sb a2, 12(a0)
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_srem_vec:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    addi sp, sp, -48
; RV32MV-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    csrr a1, vlenb
; RV32MV-NEXT:    slli a1, a1, 1
; RV32MV-NEXT:    sub sp, sp, a1
; RV32MV-NEXT:    mv s0, a0
; RV32MV-NEXT:    lbu a0, 12(a0)
; RV32MV-NEXT:    lw a1, 8(s0)
; RV32MV-NEXT:    slli a2, a0, 30
; RV32MV-NEXT:    lw a3, 4(s0)
; RV32MV-NEXT:    srli s1, a1, 2
; RV32MV-NEXT:    or s1, s1, a2
; RV32MV-NEXT:    slli a2, a1, 31
; RV32MV-NEXT:    srli a4, a3, 1
; RV32MV-NEXT:    or s2, a4, a2
; RV32MV-NEXT:    srli a0, a0, 2
; RV32MV-NEXT:    slli a0, a0, 31
; RV32MV-NEXT:    srai s3, a0, 31
; RV32MV-NEXT:    srli a1, a1, 1
; RV32MV-NEXT:    slli a1, a1, 31
; RV32MV-NEXT:    lw a0, 0(s0)
; RV32MV-NEXT:    srai s4, a1, 31
; RV32MV-NEXT:    slli a1, a3, 31
; RV32MV-NEXT:    srai a1, a1, 31
; RV32MV-NEXT:    li a2, 6
; RV32MV-NEXT:    li a3, 0
; RV32MV-NEXT:    call __moddi3
; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT:    vmv.v.x v8, a0
; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
; RV32MV-NEXT:    addi a0, sp, 16
; RV32MV-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
; RV32MV-NEXT:    li a2, 7
; RV32MV-NEXT:    mv a0, s2
; RV32MV-NEXT:    mv a1, s4
; RV32MV-NEXT:    li a3, 0
; RV32MV-NEXT:    call __moddi3
; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT:    addi a2, sp, 16
; RV32MV-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
; RV32MV-NEXT:    addi a0, sp, 16
; RV32MV-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
; RV32MV-NEXT:    li a2, -5
; RV32MV-NEXT:    li a3, -1
; RV32MV-NEXT:    mv a0, s1
; RV32MV-NEXT:    mv a1, s3
; RV32MV-NEXT:    call __moddi3
; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT:    addi a2, sp, 16
; RV32MV-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
; RV32MV-NEXT:    vslidedown.vi v8, v8, 2
; RV32MV-NEXT:    li a0, 511
; RV32MV-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32MV-NEXT:    vmv.v.x v10, a0
; RV32MV-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32MV-NEXT:    vsext.vf4 v12, v10
; RV32MV-NEXT:    vand.vv v8, v8, v12
; RV32MV-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32MV-NEXT:    vmv.v.i v10, 1
; RV32MV-NEXT:    vmv.v.i v11, 0
; RV32MV-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
; RV32MV-NEXT:    vslideup.vi v11, v10, 2
; RV32MV-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32MV-NEXT:    vmv.v.i v10, 2
; RV32MV-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
; RV32MV-NEXT:    vslideup.vi v11, v10, 4
; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT:    vsext.vf4 v12, v11
; RV32MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32MV-NEXT:    vmsne.vv v0, v8, v12
; RV32MV-NEXT:    vmv.v.i v8, 0
; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
; RV32MV-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32MV-NEXT:    vse32.v v8, (s0)
; RV32MV-NEXT:    vslidedown.vi v10, v8, 1
; RV32MV-NEXT:    vmv.x.s a0, v10
; RV32MV-NEXT:    vslidedown.vi v10, v8, 2
; RV32MV-NEXT:    vmv.x.s a1, v10
; RV32MV-NEXT:    slli a2, a1, 1
; RV32MV-NEXT:    sub a2, a2, a0
; RV32MV-NEXT:    sw a2, 4(s0)
; RV32MV-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32MV-NEXT:    vslidedown.vi v10, v8, 4
; RV32MV-NEXT:    vmv.x.s a0, v10
; RV32MV-NEXT:    srli a2, a0, 30
; RV32MV-NEXT:    vslidedown.vi v10, v8, 5
; RV32MV-NEXT:    vmv.x.s a3, v10
; RV32MV-NEXT:    slli a3, a3, 2
; RV32MV-NEXT:    or a2, a3, a2
; RV32MV-NEXT:    andi a2, a2, 7
; RV32MV-NEXT:    sb a2, 12(s0)
; RV32MV-NEXT:    srli a1, a1, 31
; RV32MV-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32MV-NEXT:    vslidedown.vi v8, v8, 3
; RV32MV-NEXT:    vmv.x.s a2, v8
; RV32MV-NEXT:    andi a2, a2, 1
; RV32MV-NEXT:    slli a2, a2, 1
; RV32MV-NEXT:    slli a0, a0, 2
; RV32MV-NEXT:    or a0, a1, a0
; RV32MV-NEXT:    or a0, a0, a2
; RV32MV-NEXT:    sw a0, 8(s0)
; RV32MV-NEXT:    csrr a0, vlenb
; RV32MV-NEXT:    slli a0, a0, 1
; RV32MV-NEXT:    add sp, sp, a0
; RV32MV-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    addi sp, sp, 48
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_srem_vec:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    ld a1, 0(a0)
; RV64MV-NEXT:    lwu a2, 8(a0)
; RV64MV-NEXT:    srli a3, a1, 2
; RV64MV-NEXT:    lbu a4, 12(a0)
; RV64MV-NEXT:    slli a5, a2, 62
; RV64MV-NEXT:    or a3, a5, a3
; RV64MV-NEXT:    srai a3, a3, 31
; RV64MV-NEXT:    slli a4, a4, 32
; RV64MV-NEXT:    or a2, a2, a4
; RV64MV-NEXT:    slli a2, a2, 29
; RV64MV-NEXT:    lui a4, %hi(.LCPI3_0)
; RV64MV-NEXT:    ld a4, %lo(.LCPI3_0)(a4)
; RV64MV-NEXT:    srai a2, a2, 31
; RV64MV-NEXT:    slli a1, a1, 31
; RV64MV-NEXT:    srai a1, a1, 31
; RV64MV-NEXT:    mulh a4, a2, a4
; RV64MV-NEXT:    srli a5, a4, 63
; RV64MV-NEXT:    srai a4, a4, 1
; RV64MV-NEXT:    add a4, a4, a5
; RV64MV-NEXT:    lui a5, %hi(.LCPI3_1)
; RV64MV-NEXT:    ld a5, %lo(.LCPI3_1)(a5)
; RV64MV-NEXT:    add a2, a2, a4
; RV64MV-NEXT:    slli a4, a4, 2
; RV64MV-NEXT:    add a2, a2, a4
; RV64MV-NEXT:    mulh a4, a3, a5
; RV64MV-NEXT:    srli a5, a4, 63
; RV64MV-NEXT:    srai a4, a4, 1
; RV64MV-NEXT:    add a4, a4, a5
; RV64MV-NEXT:    lui a5, %hi(.LCPI3_2)
; RV64MV-NEXT:    ld a5, %lo(.LCPI3_2)(a5)
; RV64MV-NEXT:    add a3, a3, a4
; RV64MV-NEXT:    slli a4, a4, 3
; RV64MV-NEXT:    sub a3, a3, a4
; RV64MV-NEXT:    mulh a4, a1, a5
; RV64MV-NEXT:    srli a5, a4, 63
; RV64MV-NEXT:    add a4, a4, a5
; RV64MV-NEXT:    li a5, 6
; RV64MV-NEXT:    mul a4, a4, a5
; RV64MV-NEXT:    sub a1, a1, a4
; RV64MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64MV-NEXT:    vmv.v.x v8, a1
; RV64MV-NEXT:    vslide1down.vx v8, v8, a3
; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
; RV64MV-NEXT:    vslidedown.vi v8, v8, 1
; RV64MV-NEXT:    li a1, -1
; RV64MV-NEXT:    srli a1, a1, 31
; RV64MV-NEXT:    vand.vx v8, v8, a1
; RV64MV-NEXT:    lui a2, 32
; RV64MV-NEXT:    addi a2, a2, 256
; RV64MV-NEXT:    vmv.s.x v10, a2
; RV64MV-NEXT:    vsext.vf8 v12, v10
; RV64MV-NEXT:    vmsne.vv v0, v8, v12
; RV64MV-NEXT:    vmv.v.i v8, 0
; RV64MV-NEXT:    vmerge.vim v8, v8, -1, v0
; RV64MV-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64MV-NEXT:    vslidedown.vi v10, v8, 2
; RV64MV-NEXT:    vmv.x.s a2, v10
; RV64MV-NEXT:    slli a3, a2, 31
; RV64MV-NEXT:    srli a3, a3, 61
; RV64MV-NEXT:    sb a3, 12(a0)
; RV64MV-NEXT:    vmv.x.s a3, v8
; RV64MV-NEXT:    and a1, a3, a1
; RV64MV-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64MV-NEXT:    vslidedown.vi v8, v8, 1
; RV64MV-NEXT:    vmv.x.s a3, v8
; RV64MV-NEXT:    slli a4, a3, 33
; RV64MV-NEXT:    or a1, a1, a4
; RV64MV-NEXT:    sd a1, 0(a0)
; RV64MV-NEXT:    slli a2, a2, 2
; RV64MV-NEXT:    slli a3, a3, 31
; RV64MV-NEXT:    srli a3, a3, 62
; RV64MV-NEXT:    or a2, a3, a2
; RV64MV-NEXT:    sw a2, 8(a0)
; RV64MV-NEXT:    ret
  %ld = load <3 x i33>, ptr %X
  %srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>
  %cmp = icmp ne <3 x i33> %srem, <i33 0, i33 1, i33 2>
  %ext = sext <3 x i1> %cmp to <3 x i33>
  store <3 x i33> %ext, ptr %X