; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M
; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M
; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=RV32MV
; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=RV64MV
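
; Test that an srem by an odd constant (99) on the illegal type i29, compared
; against zero, is lowered without a division: the value is multiplied by the
; modular inverse of 99 (via a __mulsi3/__muldi3 libcall without M, a plain
; mul with it), adjusted, masked back to 29 bits, and range-checked with sltu.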
define i1 @test_srem_odd(i29 %X) nounwind {
; RV32-LABEL: test_srem_odd:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    lui a1, 128424
; RV32-NEXT:    addi a1, a1, 331
; RV32-NEXT:    call __mulsi3
; RV32-NEXT:    lui a1, 662
; RV32-NEXT:    addi a1, a1, -83
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    lui a1, 1324
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    addi a1, a1, -165
; RV32-NEXT:    sltu a0, a0, a1
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: test_srem_odd:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    lui a1, 128424
; RV64-NEXT:    addiw a1, a1, 331
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    lui a1, 662
; RV64-NEXT:    addi a1, a1, -83
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    lui a1, 1324
; RV64-NEXT:    slli a0, a0, 35
; RV64-NEXT:    srli a0, a0, 35
; RV64-NEXT:    addiw a1, a1, -165
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_srem_odd:
; RV32M:       # %bb.0:
; RV32M-NEXT:    lui a1, 128424
; RV32M-NEXT:    addi a1, a1, 331
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    lui a1, 662
; RV32M-NEXT:    addi a1, a1, -83
; RV32M-NEXT:    add a0, a0, a1
; RV32M-NEXT:    lui a1, 1324
; RV32M-NEXT:    slli a0, a0, 3
; RV32M-NEXT:    srli a0, a0, 3
; RV32M-NEXT:    addi a1, a1, -165
; RV32M-NEXT:    sltu a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_srem_odd:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lui a1, 128424
; RV64M-NEXT:    addi a1, a1, 331
; RV64M-NEXT:    mul a0, a0, a1
; RV64M-NEXT:    lui a1, 662
; RV64M-NEXT:    addi a1, a1, -83
; RV64M-NEXT:    add a0, a0, a1
; RV64M-NEXT:    lui a1, 1324
; RV64M-NEXT:    slli a0, a0, 35
; RV64M-NEXT:    srli a0, a0, 35
; RV64M-NEXT:    addiw a1, a1, -165
; RV64M-NEXT:    sltu a0, a0, a1
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_srem_odd:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    lui a1, 128424
; RV32MV-NEXT:    addi a1, a1, 331
; RV32MV-NEXT:    mul a0, a0, a1
; RV32MV-NEXT:    lui a1, 662
; RV32MV-NEXT:    addi a1, a1, -83
; RV32MV-NEXT:    add a0, a0, a1
; RV32MV-NEXT:    lui a1, 1324
; RV32MV-NEXT:    slli a0, a0, 3
; RV32MV-NEXT:    srli a0, a0, 3
; RV32MV-NEXT:    addi a1, a1, -165
; RV32MV-NEXT:    sltu a0, a0, a1
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_srem_odd:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    lui a1, 128424
; RV64MV-NEXT:    addi a1, a1, 331
; RV64MV-NEXT:    mul a0, a0, a1
; RV64MV-NEXT:    lui a1, 662
; RV64MV-NEXT:    addi a1, a1, -83
; RV64MV-NEXT:    add a0, a0, a1
; RV64MV-NEXT:    lui a1, 1324
; RV64MV-NEXT:    slli a0, a0, 35
; RV64MV-NEXT:    srli a0, a0, 35
; RV64MV-NEXT:    addiw a1, a1, -165
; RV64MV-NEXT:    sltu a0, a0, a1
; RV64MV-NEXT:    ret
  %srem = srem i29 %X, 99
  %cmp = icmp eq i29 %srem, 0
  ret i1 %cmp
}
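
; Test an srem by an even constant (6) on the illegal type i4, compared
; against 1. The input is first sign-extended from i4 with slli+srai; without
; M the remainder comes from a __modsi3/__moddi3 libcall, while with M it is
; computed inline with a multiply-by-3 and shift sequence before the compare.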
define i1 @test_srem_even(i4 %X) nounwind {
; RV32-LABEL: test_srem_even:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    slli a0, a0, 28
; RV32-NEXT:    srai a0, a0, 28
; RV32-NEXT:    li a1, 6
; RV32-NEXT:    call __modsi3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: test_srem_even:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    slli a0, a0, 60
; RV64-NEXT:    srai a0, a0, 60
; RV64-NEXT:    li a1, 6
; RV64-NEXT:    call __moddi3
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    seqz a0, a0
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_srem_even:
; RV32M:       # %bb.0:
; RV32M-NEXT:    slli a1, a0, 28
; RV32M-NEXT:    srai a1, a1, 28
; RV32M-NEXT:    slli a2, a1, 1
; RV32M-NEXT:    add a1, a2, a1
; RV32M-NEXT:    srli a2, a1, 31
; RV32M-NEXT:    srli a1, a1, 4
; RV32M-NEXT:    add a1, a1, a2
; RV32M-NEXT:    slli a2, a1, 3
; RV32M-NEXT:    slli a1, a1, 1
; RV32M-NEXT:    sub a1, a1, a2
; RV32M-NEXT:    add a0, a0, a1
; RV32M-NEXT:    andi a0, a0, 15
; RV32M-NEXT:    addi a0, a0, -1
; RV32M-NEXT:    seqz a0, a0
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_srem_even:
; RV64M:       # %bb.0:
; RV64M-NEXT:    slli a1, a0, 60
; RV64M-NEXT:    srai a1, a1, 60
; RV64M-NEXT:    slli a2, a1, 1
; RV64M-NEXT:    add a1, a2, a1
; RV64M-NEXT:    srli a2, a1, 63
; RV64M-NEXT:    srli a1, a1, 4
; RV64M-NEXT:    add a1, a1, a2
; RV64M-NEXT:    slli a2, a1, 3
; RV64M-NEXT:    slli a1, a1, 1
; RV64M-NEXT:    subw a1, a1, a2
; RV64M-NEXT:    add a0, a0, a1
; RV64M-NEXT:    andi a0, a0, 15
; RV64M-NEXT:    addi a0, a0, -1
; RV64M-NEXT:    seqz a0, a0
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_srem_even:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    slli a1, a0, 28
; RV32MV-NEXT:    srai a1, a1, 28
; RV32MV-NEXT:    slli a2, a1, 1
; RV32MV-NEXT:    add a1, a2, a1
; RV32MV-NEXT:    srli a2, a1, 31
; RV32MV-NEXT:    srli a1, a1, 4
; RV32MV-NEXT:    add a1, a1, a2
; RV32MV-NEXT:    slli a2, a1, 3
; RV32MV-NEXT:    slli a1, a1, 1
; RV32MV-NEXT:    sub a1, a1, a2
; RV32MV-NEXT:    add a0, a0, a1
; RV32MV-NEXT:    andi a0, a0, 15
; RV32MV-NEXT:    addi a0, a0, -1
; RV32MV-NEXT:    seqz a0, a0
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_srem_even:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    slli a1, a0, 60
; RV64MV-NEXT:    srai a1, a1, 60
; RV64MV-NEXT:    slli a2, a1, 1
; RV64MV-NEXT:    add a1, a2, a1
; RV64MV-NEXT:    srli a2, a1, 63
; RV64MV-NEXT:    srli a1, a1, 4
; RV64MV-NEXT:    add a1, a1, a2
; RV64MV-NEXT:    slli a2, a1, 3
; RV64MV-NEXT:    slli a1, a1, 1
; RV64MV-NEXT:    subw a1, a1, a2
; RV64MV-NEXT:    add a0, a0, a1
; RV64MV-NEXT:    andi a0, a0, 15
; RV64MV-NEXT:    addi a0, a0, -1
; RV64MV-NEXT:    seqz a0, a0
; RV64MV-NEXT:    ret
  %srem = srem i4 %X, 6
  %cmp = icmp eq i4 %srem, 1
  ret i1 %cmp
}
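
; Test an srem by a power of two (4) on the illegal type i6 with a setne
; against zero. No multiply or libcall is needed on any configuration: the
; sign-extended value is biased, masked, and subtracted to form the remainder,
; so all six check prefixes share essentially the same shift/add/and/sub
; sequence (modulo sub vs. subw on RV64).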
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32-LABEL: test_srem_pow2_setne:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a1, a0, 26
; RV32-NEXT:    srai a1, a1, 1
; RV32-NEXT:    srli a1, a1, 30
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    andi a1, a1, 60
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    andi a0, a0, 63
; RV32-NEXT:    snez a0, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: test_srem_pow2_setne:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a1, a0, 58
; RV64-NEXT:    srai a1, a1, 1
; RV64-NEXT:    srli a1, a1, 62
; RV64-NEXT:    add a1, a0, a1
; RV64-NEXT:    andi a1, a1, 60
; RV64-NEXT:    subw a0, a0, a1
; RV64-NEXT:    andi a0, a0, 63
; RV64-NEXT:    snez a0, a0
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_srem_pow2_setne:
; RV32M:       # %bb.0:
; RV32M-NEXT:    slli a1, a0, 26
; RV32M-NEXT:    srai a1, a1, 1
; RV32M-NEXT:    srli a1, a1, 30
; RV32M-NEXT:    add a1, a0, a1
; RV32M-NEXT:    andi a1, a1, 60
; RV32M-NEXT:    sub a0, a0, a1
; RV32M-NEXT:    andi a0, a0, 63
; RV32M-NEXT:    snez a0, a0
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_srem_pow2_setne:
; RV64M:       # %bb.0:
; RV64M-NEXT:    slli a1, a0, 58
; RV64M-NEXT:    srai a1, a1, 1
; RV64M-NEXT:    srli a1, a1, 62
; RV64M-NEXT:    add a1, a0, a1
; RV64M-NEXT:    andi a1, a1, 60
; RV64M-NEXT:    subw a0, a0, a1
; RV64M-NEXT:    andi a0, a0, 63
; RV64M-NEXT:    snez a0, a0
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_srem_pow2_setne:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    slli a1, a0, 26
; RV32MV-NEXT:    srai a1, a1, 1
; RV32MV-NEXT:    srli a1, a1, 30
; RV32MV-NEXT:    add a1, a0, a1
; RV32MV-NEXT:    andi a1, a1, 60
; RV32MV-NEXT:    sub a0, a0, a1
; RV32MV-NEXT:    andi a0, a0, 63
; RV32MV-NEXT:    snez a0, a0
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_srem_pow2_setne:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    slli a1, a0, 58
; RV64MV-NEXT:    srai a1, a1, 1
; RV64MV-NEXT:    srli a1, a1, 62
; RV64MV-NEXT:    add a1, a0, a1
; RV64MV-NEXT:    andi a1, a1, 60
; RV64MV-NEXT:    subw a0, a0, a1
; RV64MV-NEXT:    andi a0, a0, 63
; RV64MV-NEXT:    snez a0, a0
; RV64MV-NEXT:    ret
  %srem = srem i6 %X, 4
  %cmp = icmp ne i6 %srem, 0
  ret i1 %cmp
}
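
; Test a <3 x i33> srem by the mixed divisors <6, 7, -5>, compared against
; <0, 1, 2>, with the i1 results sign-extended and stored back. All RV32
; configurations and plain RV64 fall back to __moddi3 (plus __muldi3) libcalls
; per element; RV64M expands to mulh-based magic-number sequences; the MV
; configurations assemble and test the lanes with vector slides, vmsne, and
; vmerge.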
define void @test_srem_vec(ptr %X) nounwind {
; RV32-LABEL: test_srem_vec:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    lbu a1, 12(a0)
; RV32-NEXT:    lw a2, 8(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    lw a0, 0(a0)
; RV32-NEXT:    slli a4, a1, 30
; RV32-NEXT:    srli s1, a2, 2
; RV32-NEXT:    slli a5, a2, 31
; RV32-NEXT:    or s1, s1, a4
; RV32-NEXT:    srli a4, a3, 1
; RV32-NEXT:    or s2, a4, a5
; RV32-NEXT:    srli a1, a1, 2
; RV32-NEXT:    srli a2, a2, 1
; RV32-NEXT:    slli a3, a3, 31
; RV32-NEXT:    slli a1, a1, 31
; RV32-NEXT:    slli a2, a2, 31
; RV32-NEXT:    srai s3, a1, 31
; RV32-NEXT:    srai s4, a2, 31
; RV32-NEXT:    srai a1, a3, 31
; RV32-NEXT:    li a2, 6
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __moddi3
; RV32-NEXT:    mv s5, a0
; RV32-NEXT:    mv s6, a1
; RV32-NEXT:    li a2, 7
; RV32-NEXT:    mv a0, s2
; RV32-NEXT:    mv a1, s4
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __moddi3
; RV32-NEXT:    mv s2, a0
; RV32-NEXT:    mv s4, a1
; RV32-NEXT:    li a2, -5
; RV32-NEXT:    li a3, -1
; RV32-NEXT:    mv a0, s1
; RV32-NEXT:    mv a1, s3
; RV32-NEXT:    call __moddi3
; RV32-NEXT:    or a2, s5, s6
; RV32-NEXT:    xori a0, a0, 2
; RV32-NEXT:    xori a3, s2, 1
; RV32-NEXT:    snez a2, a2
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    or a1, a3, s4
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    seqz a1, a1
; RV32-NEXT:    neg a3, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    andi a4, a0, 7
; RV32-NEXT:    slli a5, a1, 1
; RV32-NEXT:    or a2, a5, a2
; RV32-NEXT:    srli a5, a1, 31
; RV32-NEXT:    andi a1, a1, 1
; RV32-NEXT:    slli a0, a0, 2
; RV32-NEXT:    slli a1, a1, 1
; RV32-NEXT:    or a0, a5, a0
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    sw a3, 0(s0)
; RV32-NEXT:    sw a2, 4(s0)
; RV32-NEXT:    sw a0, 8(s0)
; RV32-NEXT:    sb a4, 12(s0)
; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: test_srem_vec:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -48
; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    mv s0, a0
; RV64-NEXT:    lbu a0, 12(a0)
; RV64-NEXT:    ld a1, 0(s0)
; RV64-NEXT:    lwu a2, 8(s0)
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    srli a3, a1, 2
; RV64-NEXT:    or a0, a2, a0
; RV64-NEXT:    slli a2, a2, 62
; RV64-NEXT:    slli a1, a1, 31
; RV64-NEXT:    or a2, a2, a3
; RV64-NEXT:    slli s1, a0, 29
; RV64-NEXT:    srai a0, a2, 31
; RV64-NEXT:    srai s1, s1, 31
; RV64-NEXT:    srai s2, a1, 31
; RV64-NEXT:    li a1, 7
; RV64-NEXT:    call __moddi3
; RV64-NEXT:    mv s3, a0
; RV64-NEXT:    li a1, -5
; RV64-NEXT:    mv a0, s1
; RV64-NEXT:    call __moddi3
; RV64-NEXT:    mv s1, a0
; RV64-NEXT:    lui a0, 699051
; RV64-NEXT:    addiw a1, a0, -1365
; RV64-NEXT:    slli a0, a1, 32
; RV64-NEXT:    add a1, a1, a0
; RV64-NEXT:    mv a0, s2
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    lui a1, %hi(.LCPI3_0)
; RV64-NEXT:    addi s1, s1, -2
; RV64-NEXT:    addi s3, s3, -1
; RV64-NEXT:    ld a1, %lo(.LCPI3_0)(a1)
; RV64-NEXT:    seqz a2, s1
; RV64-NEXT:    seqz a3, s3
; RV64-NEXT:    addi a3, a3, -1
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    slli a4, a2, 2
; RV64-NEXT:    slli a5, a3, 31
; RV64-NEXT:    srli a5, a5, 62
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    or a4, a5, a4
; RV64-NEXT:    slli a5, a0, 63
; RV64-NEXT:    srli a0, a0, 1
; RV64-NEXT:    or a0, a0, a5
; RV64-NEXT:    slli a2, a2, 29
; RV64-NEXT:    slli a3, a3, 33
; RV64-NEXT:    srli a2, a2, 61
; RV64-NEXT:    sltu a0, a1, a0
; RV64-NEXT:    neg a0, a0
; RV64-NEXT:    slli a0, a0, 31
; RV64-NEXT:    srli a0, a0, 31
; RV64-NEXT:    or a0, a0, a3
; RV64-NEXT:    sd a0, 0(s0)
; RV64-NEXT:    sw a4, 8(s0)
; RV64-NEXT:    sb a2, 12(s0)
; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 48
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_srem_vec:
; RV32M:       # %bb.0:
; RV32M-NEXT:    addi sp, sp, -32
; RV32M-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
; RV32M-NEXT:    mv s0, a0
; RV32M-NEXT:    lbu a1, 12(a0)
; RV32M-NEXT:    lw a2, 8(a0)
; RV32M-NEXT:    lw a3, 4(a0)
; RV32M-NEXT:    lw a0, 0(a0)
; RV32M-NEXT:    slli a4, a1, 30
; RV32M-NEXT:    srli s1, a2, 2
; RV32M-NEXT:    slli a5, a2, 31
; RV32M-NEXT:    or s1, s1, a4
; RV32M-NEXT:    srli a4, a3, 1
; RV32M-NEXT:    or s2, a4, a5
; RV32M-NEXT:    srli a1, a1, 2
; RV32M-NEXT:    srli a2, a2, 1
; RV32M-NEXT:    slli a3, a3, 31
; RV32M-NEXT:    slli a1, a1, 31
; RV32M-NEXT:    slli a2, a2, 31
; RV32M-NEXT:    srai s3, a1, 31
; RV32M-NEXT:    srai s4, a2, 31
; RV32M-NEXT:    srai a1, a3, 31
; RV32M-NEXT:    li a2, 6
; RV32M-NEXT:    li a3, 0
; RV32M-NEXT:    call __moddi3
; RV32M-NEXT:    mv s5, a0
; RV32M-NEXT:    mv s6, a1
; RV32M-NEXT:    li a2, 7
; RV32M-NEXT:    mv a0, s2
; RV32M-NEXT:    mv a1, s4
; RV32M-NEXT:    li a3, 0
; RV32M-NEXT:    call __moddi3
; RV32M-NEXT:    mv s2, a0
; RV32M-NEXT:    mv s4, a1
; RV32M-NEXT:    li a2, -5
; RV32M-NEXT:    li a3, -1
; RV32M-NEXT:    mv a0, s1
; RV32M-NEXT:    mv a1, s3
; RV32M-NEXT:    call __moddi3
; RV32M-NEXT:    or a2, s5, s6
; RV32M-NEXT:    xori a0, a0, 2
; RV32M-NEXT:    xori a3, s2, 1
; RV32M-NEXT:    snez a2, a2
; RV32M-NEXT:    or a0, a0, a1
; RV32M-NEXT:    or a1, a3, s4
; RV32M-NEXT:    seqz a0, a0
; RV32M-NEXT:    seqz a1, a1
; RV32M-NEXT:    neg a3, a2
; RV32M-NEXT:    addi a1, a1, -1
; RV32M-NEXT:    addi a0, a0, -1
; RV32M-NEXT:    andi a4, a0, 7
; RV32M-NEXT:    slli a5, a1, 1
; RV32M-NEXT:    or a2, a5, a2
; RV32M-NEXT:    srli a5, a1, 31
; RV32M-NEXT:    andi a1, a1, 1
; RV32M-NEXT:    slli a0, a0, 2
; RV32M-NEXT:    slli a1, a1, 1
; RV32M-NEXT:    or a0, a5, a0
; RV32M-NEXT:    or a0, a0, a1
; RV32M-NEXT:    sw a3, 0(s0)
; RV32M-NEXT:    sw a2, 4(s0)
; RV32M-NEXT:    sw a0, 8(s0)
; RV32M-NEXT:    sb a4, 12(s0)
; RV32M-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
; RV32M-NEXT:    addi sp, sp, 32
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_srem_vec:
; RV64M:       # %bb.0:
; RV64M-NEXT:    ld a1, 0(a0)
; RV64M-NEXT:    lwu a2, 8(a0)
; RV64M-NEXT:    lbu a3, 12(a0)
; RV64M-NEXT:    lui a4, %hi(.LCPI3_0)
; RV64M-NEXT:    lui a5, 699051
; RV64M-NEXT:    addiw a5, a5, -1365
; RV64M-NEXT:    slli a6, a5, 32
; RV64M-NEXT:    add a5, a5, a6
; RV64M-NEXT:    srli a6, a1, 2
; RV64M-NEXT:    slli a7, a2, 62
; RV64M-NEXT:    or a6, a7, a6
; RV64M-NEXT:    lui a7, %hi(.LCPI3_1)
; RV64M-NEXT:    slli a3, a3, 32
; RV64M-NEXT:    or a2, a2, a3
; RV64M-NEXT:    lui a3, %hi(.LCPI3_2)
; RV64M-NEXT:    ld a4, %lo(.LCPI3_0)(a4)
; RV64M-NEXT:    ld a7, %lo(.LCPI3_1)(a7)
; RV64M-NEXT:    ld a3, %lo(.LCPI3_2)(a3)
; RV64M-NEXT:    slli a1, a1, 31
; RV64M-NEXT:    srai a1, a1, 31
; RV64M-NEXT:    srai a6, a6, 31
; RV64M-NEXT:    slli a2, a2, 29
; RV64M-NEXT:    mul a1, a1, a5
; RV64M-NEXT:    srai a2, a2, 31
; RV64M-NEXT:    mulh a5, a6, a7
; RV64M-NEXT:    add a1, a1, a3
; RV64M-NEXT:    mulh a4, a2, a4
; RV64M-NEXT:    srli a7, a5, 63
; RV64M-NEXT:    srai a5, a5, 1
; RV64M-NEXT:    add a5, a5, a7
; RV64M-NEXT:    slli a7, a1, 63
; RV64M-NEXT:    srli a1, a1, 1
; RV64M-NEXT:    or a1, a1, a7
; RV64M-NEXT:    srli a7, a4, 63
; RV64M-NEXT:    srai a4, a4, 1
; RV64M-NEXT:    add a4, a4, a7
; RV64M-NEXT:    sltu a1, a3, a1
; RV64M-NEXT:    add a6, a6, a5
; RV64M-NEXT:    slli a5, a5, 3
; RV64M-NEXT:    add a2, a2, a4
; RV64M-NEXT:    slli a4, a4, 2
; RV64M-NEXT:    sub a3, a6, a5
; RV64M-NEXT:    neg a1, a1
; RV64M-NEXT:    add a2, a2, a4
; RV64M-NEXT:    addi a3, a3, -1
; RV64M-NEXT:    slli a1, a1, 31
; RV64M-NEXT:    seqz a3, a3
; RV64M-NEXT:    addi a2, a2, -2
; RV64M-NEXT:    srli a1, a1, 31
; RV64M-NEXT:    seqz a2, a2
; RV64M-NEXT:    addi a3, a3, -1
; RV64M-NEXT:    addi a2, a2, -1
; RV64M-NEXT:    slli a4, a3, 33
; RV64M-NEXT:    slli a3, a3, 31
; RV64M-NEXT:    or a1, a1, a4
; RV64M-NEXT:    slli a4, a2, 2
; RV64M-NEXT:    srli a3, a3, 62
; RV64M-NEXT:    slli a2, a2, 29
; RV64M-NEXT:    or a3, a3, a4
; RV64M-NEXT:    srli a2, a2, 61
; RV64M-NEXT:    sd a1, 0(a0)
; RV64M-NEXT:    sw a3, 8(a0)
; RV64M-NEXT:    sb a2, 12(a0)
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_srem_vec:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    addi sp, sp, -64
; RV32MV-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
; RV32MV-NEXT:    csrr a1, vlenb
; RV32MV-NEXT:    slli a1, a1, 1
; RV32MV-NEXT:    sub sp, sp, a1
; RV32MV-NEXT:    mv s0, a0
; RV32MV-NEXT:    lw a1, 8(a0)
; RV32MV-NEXT:    lbu a2, 12(a0)
; RV32MV-NEXT:    lw a3, 4(a0)
; RV32MV-NEXT:    lw a0, 0(a0)
; RV32MV-NEXT:    li a4, 1
; RV32MV-NEXT:    slli a5, a2, 30
; RV32MV-NEXT:    srli s1, a1, 2
; RV32MV-NEXT:    slli a6, a1, 31
; RV32MV-NEXT:    or s1, s1, a5
; RV32MV-NEXT:    srli a5, a3, 1
; RV32MV-NEXT:    or s2, a5, a6
; RV32MV-NEXT:    li a5, -1
; RV32MV-NEXT:    srli a2, a2, 2
; RV32MV-NEXT:    srli a1, a1, 1
; RV32MV-NEXT:    slli a3, a3, 31
; RV32MV-NEXT:    slli a2, a2, 31
; RV32MV-NEXT:    slli a6, a1, 31
; RV32MV-NEXT:    srai a1, a3, 31
; RV32MV-NEXT:    srai s3, a2, 31
; RV32MV-NEXT:    srai s4, a6, 31
; RV32MV-NEXT:    sw a5, 16(sp)
; RV32MV-NEXT:    sw a4, 20(sp)
; RV32MV-NEXT:    li a2, 6
; RV32MV-NEXT:    li a3, 0
; RV32MV-NEXT:    call __moddi3
; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT:    vmv.v.x v8, a0
; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
; RV32MV-NEXT:    addi a0, sp, 32
; RV32MV-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
; RV32MV-NEXT:    li a2, 7
; RV32MV-NEXT:    mv a0, s2
; RV32MV-NEXT:    mv a1, s4
; RV32MV-NEXT:    li a3, 0
; RV32MV-NEXT:    call __moddi3
; RV32MV-NEXT:    addi a2, sp, 32
; RV32MV-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT:    vslide1down.vx v8, v8, a0
; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
; RV32MV-NEXT:    addi a0, sp, 32
; RV32MV-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
; RV32MV-NEXT:    li a2, -5
; RV32MV-NEXT:    li a3, -1
; RV32MV-NEXT:    mv a0, s1
; RV32MV-NEXT:    mv a1, s3
; RV32MV-NEXT:    call __moddi3
; RV32MV-NEXT:    addi a2, sp, 16
; RV32MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32MV-NEXT:    vlse64.v v8, (a2), zero
; RV32MV-NEXT:    addi a2, sp, 32
; RV32MV-NEXT:    vl2r.v v10, (a2) # Unknown-size Folded Reload
; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT:    vslide1down.vx v10, v10, a0
; RV32MV-NEXT:    vslide1down.vx v10, v10, a1
; RV32MV-NEXT:    vslidedown.vi v10, v10, 2
; RV32MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32MV-NEXT:    vand.vv v8, v10, v8
; RV32MV-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32MV-NEXT:    vmv.v.i v10, 1
; RV32MV-NEXT:    vmv.v.i v11, 0
; RV32MV-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
; RV32MV-NEXT:    vslideup.vi v11, v10, 2
; RV32MV-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32MV-NEXT:    vmv.v.i v10, 2
; RV32MV-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
; RV32MV-NEXT:    vslideup.vi v11, v10, 4
; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT:    vsext.vf4 v12, v11
; RV32MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32MV-NEXT:    vmsne.vv v0, v8, v12
; RV32MV-NEXT:    vmv.v.i v8, 0
; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
; RV32MV-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32MV-NEXT:    vslidedown.vi v10, v8, 1
; RV32MV-NEXT:    vslidedown.vi v11, v8, 2
; RV32MV-NEXT:    vmv.x.s a0, v10
; RV32MV-NEXT:    vmv.x.s a1, v11
; RV32MV-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32MV-NEXT:    vslidedown.vi v10, v8, 4
; RV32MV-NEXT:    vmv.x.s a2, v10
; RV32MV-NEXT:    vslidedown.vi v10, v8, 5
; RV32MV-NEXT:    vmv.x.s a3, v10
; RV32MV-NEXT:    slli a4, a1, 1
; RV32MV-NEXT:    sub a4, a4, a0
; RV32MV-NEXT:    srli a0, a2, 30
; RV32MV-NEXT:    slli a3, a3, 2
; RV32MV-NEXT:    or a0, a3, a0
; RV32MV-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32MV-NEXT:    vse32.v v8, (s0)
; RV32MV-NEXT:    vslidedown.vi v8, v8, 3
; RV32MV-NEXT:    srli a1, a1, 31
; RV32MV-NEXT:    slli a2, a2, 2
; RV32MV-NEXT:    or a1, a1, a2
; RV32MV-NEXT:    vmv.x.s a2, v8
; RV32MV-NEXT:    andi a2, a2, 1
; RV32MV-NEXT:    slli a2, a2, 1
; RV32MV-NEXT:    andi a0, a0, 7
; RV32MV-NEXT:    or a1, a1, a2
; RV32MV-NEXT:    sw a4, 4(s0)
; RV32MV-NEXT:    sw a1, 8(s0)
; RV32MV-NEXT:    sb a0, 12(s0)
; RV32MV-NEXT:    csrr a0, vlenb
; RV32MV-NEXT:    slli a0, a0, 1
; RV32MV-NEXT:    add sp, sp, a0
; RV32MV-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
; RV32MV-NEXT:    addi sp, sp, 64
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_srem_vec:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    ld a1, 0(a0)
; RV64MV-NEXT:    lwu a2, 8(a0)
; RV64MV-NEXT:    lbu a3, 12(a0)
; RV64MV-NEXT:    lui a4, %hi(.LCPI3_0)
; RV64MV-NEXT:    lui a5, %hi(.LCPI3_1)
; RV64MV-NEXT:    lui a6, %hi(.LCPI3_2)
; RV64MV-NEXT:    lui a7, 32
; RV64MV-NEXT:    ld a4, %lo(.LCPI3_0)(a4)
; RV64MV-NEXT:    ld a5, %lo(.LCPI3_1)(a5)
; RV64MV-NEXT:    ld a6, %lo(.LCPI3_2)(a6)
; RV64MV-NEXT:    addi a7, a7, 256
; RV64MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64MV-NEXT:    vmv.s.x v8, a7
; RV64MV-NEXT:    slli a3, a3, 32
; RV64MV-NEXT:    srli a7, a1, 2
; RV64MV-NEXT:    or a3, a2, a3
; RV64MV-NEXT:    slli a2, a2, 62
; RV64MV-NEXT:    slli a1, a1, 31
; RV64MV-NEXT:    or a2, a2, a7
; RV64MV-NEXT:    srai a1, a1, 31
; RV64MV-NEXT:    slli a3, a3, 29
; RV64MV-NEXT:    srai a2, a2, 31
; RV64MV-NEXT:    mulh a5, a1, a5
; RV64MV-NEXT:    srai a3, a3, 31
; RV64MV-NEXT:    mulh a4, a2, a4
; RV64MV-NEXT:    srli a7, a5, 63
; RV64MV-NEXT:    add a5, a5, a7
; RV64MV-NEXT:    srli a7, a4, 63
; RV64MV-NEXT:    srai a4, a4, 1
; RV64MV-NEXT:    mulh a6, a3, a6
; RV64MV-NEXT:    add a4, a4, a7
; RV64MV-NEXT:    slli a7, a5, 3
; RV64MV-NEXT:    slli a5, a5, 1
; RV64MV-NEXT:    sub a5, a5, a7
; RV64MV-NEXT:    srli a7, a6, 63
; RV64MV-NEXT:    srai a6, a6, 1
; RV64MV-NEXT:    add a6, a6, a7
; RV64MV-NEXT:    add a2, a2, a4
; RV64MV-NEXT:    slli a4, a4, 3
; RV64MV-NEXT:    sub a2, a2, a4
; RV64MV-NEXT:    add a1, a1, a5
; RV64MV-NEXT:    li a4, -1
; RV64MV-NEXT:    srli a4, a4, 31
; RV64MV-NEXT:    vsext.vf8 v10, v8
; RV64MV-NEXT:    add a3, a3, a6
; RV64MV-NEXT:    slli a6, a6, 2
; RV64MV-NEXT:    vmv.v.x v8, a1
; RV64MV-NEXT:    add a3, a3, a6
; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
; RV64MV-NEXT:    vslide1down.vx v8, v8, a3
; RV64MV-NEXT:    vslidedown.vi v8, v8, 1
; RV64MV-NEXT:    vand.vx v8, v8, a4
; RV64MV-NEXT:    vmsne.vv v0, v8, v10
; RV64MV-NEXT:    vmv.v.i v8, 0
; RV64MV-NEXT:    vmerge.vim v8, v8, -1, v0
; RV64MV-NEXT:    vslidedown.vi v10, v8, 2
; RV64MV-NEXT:    vmv.x.s a1, v8
; RV64MV-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64MV-NEXT:    vslidedown.vi v8, v8, 1
; RV64MV-NEXT:    vmv.x.s a2, v10
; RV64MV-NEXT:    and a1, a1, a4
; RV64MV-NEXT:    vmv.x.s a3, v8
; RV64MV-NEXT:    slli a4, a2, 31
; RV64MV-NEXT:    slli a5, a3, 33
; RV64MV-NEXT:    slli a2, a2, 2
; RV64MV-NEXT:    slli a3, a3, 31
; RV64MV-NEXT:    srli a4, a4, 61
; RV64MV-NEXT:    or a1, a1, a5
; RV64MV-NEXT:    srli a3, a3, 62
; RV64MV-NEXT:    or a2, a3, a2
; RV64MV-NEXT:    sd a1, 0(a0)
; RV64MV-NEXT:    sw a2, 8(a0)
; RV64MV-NEXT:    sb a4, 12(a0)
; RV64MV-NEXT:    ret
  %ld = load <3 x i33>, ptr %X
  %srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>
  %cmp = icmp ne <3 x i33> %srem, <i33 0, i33 1, i33 2>
  %ext = sext <3 x i1> %cmp to <3 x i33>
  store <3 x i33> %ext, ptr %X
  ret void
}