; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s
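
; A rough sketch of the transform the fold_srem_* tests below expect when a
; hardware multiplier is available (the constants are taken from the RV32IM
; checks for the divisor 95; the example input is illustrative):
;   magic = 0xac769185 (lui 706409 / addi 389), shift = 6
;   q     = ((mulh(x, magic) + x) >> 6) + (sign bit of the pre-shift value)
;   rem   = x - q * 95
; e.g. x = 1000: mulh = -327, -327 + 1000 = 673, 673 >> 6 = 10 = 1000/95,
; rem = 1000 - 10 * 95 = 50.
; Without the M extension the remainder is computed by a __modsi3/__moddi3
; libcall instead.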
define i32 @fold_srem_positive_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_positive_odd:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi a1, zero, 95
; RV32I-NEXT: call __modsi3@plt
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32IM-LABEL: fold_srem_positive_odd:
; RV32IM-NEXT: lui a1, 706409
; RV32IM-NEXT: addi a1, a1, 389
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: add a1, a1, a0
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 6
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: addi a2, zero, 95
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV64I-LABEL: fold_srem_positive_odd:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: addi a1, zero, 95
; RV64I-NEXT: call __moddi3@plt
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64IM-LABEL: fold_srem_positive_odd:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 706409
; RV64IM-NEXT: addiw a2, a2, 389
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: addw a1, a1, a0
; RV64IM-NEXT: srliw a2, a1, 31
; RV64IM-NEXT: srli a1, a1, 6
; RV64IM-NEXT: addw a1, a1, a2
; RV64IM-NEXT: addi a2, zero, 95
; RV64IM-NEXT: mulw a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
  %1 = srem i32 %x, 95
  ret i32 %1
}

define i32 @fold_srem_positive_even(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_positive_even:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi a1, zero, 1060
; RV32I-NEXT: call __modsi3@plt
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32IM-LABEL: fold_srem_positive_even:
; RV32IM-NEXT: lui a1, 253241
; RV32IM-NEXT: addi a1, a1, -15
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 8
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: addi a2, zero, 1060
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV64I-LABEL: fold_srem_positive_even:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: addi a1, zero, 1060
; RV64I-NEXT: call __moddi3@plt
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64IM-LABEL: fold_srem_positive_even:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 253241
; RV64IM-NEXT: addiw a2, a2, -15
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a2, a1, 63
; RV64IM-NEXT: srai a1, a1, 40
; RV64IM-NEXT: addw a1, a1, a2
; RV64IM-NEXT: addi a2, zero, 1060
; RV64IM-NEXT: mulw a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
  %1 = srem i32 %x, 1060
  ret i32 %1
}

define i32 @fold_srem_negative_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_negative_odd:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi a1, zero, -723
; RV32I-NEXT: call __modsi3@plt
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32IM-LABEL: fold_srem_negative_odd:
; RV32IM-NEXT: lui a1, 677296
; RV32IM-NEXT: addi a1, a1, -91
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 8
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: addi a2, zero, -723
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV64I-LABEL: fold_srem_negative_odd:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: addi a1, zero, -723
; RV64I-NEXT: call __moddi3@plt
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64IM-LABEL: fold_srem_negative_odd:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 677296
; RV64IM-NEXT: addiw a2, a2, -91
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a2, a1, 63
; RV64IM-NEXT: srai a1, a1, 40
; RV64IM-NEXT: addw a1, a1, a2
; RV64IM-NEXT: addi a2, zero, -723
; RV64IM-NEXT: mulw a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
  %1 = srem i32 %x, -723
  ret i32 %1
}

define i32 @fold_srem_negative_even(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_negative_even:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a1, 1048570
; RV32I-NEXT: addi a1, a1, 1595
; RV32I-NEXT: call __modsi3@plt
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32IM-LABEL: fold_srem_negative_even:
; RV32IM-NEXT: lui a1, 1036895
; RV32IM-NEXT: addi a1, a1, 999
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 8
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: lui a2, 1048570
; RV32IM-NEXT: addi a2, a2, 1595
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV64I-LABEL: fold_srem_negative_even:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: lui a1, 1048570
; RV64I-NEXT: addiw a1, a1, 1595
; RV64I-NEXT: call __moddi3@plt
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64IM-LABEL: fold_srem_negative_even:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 1036895
; RV64IM-NEXT: addiw a2, a2, 999
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a2, a1, 63
; RV64IM-NEXT: srai a1, a1, 40
; RV64IM-NEXT: addw a1, a1, a2
; RV64IM-NEXT: lui a2, 1048570
; RV64IM-NEXT: addiw a2, a2, 1595
; RV64IM-NEXT: mulw a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
  %1 = srem i32 %x, -22981
  ret i32 %1
}

; Don't fold if we can combine srem with sdiv.
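; In the IM configurations below the quotient is computed once with the same
; magic-multiply sequence as above and the remainder is then derived from it,
; roughly:
;   q      = magic-multiply(x, 95)
;   rem    = x - q * 95
;   result = rem + q
; Without M, RV32 still calls both __modsi3 and __divsi3 (RV64: __moddi3 and
; __divdi3).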
define i32 @combine_srem_sdiv(i32 %x) nounwind {
; RV32I-LABEL: combine_srem_sdiv:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: addi a1, zero, 95
; RV32I-NEXT: call __modsi3@plt
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: addi a1, zero, 95
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __divsi3@plt
; RV32I-NEXT: add a0, s1, a0
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32IM-LABEL: combine_srem_sdiv:
; RV32IM-NEXT: lui a1, 706409
; RV32IM-NEXT: addi a1, a1, 389
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: add a1, a1, a0
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 6
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: addi a2, zero, 95
; RV32IM-NEXT: mul a2, a1, a2
; RV32IM-NEXT: sub a0, a0, a2
; RV32IM-NEXT: add a0, a0, a1
; RV64I-LABEL: combine_srem_sdiv:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w s0, a0
; RV64I-NEXT: addi a1, zero, 95
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __moddi3@plt
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: addi a1, zero, 95
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __divdi3@plt
; RV64I-NEXT: addw a0, s1, a0
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64IM-LABEL: combine_srem_sdiv:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 706409
; RV64IM-NEXT: addiw a2, a2, 389
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: addw a1, a1, a0
; RV64IM-NEXT: srliw a2, a1, 31
; RV64IM-NEXT: sraiw a1, a1, 6
; RV64IM-NEXT: addw a1, a1, a2
; RV64IM-NEXT: addi a2, zero, 95
; RV64IM-NEXT: mulw a2, a1, a2
; RV64IM-NEXT: subw a0, a0, a2
; RV64IM-NEXT: addw a0, a0, a1
  %1 = srem i32 %x, 95
  %2 = sdiv i32 %x, 95
  %3 = add i32 %1, %2
  ret i32 %3
}

; Don't fold for divisors that are a power of two.
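; For x srem 64 the expected lowering (see the checks below) biases negative
; inputs by 63 before masking off the low bits:
;   rem = x - ((x + ((x >> 31) >>u 26)) & -64)
; e.g. x = -70: bias = 63, (-70 + 63) & -64 = -64, rem = -70 - (-64) = -6.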
define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_power_of_two:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 26
; RV32I-NEXT: add a1, a0, a1
; RV32I-NEXT: andi a1, a1, -64
; RV32I-NEXT: sub a0, a0, a1
; RV32IM-LABEL: dont_fold_srem_power_of_two:
; RV32IM-NEXT: srai a1, a0, 31
; RV32IM-NEXT: srli a1, a1, 26
; RV32IM-NEXT: add a1, a0, a1
; RV32IM-NEXT: andi a1, a1, -64
; RV32IM-NEXT: sub a0, a0, a1
; RV64I-LABEL: dont_fold_srem_power_of_two:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 26
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: andi a1, a1, -64
; RV64I-NEXT: subw a0, a0, a1
; RV64IM-LABEL: dont_fold_srem_power_of_two:
; RV64IM-NEXT: sraiw a1, a0, 31
; RV64IM-NEXT: srliw a1, a1, 26
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: andi a1, a1, -64
; RV64IM-NEXT: subw a0, a0, a1
  %1 = srem i32 %x, 64
  ret i32 %1
}

; Don't fold if the divisor is one.
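; x srem 1 is always 0, so every configuration should fold the body to
; returning zero; a single shared CHECK prefix covers all four RUN lines.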
define i32 @dont_fold_srem_one(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_srem_one:
; CHECK-NEXT: mv a0, zero
  %1 = srem i32 %x, 1
  ret i32 %1
}

; Don't fold if the divisor is 2^31.
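; 2147483648 wraps to the i32 value -2^31, so this is the power-of-two pattern
; with the widest possible mask: bias negative inputs by 2^31 - 1, mask with
; 0x80000000 (lui 524288), and subtract. Since -0x80000000 and 0x80000000 are
; the same 32-bit value, the final subtract can be emitted as an add.
; e.g. x = INT_MIN: (x + 0x7fffffff) & 0x80000000 = 0x80000000, rem = 0.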
define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_i32_smax:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 1
; RV32I-NEXT: add a1, a0, a1
; RV32I-NEXT: lui a2, 524288
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: add a0, a0, a1
; RV32IM-LABEL: dont_fold_srem_i32_smax:
; RV32IM-NEXT: srai a1, a0, 31
; RV32IM-NEXT: srli a1, a1, 1
; RV32IM-NEXT: add a1, a0, a1
; RV32IM-NEXT: lui a2, 524288
; RV32IM-NEXT: and a1, a1, a2
; RV32IM-NEXT: add a0, a0, a1
; RV64I-LABEL: dont_fold_srem_i32_smax:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 1
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: addw a0, a0, a1
; RV64IM-LABEL: dont_fold_srem_i32_smax:
; RV64IM-NEXT: sraiw a1, a0, 31
; RV64IM-NEXT: srliw a1, a1, 1
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: lui a2, 524288
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: addw a0, a0, a1
  %1 = srem i32 %x, 2147483648
  ret i32 %1
}

; Don't fold i64 srem.
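; On RV32 the i64 remainder stays a __moddi3 libcall even with M (presumably
; because forming the high half of a 64x64 product would take several
; multiplies), while RV64IM still strength-reduces: the 64-bit reciprocal for
; 98 is materialized with a lui/addiw seed and three slli/addi steps, then the
; usual mulh / shift-by-5 / mul / sub sequence follows.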
define i64 @dont_fold_srem_i64(i64 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_i64:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi a2, zero, 98
; RV32I-NEXT: mv a3, zero
; RV32I-NEXT: call __moddi3@plt
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32IM-LABEL: dont_fold_srem_i64:
; RV32IM-NEXT: addi sp, sp, -16
; RV32IM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: addi a2, zero, 98
; RV32IM-NEXT: mv a3, zero
; RV32IM-NEXT: call __moddi3@plt
; RV32IM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT: addi sp, sp, 16
; RV64I-LABEL: dont_fold_srem_i64:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi a1, zero, 98
; RV64I-NEXT: call __moddi3@plt
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64IM-LABEL: dont_fold_srem_i64:
; RV64IM-NEXT: lui a1, 2675
; RV64IM-NEXT: addiw a1, a1, -251
; RV64IM-NEXT: slli a1, a1, 13
; RV64IM-NEXT: addi a1, a1, 1839
; RV64IM-NEXT: slli a1, a1, 13
; RV64IM-NEXT: addi a1, a1, 167
; RV64IM-NEXT: slli a1, a1, 13
; RV64IM-NEXT: addi a1, a1, 1505
; RV64IM-NEXT: mulh a1, a0, a1
; RV64IM-NEXT: srli a2, a1, 63
; RV64IM-NEXT: srai a1, a1, 5
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: addi a2, zero, 98
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: sub a0, a0, a1
  %1 = srem i64 %x, 98
  ret i64 %1
}