1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
4 ; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
5 ; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s
6 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
7 ; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
8 ; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
9 ; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s
; Test that srem by the positive odd constant 95 is lowered to a libcall on
; RV32I/RV64I and to a mulh-based sequence (no div) when +m is available.
define i32 @fold_srem_positive_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_positive_odd:
; RV32I: # %bb.0:
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: tail __modsi3
;
; RV32IM-LABEL: fold_srem_positive_odd:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lui a1, 706409
; RV32IM-NEXT: addi a1, a1, 389
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: add a1, a1, a0
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 6
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: li a2, 95
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_srem_positive_odd:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: fold_srem_positive_odd:
; RV64IM: # %bb.0:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 706409
; RV64IM-NEXT: addiw a2, a2, 389
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: add a1, a1, a0
; RV64IM-NEXT: srliw a2, a1, 31
; RV64IM-NEXT: sraiw a1, a1, 6
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: li a2, 95
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
  %1 = srem i32 %x, 95
  ret i32 %1
}
; Test srem by the positive even constant 1060 (divisor visible in the IR
; body below); with +m the remainder is computed via mulh + shift + mul + sub.
define i32 @fold_srem_positive_even(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_positive_even:
; RV32I: # %bb.0:
; RV32I-NEXT: li a1, 1060
; RV32I-NEXT: tail __modsi3
;
; RV32IM-LABEL: fold_srem_positive_even:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lui a1, 253241
; RV32IM-NEXT: addi a1, a1, -15
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 8
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: li a2, 1060
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_srem_positive_even:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: li a1, 1060
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: fold_srem_positive_even:
; RV64IM: # %bb.0:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 253241
; RV64IM-NEXT: addiw a2, a2, -15
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a2, a1, 63
; RV64IM-NEXT: srai a1, a1, 40
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: li a2, 1060
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
  %1 = srem i32 %x, 1060
  ret i32 %1
}
; Test srem by the negative odd constant -723 (divisor visible in the IR body
; below); the magic-number multiply sequence handles the negative divisor.
define i32 @fold_srem_negative_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_negative_odd:
; RV32I: # %bb.0:
; RV32I-NEXT: li a1, -723
; RV32I-NEXT: tail __modsi3
;
; RV32IM-LABEL: fold_srem_negative_odd:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lui a1, 677296
; RV32IM-NEXT: addi a1, a1, -91
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 8
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: li a2, -723
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_srem_negative_odd:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: li a1, -723
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: fold_srem_negative_odd:
; RV64IM: # %bb.0:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 677296
; RV64IM-NEXT: addiw a2, a2, -91
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a2, a1, 63
; RV64IM-NEXT: srai a1, a1, 40
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: li a2, -723
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
  %1 = srem i32 %x, -723
  ret i32 %1
}
; Test srem by the negative even constant -22981 (divisor visible in the IR
; body below); the constant needs a lui+addi pair to materialize.
define i32 @fold_srem_negative_even(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_negative_even:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a1, 1048570
; RV32I-NEXT: addi a1, a1, 1595
; RV32I-NEXT: tail __modsi3
;
; RV32IM-LABEL: fold_srem_negative_even:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lui a1, 1036895
; RV32IM-NEXT: addi a1, a1, 999
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 8
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: lui a2, 1048570
; RV32IM-NEXT: addi a2, a2, 1595
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_srem_negative_even:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: lui a1, 1048570
; RV64I-NEXT: addiw a1, a1, 1595
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: fold_srem_negative_even:
; RV64IM: # %bb.0:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 1036895
; RV64IM-NEXT: addiw a2, a2, 999
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a2, a1, 63
; RV64IM-NEXT: srai a1, a1, 40
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: lui a2, 1048570
; RV64IM-NEXT: addi a2, a2, 1595
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
  %1 = srem i32 %x, -22981
  ret i32 %1
}
; Don't fold if we can combine srem with sdiv: the quotient is reused, so the
; remainder is computed as x - quotient*95 instead of a standalone sequence.
define i32 @combine_srem_sdiv(i32 %x) nounwind {
; RV32I-LABEL: combine_srem_sdiv:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __divsi3
; RV32I-NEXT: add a0, s1, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IM-LABEL: combine_srem_sdiv:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lui a1, 706409
; RV32IM-NEXT: addi a1, a1, 389
; RV32IM-NEXT: mulh a1, a0, a1
; RV32IM-NEXT: add a1, a1, a0
; RV32IM-NEXT: srli a2, a1, 31
; RV32IM-NEXT: srai a1, a1, 6
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: li a2, 95
; RV32IM-NEXT: mul a2, a1, a2
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: sub a0, a0, a2
; RV32IM-NEXT: ret
;
; RV64I-LABEL: combine_srem_sdiv:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sext.w s0, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __divdi3
; RV64I-NEXT: addw a0, s1, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IM-LABEL: combine_srem_sdiv:
; RV64IM: # %bb.0:
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: lui a2, 706409
; RV64IM-NEXT: addiw a2, a2, 389
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: add a1, a1, a0
; RV64IM-NEXT: srliw a2, a1, 31
; RV64IM-NEXT: sraiw a1, a1, 6
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: li a2, 95
; RV64IM-NEXT: mul a2, a1, a2
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: subw a0, a0, a2
; RV64IM-NEXT: ret
  %1 = srem i32 %x, 95
  %2 = sdiv i32 %x, 95
  %3 = add i32 %1, %2
  ret i32 %3
}
; Don't fold for divisors that are a power of two: srem by 64 lowers to a
; shift/mask sequence (andi with -64) on all configurations, no multiply.
define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_power_of_two:
; RV32I: # %bb.0:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 26
; RV32I-NEXT: add a1, a0, a1
; RV32I-NEXT: andi a1, a1, -64
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV32IM-LABEL: dont_fold_srem_power_of_two:
; RV32IM: # %bb.0:
; RV32IM-NEXT: srai a1, a0, 31
; RV32IM-NEXT: srli a1, a1, 26
; RV32IM-NEXT: add a1, a0, a1
; RV32IM-NEXT: andi a1, a1, -64
; RV32IM-NEXT: sub a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_srem_power_of_two:
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 26
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: andi a1, a1, -64
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: dont_fold_srem_power_of_two:
; RV64IM: # %bb.0:
; RV64IM-NEXT: sraiw a1, a0, 31
; RV64IM-NEXT: srliw a1, a1, 26
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: andi a1, a1, -64
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
  %1 = srem i32 %x, 64
  ret i32 %1
}
; Don't fold if the divisor is one: x srem 1 is always 0, so the whole
; function constant-folds to returning zero on every configuration.
define i32 @dont_fold_srem_one(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_srem_one:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ret
  %1 = srem i32 %x, 1
  ret i32 %1
}
; Don't fold if the divisor is 2^31 (i32 INT_MIN as an unsigned literal,
; visible in the IR body below); lowered with a mask of the sign bucket.
define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_i32_smax:
; RV32I: # %bb.0:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 1
; RV32I-NEXT: add a1, a0, a1
; RV32I-NEXT: lui a2, 524288
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: ret
;
; RV32IM-LABEL: dont_fold_srem_i32_smax:
; RV32IM: # %bb.0:
; RV32IM-NEXT: srai a1, a0, 31
; RV32IM-NEXT: srli a1, a1, 1
; RV32IM-NEXT: add a1, a0, a1
; RV32IM-NEXT: lui a2, 524288
; RV32IM-NEXT: and a1, a1, a2
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_srem_i32_smax:
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 1
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: dont_fold_srem_i32_smax:
; RV64IM: # %bb.0:
; RV64IM-NEXT: sraiw a1, a0, 31
; RV64IM-NEXT: srliw a1, a1, 1
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: lui a2, 524288
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: addw a0, a0, a1
; RV64IM-NEXT: ret
  %1 = srem i32 %x, 2147483648
  ret i32 %1
}
; Don't fold i64 srem on RV32 (always a libcall); RV64IM uses a 64-bit magic
; constant loaded from the constant pool, RV64I tail-calls __moddi3.
define i64 @dont_fold_srem_i64(i64 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: li a2, 98
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __moddi3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IM-LABEL: dont_fold_srem_i64:
; RV32IM: # %bb.0:
; RV32IM-NEXT: addi sp, sp, -16
; RV32IM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: li a2, 98
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __moddi3
; RV32IM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT: addi sp, sp, 16
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_srem_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 98
; RV64I-NEXT: tail __moddi3
;
; RV64IM-LABEL: dont_fold_srem_i64:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lui a1, %hi(.LCPI8_0)
; RV64IM-NEXT: ld a1, %lo(.LCPI8_0)(a1)
; RV64IM-NEXT: mulh a1, a0, a1
; RV64IM-NEXT: srli a2, a1, 63
; RV64IM-NEXT: srai a1, a1, 5
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: li a2, 98
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: sub a0, a0, a1
; RV64IM-NEXT: ret
  %1 = srem i64 %x, 98
  ret i64 %1
}