; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s
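
; This file checks how urem by a constant is lowered. Without the M
; extension the operation becomes a libcall (__umodsi3/__umoddi3); with M
; it is strength-reduced to a multiply-high by a "magic" reciprocal,
; computing x % d as x - (x / d) * d. A sketch of the d = 95 case below,
; with the constants taken from the RV32IM check lines (the derivation
; follows the standard round-up magic-number method):
;   m  = ceil(2^39 / 95) - 2^32 = 364242 * 2^12 + 777 = 1491936009
;   hi = mulhu(x, m)
;   q  = (((x - hi) >> 1) + hi) >> 6    (the quotient x / 95)
;   x % 95 = x - 95 * q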
define i32 @fold_urem_positive_odd(i32 %x) {
; RV32I-LABEL: fold_urem_positive_odd:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_positive_odd:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 364242
; RV32IM-NEXT:    addi a1, a1, 777
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    sub a2, a0, a1
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a1, a2, a1
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    addi a2, zero, 95
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_positive_odd:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_positive_odd:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1423
; RV64IM-NEXT:    addiw a1, a1, -733
; RV64IM-NEXT:    slli a1, a1, 15
; RV64IM-NEXT:    addi a1, a1, 1035
; RV64IM-NEXT:    slli a1, a1, 13
; RV64IM-NEXT:    addi a1, a1, -1811
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 561
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    sub a2, a0, a1
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a1, a2, a1
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    addi a2, zero, 95
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 95
  ret i32 %1
}
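
; For an even divisor the fixup (sub/srli/add) is not needed: the magic
; multiplier for d = 1060 fits in 32 bits, so the quotient is a single
; multiply-high plus shift. A sketch, with the constants taken from the
; RV32IM check lines below:
;   m = 1012964 * 2^12 - 61 = 4149100483 = ceil(2^42 / 1060)
;   q = mulhu(x, m) >> 10
;   x % 1060 = x - 1060 * q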
define i32 @fold_urem_positive_even(i32 %x) {
; RV32I-LABEL: fold_urem_positive_even:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    addi a1, zero, 1060
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_positive_even:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 1012964
; RV32IM-NEXT:    addi a1, a1, -61
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    srli a1, a1, 10
; RV32IM-NEXT:    addi a2, zero, 1060
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_positive_even:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    addi a1, zero, 1060
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_positive_even:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1048020
; RV64IM-NEXT:    addiw a1, a1, -1793
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 139
; RV64IM-NEXT:    slli a1, a1, 14
; RV64IM-NEXT:    addi a1, a1, 1793
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, -139
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    srli a1, a1, 10
; RV64IM-NEXT:    addi a2, zero, 1060
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 1060
  ret i32 %1
}

; Don't fold if we can combine urem with udiv.
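; With M, the quotient is computed once with the magic-number sequence and
; the remainder is derived from it as x - q * 95; without M, both the
; __umodsi3/__umoddi3 and __udivsi3/__udivdi3 libcalls are still emitted.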
define i32 @combine_urem_udiv(i32 %x) {
; RV32I-LABEL: combine_urem_udiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    sw s0, 8(sp)
; RV32I-NEXT:    sw s1, 4(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    add a0, s1, a0
; RV32I-NEXT:    lw s1, 4(sp)
; RV32I-NEXT:    lw s0, 8(sp)
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 364242
; RV32IM-NEXT:    addi a1, a1, 777
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    sub a2, a0, a1
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a1, a2, a1
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    addi a2, zero, 95
; RV32IM-NEXT:    mul a2, a1, a2
; RV32IM-NEXT:    sub a0, a0, a2
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    .cfi_def_cfa_offset 32
; RV64I-NEXT:    sd ra, 24(sp)
; RV64I-NEXT:    sd s0, 16(sp)
; RV64I-NEXT:    sd s1, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli s0, a0, 32
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    add a0, s1, a0
; RV64I-NEXT:    ld s1, 8(sp)
; RV64I-NEXT:    ld s0, 16(sp)
; RV64I-NEXT:    ld ra, 24(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1423
; RV64IM-NEXT:    addiw a1, a1, -733
; RV64IM-NEXT:    slli a1, a1, 15
; RV64IM-NEXT:    addi a1, a1, 1035
; RV64IM-NEXT:    slli a1, a1, 13
; RV64IM-NEXT:    addi a1, a1, -1811
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 561
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    sub a2, a0, a1
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a1, a2, a1
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    addi a2, zero, 95
; RV64IM-NEXT:    mul a2, a1, a2
; RV64IM-NEXT:    sub a0, a0, a2
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 95
  %2 = udiv i32 %x, 95
  %3 = add i32 %1, %2
  ret i32 %3
}

; Don't fold for divisors that are a power of two.
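; The remainder modulo a power of two is just the low bits:
; x % 64 == x & 63, so a single andi is emitted on every configuration.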
define i32 @dont_fold_urem_power_of_two(i32 %x) {
; CHECK-LABEL: dont_fold_urem_power_of_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a0, a0, 63
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 64
  ret i32 %1
}

; Don't fold if the divisor is one.
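; x % 1 is 0 for every x, so the result is simply materialized as zero.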
define i32 @dont_fold_urem_one(i32 %x) {
; CHECK-LABEL: dont_fold_urem_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mv a0, zero
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 1
  ret i32 %1
}

; Don't fold if the divisor is 2^32.
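; Every i32 value is already smaller than 2^32, so the remainder is the
; input itself and no instructions are needed beyond the return.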
define i32 @dont_fold_urem_i32_umax(i32 %x) {
; CHECK-LABEL: dont_fold_urem_i32_umax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 4294967296
  ret i32 %1
}

; Don't fold i64 urem.
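; On riscv32 an i64 urem is always lowered to the __umoddi3 libcall, even
; with M. On RV64IM the divisor 98 = 2 * 49 is even, so (reading the check
; lines below) the input is first shifted right by one and the remaining
; division by 49 is done with a multiply-high and shift.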
define i64 @dont_fold_urem_i64(i64 %x) {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    addi a2, zero, 98
; RV32I-NEXT:    mv a3, zero
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -16
; RV32IM-NEXT:    .cfi_def_cfa_offset 16
; RV32IM-NEXT:    sw ra, 12(sp)
; RV32IM-NEXT:    .cfi_offset ra, -4
; RV32IM-NEXT:    addi a2, zero, 98
; RV32IM-NEXT:    mv a3, zero
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    lw ra, 12(sp)
; RV32IM-NEXT:    .cfi_restore ra
; RV32IM-NEXT:    addi sp, sp, 16
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    addi a1, zero, 98
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    srli a1, a0, 1
; RV64IM-NEXT:    lui a2, 2675
; RV64IM-NEXT:    addiw a2, a2, -251
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 1839
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 167
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 1505
; RV64IM-NEXT:    mulhu a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 4
; RV64IM-NEXT:    addi a2, zero, 98
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem i64 %x, 98
  ret i64 %1
}