1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
4 ; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
5 ; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s
6 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
7 ; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
8 ; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
9 ; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s
; Checks lowering of a 32-bit urem in @fold_urem_positive_odd for each RUN
; configuration. The IR body is not visible in this excerpt; the divisor 95 is
; taken from the `li a1, 95` in the expected output below.
;   - RV32I: expected to be `tail __umodsi3` with 95 in a1.
;   - RV64I: expected to be `call __umoddi3` with 95 in a1, after a0 is shifted
;     left and then right by 32.
;   - RV32IM / RV64IM: no libcall; a mulhu by a materialized constant followed
;     by a sub/shift/add/shift sequence yields a value that is multiplied by 95
;     and subtracted from the input.
11 define i32 @fold_urem_positive_odd(i32 %x) nounwind {
12 ; RV32I-LABEL: fold_urem_positive_odd:
14 ; RV32I-NEXT: li a1, 95
15 ; RV32I-NEXT: tail __umodsi3
17 ; RV32IM-LABEL: fold_urem_positive_odd:
19 ; RV32IM-NEXT: lui a1, 364242
20 ; RV32IM-NEXT: addi a1, a1, 777
21 ; RV32IM-NEXT: mulhu a1, a0, a1
22 ; RV32IM-NEXT: sub a2, a0, a1
23 ; RV32IM-NEXT: srli a2, a2, 1
24 ; RV32IM-NEXT: add a1, a2, a1
25 ; RV32IM-NEXT: srli a1, a1, 6
26 ; RV32IM-NEXT: li a2, 95
27 ; RV32IM-NEXT: mul a1, a1, a2
28 ; RV32IM-NEXT: sub a0, a0, a1
31 ; RV64I-LABEL: fold_urem_positive_odd:
33 ; RV64I-NEXT: addi sp, sp, -16
34 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
35 ; RV64I-NEXT: slli a0, a0, 32
36 ; RV64I-NEXT: srli a0, a0, 32
37 ; RV64I-NEXT: li a1, 95
38 ; RV64I-NEXT: call __umoddi3
39 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
40 ; RV64I-NEXT: addi sp, sp, 16
43 ; RV64IM-LABEL: fold_urem_positive_odd:
45 ; RV64IM-NEXT: slli a1, a0, 32
46 ; RV64IM-NEXT: lui a2, 364242
47 ; RV64IM-NEXT: addi a2, a2, 777
48 ; RV64IM-NEXT: slli a2, a2, 32
49 ; RV64IM-NEXT: mulhu a1, a1, a2
50 ; RV64IM-NEXT: srli a1, a1, 32
51 ; RV64IM-NEXT: subw a2, a0, a1
52 ; RV64IM-NEXT: srliw a2, a2, 1
53 ; RV64IM-NEXT: add a1, a2, a1
54 ; RV64IM-NEXT: srli a1, a1, 6
55 ; RV64IM-NEXT: li a2, 95
56 ; RV64IM-NEXT: mul a1, a1, a2
57 ; RV64IM-NEXT: subw a0, a0, a1
; Checks lowering of `urem i32 %x, 1060` for each RUN configuration.
;   - RV32I: expected to be `tail __umodsi3` with 1060 in a1.
;   - RV64I: expected to be `call __umoddi3` with 1060 in a1, after a0 is
;     shifted left and then right by 32.
;   - RV32IM / RV64IM: no libcall; a mulhu by a materialized constant and a
;     single right shift yield a value that is multiplied by 1060 and
;     subtracted from the input.
64 define i32 @fold_urem_positive_even(i32 %x) nounwind {
65 ; RV32I-LABEL: fold_urem_positive_even:
67 ; RV32I-NEXT: li a1, 1060
68 ; RV32I-NEXT: tail __umodsi3
70 ; RV32IM-LABEL: fold_urem_positive_even:
72 ; RV32IM-NEXT: lui a1, 1012964
73 ; RV32IM-NEXT: addi a1, a1, -61
74 ; RV32IM-NEXT: mulhu a1, a0, a1
75 ; RV32IM-NEXT: srli a1, a1, 10
76 ; RV32IM-NEXT: li a2, 1060
77 ; RV32IM-NEXT: mul a1, a1, a2
78 ; RV32IM-NEXT: sub a0, a0, a1
81 ; RV64I-LABEL: fold_urem_positive_even:
83 ; RV64I-NEXT: addi sp, sp, -16
84 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
85 ; RV64I-NEXT: slli a0, a0, 32
86 ; RV64I-NEXT: srli a0, a0, 32
87 ; RV64I-NEXT: li a1, 1060
88 ; RV64I-NEXT: call __umoddi3
89 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
90 ; RV64I-NEXT: addi sp, sp, 16
93 ; RV64IM-LABEL: fold_urem_positive_even:
95 ; RV64IM-NEXT: slli a1, a0, 32
96 ; RV64IM-NEXT: lui a2, 1012964
97 ; RV64IM-NEXT: addi a2, a2, -61
98 ; RV64IM-NEXT: slli a2, a2, 32
99 ; RV64IM-NEXT: mulhu a1, a1, a2
100 ; RV64IM-NEXT: srli a1, a1, 42
101 ; RV64IM-NEXT: li a2, 1060
102 ; RV64IM-NEXT: mul a1, a1, a2
103 ; RV64IM-NEXT: subw a0, a0, a1
105 %1 = urem i32 %x, 1060
110 ; Don't fold if we can combine urem with udiv.
; The IR body is not visible in this excerpt; the expectations below indicate
; that the function adds a remainder and a quotient for divisor 95 (TODO:
; confirm against the IR).
;   - RV32I: two libcalls on the same saved input (s0), __umodsi3 then
;     __udivsi3, whose results are added (`add a0, s1, a0`).
;   - RV64I: same shape with __umoddi3 and __udivdi3, after the input is
;     shifted left and then right by 32.
;   - RV32IM / RV64IM: no libcall and a single mulhu-based sequence; its result
;     (a1) is both added to the input and, multiplied by 95, subtracted from it.
111 define i32 @combine_urem_udiv(i32 %x) nounwind {
112 ; RV32I-LABEL: combine_urem_udiv:
114 ; RV32I-NEXT: addi sp, sp, -16
115 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
116 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
117 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
118 ; RV32I-NEXT: mv s0, a0
119 ; RV32I-NEXT: li a1, 95
120 ; RV32I-NEXT: call __umodsi3
121 ; RV32I-NEXT: mv s1, a0
122 ; RV32I-NEXT: li a1, 95
123 ; RV32I-NEXT: mv a0, s0
124 ; RV32I-NEXT: call __udivsi3
125 ; RV32I-NEXT: add a0, s1, a0
126 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
127 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
128 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
129 ; RV32I-NEXT: addi sp, sp, 16
132 ; RV32IM-LABEL: combine_urem_udiv:
134 ; RV32IM-NEXT: lui a1, 364242
135 ; RV32IM-NEXT: addi a1, a1, 777
136 ; RV32IM-NEXT: mulhu a1, a0, a1
137 ; RV32IM-NEXT: sub a2, a0, a1
138 ; RV32IM-NEXT: srli a2, a2, 1
139 ; RV32IM-NEXT: add a1, a2, a1
140 ; RV32IM-NEXT: srli a1, a1, 6
141 ; RV32IM-NEXT: li a2, 95
142 ; RV32IM-NEXT: mul a2, a1, a2
143 ; RV32IM-NEXT: add a0, a0, a1
144 ; RV32IM-NEXT: sub a0, a0, a2
147 ; RV64I-LABEL: combine_urem_udiv:
149 ; RV64I-NEXT: addi sp, sp, -32
150 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
151 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
152 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
153 ; RV64I-NEXT: slli a0, a0, 32
154 ; RV64I-NEXT: srli s0, a0, 32
155 ; RV64I-NEXT: li a1, 95
156 ; RV64I-NEXT: mv a0, s0
157 ; RV64I-NEXT: call __umoddi3
158 ; RV64I-NEXT: mv s1, a0
159 ; RV64I-NEXT: li a1, 95
160 ; RV64I-NEXT: mv a0, s0
161 ; RV64I-NEXT: call __udivdi3
162 ; RV64I-NEXT: add a0, s1, a0
163 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
164 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
165 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
166 ; RV64I-NEXT: addi sp, sp, 32
169 ; RV64IM-LABEL: combine_urem_udiv:
171 ; RV64IM-NEXT: slli a1, a0, 32
172 ; RV64IM-NEXT: lui a2, 364242
173 ; RV64IM-NEXT: addi a2, a2, 777
174 ; RV64IM-NEXT: slli a2, a2, 32
175 ; RV64IM-NEXT: mulhu a1, a1, a2
176 ; RV64IM-NEXT: srli a1, a1, 32
177 ; RV64IM-NEXT: subw a2, a0, a1
178 ; RV64IM-NEXT: srliw a2, a2, 1
179 ; RV64IM-NEXT: add a1, a2, a1
180 ; RV64IM-NEXT: srli a1, a1, 6
181 ; RV64IM-NEXT: li a2, 95
182 ; RV64IM-NEXT: mul a2, a1, a2
183 ; RV64IM-NEXT: add a0, a0, a1
184 ; RV64IM-NEXT: subw a0, a0, a2
192 ; Don't fold for divisors that are a power of two.
; All four RUN configurations share the CHECK prefix here: the expected
; instruction is a single `andi a0, a0, 63`. The IR body is not visible in this
; excerpt; the mask 63 suggests a divisor of 64 (TODO: confirm against the IR).
193 define i32 @dont_fold_urem_power_of_two(i32 %x) nounwind {
194 ; CHECK-LABEL: dont_fold_urem_power_of_two:
196 ; CHECK-NEXT: andi a0, a0, 63
202 ; Don't fold if the divisor is one.
; All four RUN configurations share the CHECK prefix here: the expected
; instruction is `li a0, 0`, i.e. a0 is loaded with the constant zero.
203 define i32 @dont_fold_urem_one(i32 %x) nounwind {
204 ; CHECK-LABEL: dont_fold_urem_one:
206 ; CHECK-NEXT: li a0, 0
212 ; Don't fold if the divisor is 2^32.
; NOTE(review): the literal below, 4294967296, equals 2^32, which is one more
; than the largest unsigned 32-bit value (4294967295), so it does not fit in the
; i32 operand type. The function name says "umax" while the comment above says
; "2^32" -- confirm which divisor is intended, and how the over-wide literal is
; interpreted by the IR parser, before relying on this case.
; No CHECK-NEXT instruction lines are visible between the label and the IR in
; this excerpt.
213 define i32 @dont_fold_urem_i32_umax(i32 %x) nounwind {
214 ; CHECK-LABEL: dont_fold_urem_i32_umax:
217 %1 = urem i32 %x, 4294967296
221 ; Don't fold i64 urem
; The IR body is not visible in this excerpt; the divisor 98 is taken from the
; expected `li` instructions below.
;   - RV32I / RV32IM: identical expectations, a `call __umoddi3` with a2 = 98
;     and a3 = 0 (presumably the low/high halves of the 64-bit divisor --
;     confirm against the RV32 calling convention).
;   - RV64I: expected to be `tail __umoddi3` with 98 in a1.
;   - RV64IM: no libcall; a constant is loaded via .LCPI6_0, and a
;     srli/mulhu/srli sequence yields a value that is multiplied by 98 and
;     subtracted from a0.
222 define i64 @dont_fold_urem_i64(i64 %x) nounwind {
223 ; RV32I-LABEL: dont_fold_urem_i64:
225 ; RV32I-NEXT: addi sp, sp, -16
226 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
227 ; RV32I-NEXT: li a2, 98
228 ; RV32I-NEXT: li a3, 0
229 ; RV32I-NEXT: call __umoddi3
230 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
231 ; RV32I-NEXT: addi sp, sp, 16
234 ; RV32IM-LABEL: dont_fold_urem_i64:
236 ; RV32IM-NEXT: addi sp, sp, -16
237 ; RV32IM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
238 ; RV32IM-NEXT: li a2, 98
239 ; RV32IM-NEXT: li a3, 0
240 ; RV32IM-NEXT: call __umoddi3
241 ; RV32IM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
242 ; RV32IM-NEXT: addi sp, sp, 16
245 ; RV64I-LABEL: dont_fold_urem_i64:
247 ; RV64I-NEXT: li a1, 98
248 ; RV64I-NEXT: tail __umoddi3
250 ; RV64IM-LABEL: dont_fold_urem_i64:
252 ; RV64IM-NEXT: lui a1, %hi(.LCPI6_0)
253 ; RV64IM-NEXT: ld a1, %lo(.LCPI6_0)(a1)
254 ; RV64IM-NEXT: srli a2, a0, 1
255 ; RV64IM-NEXT: mulhu a1, a2, a1
256 ; RV64IM-NEXT: srli a1, a1, 4
257 ; RV64IM-NEXT: li a2, 98
258 ; RV64IM-NEXT: mul a1, a1, a2
259 ; RV64IM-NEXT: sub a0, a0, a1