; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s
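
; This test exercises the lowering of urem by constant vectors. Without the M
; extension the i16 elements are expected to go through the __umodsi3 (RV32) or
; __umoddi3 (RV64) libcall; with M the constant divisors below are expected to
; be rewritten into a multiply-high with a precomputed reciprocal, a multiply
; by the divisor, and a subtract.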
define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV32I-LABEL: fold_urem_vec_1:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lhu s0, 12(a1)
; RV32I-NEXT: lhu s1, 8(a1)
; RV32I-NEXT: lhu s2, 4(a1)
; RV32I-NEXT: lhu a2, 0(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, 124
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 98
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 1003
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: sh s1, 4(s3)
; RV32I-NEXT: sh s2, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IM-LABEL: fold_urem_vec_1:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lhu a2, 12(a1)
; RV32IM-NEXT: lhu a3, 8(a1)
; RV32IM-NEXT: lhu a4, 4(a1)
; RV32IM-NEXT: lhu a1, 0(a1)
; RV32IM-NEXT: lui a5, 8456
; RV32IM-NEXT: addi a5, a5, 1058
; RV32IM-NEXT: mulhu a5, a4, a5
; RV32IM-NEXT: slli a6, a5, 7
; RV32IM-NEXT: slli a5, a5, 2
; RV32IM-NEXT: sub a5, a5, a6
; RV32IM-NEXT: add a4, a4, a5
; RV32IM-NEXT: lui a5, 11038
; RV32IM-NEXT: addi a5, a5, -1465
; RV32IM-NEXT: mulhu a5, a1, a5
; RV32IM-NEXT: li a6, 95
; RV32IM-NEXT: mul a5, a5, a6
; RV32IM-NEXT: sub a1, a1, a5
; RV32IM-NEXT: lui a5, 10700
; RV32IM-NEXT: addi a5, a5, -1003
; RV32IM-NEXT: mulhu a5, a3, a5
; RV32IM-NEXT: li a6, 98
; RV32IM-NEXT: mul a5, a5, a6
; RV32IM-NEXT: sub a3, a3, a5
; RV32IM-NEXT: lui a5, 1045
; RV32IM-NEXT: addi a5, a5, 1801
; RV32IM-NEXT: mulhu a5, a2, a5
; RV32IM-NEXT: li a6, 1003
; RV32IM-NEXT: mul a5, a5, a6
; RV32IM-NEXT: sub a2, a2, a5
; RV32IM-NEXT: sh a2, 6(a0)
; RV32IM-NEXT: sh a3, 4(a0)
; RV32IM-NEXT: sh a1, 0(a0)
; RV32IM-NEXT: sh a4, 2(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_urem_vec_1:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: lhu s0, 24(a1)
; RV64I-NEXT: lhu s1, 16(a1)
; RV64I-NEXT: lhu s2, 8(a1)
; RV64I-NEXT: lhu a2, 0(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, 124
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 98
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 1003
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: sh s1, 4(s3)
; RV64I-NEXT: sh s2, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IM-LABEL: fold_urem_vec_1:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lhu a2, 8(a1)
; RV64IM-NEXT: lui a3, %hi(.LCPI0_0)
; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3)
; RV64IM-NEXT: lhu a4, 24(a1)
; RV64IM-NEXT: lhu a5, 16(a1)
; RV64IM-NEXT: lhu a1, 0(a1)
; RV64IM-NEXT: mulhu a3, a2, a3
; RV64IM-NEXT: slli a6, a3, 7
; RV64IM-NEXT: lui a7, %hi(.LCPI0_1)
; RV64IM-NEXT: ld a7, %lo(.LCPI0_1)(a7)
; RV64IM-NEXT: slli a3, a3, 2
; RV64IM-NEXT: subw a3, a3, a6
; RV64IM-NEXT: add a2, a2, a3
; RV64IM-NEXT: mulhu a3, a1, a7
; RV64IM-NEXT: lui a6, %hi(.LCPI0_2)
; RV64IM-NEXT: ld a6, %lo(.LCPI0_2)(a6)
; RV64IM-NEXT: li a7, 95
; RV64IM-NEXT: mul a3, a3, a7
; RV64IM-NEXT: subw a1, a1, a3
; RV64IM-NEXT: mulhu a3, a5, a6
; RV64IM-NEXT: lui a6, %hi(.LCPI0_3)
; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6)
; RV64IM-NEXT: li a7, 98
; RV64IM-NEXT: mul a3, a3, a7
; RV64IM-NEXT: subw a5, a5, a3
; RV64IM-NEXT: mulhu a3, a4, a6
; RV64IM-NEXT: li a6, 1003
; RV64IM-NEXT: mul a3, a3, a6
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: sh a4, 6(a0)
; RV64IM-NEXT: sh a5, 4(a0)
; RV64IM-NEXT: sh a1, 0(a0)
; RV64IM-NEXT: sh a2, 2(a0)
; RV64IM-NEXT: ret
  %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
  ret <4 x i16> %1
}
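
; For the divisor 95 the RV32IM code materializes (11038 << 12) - 1465
; = 45210183, i.e. ceil(2^32 / 95); mulhu of a 16-bit element with that
; reciprocal yields the quotient x / 95, and the remainder is then recovered as
; x - 95 * (x / 95). The RV64IM code loads equivalent 64-bit reciprocals from
; the constant pool (.LCPI* entries) instead of materializing them inline.
; fold_urem_vec_2 below splats the divisor 95, so a single reciprocal constant
; is reused for all four lanes.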
define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV32I-LABEL: fold_urem_vec_2:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lhu s0, 12(a1)
; RV32I-NEXT: lhu s1, 8(a1)
; RV32I-NEXT: lhu s2, 4(a1)
; RV32I-NEXT: lhu a2, 0(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: sh s1, 4(s3)
; RV32I-NEXT: sh s2, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IM-LABEL: fold_urem_vec_2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lhu a2, 12(a1)
; RV32IM-NEXT: lhu a3, 8(a1)
; RV32IM-NEXT: lhu a4, 0(a1)
; RV32IM-NEXT: lhu a1, 4(a1)
; RV32IM-NEXT: lui a5, 11038
; RV32IM-NEXT: addi a5, a5, -1465
; RV32IM-NEXT: mulhu a6, a4, a5
; RV32IM-NEXT: li a7, 95
; RV32IM-NEXT: mul a6, a6, a7
; RV32IM-NEXT: sub a4, a4, a6
; RV32IM-NEXT: mulhu a6, a1, a5
; RV32IM-NEXT: mul a6, a6, a7
; RV32IM-NEXT: sub a1, a1, a6
; RV32IM-NEXT: mulhu a6, a3, a5
; RV32IM-NEXT: mul a6, a6, a7
; RV32IM-NEXT: sub a3, a3, a6
; RV32IM-NEXT: mulhu a5, a2, a5
; RV32IM-NEXT: mul a5, a5, a7
; RV32IM-NEXT: sub a2, a2, a5
; RV32IM-NEXT: sh a2, 6(a0)
; RV32IM-NEXT: sh a3, 4(a0)
; RV32IM-NEXT: sh a1, 2(a0)
; RV32IM-NEXT: sh a4, 0(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_urem_vec_2:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: lhu s0, 24(a1)
; RV64I-NEXT: lhu s1, 16(a1)
; RV64I-NEXT: lhu s2, 8(a1)
; RV64I-NEXT: lhu a2, 0(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: sh s1, 4(s3)
; RV64I-NEXT: sh s2, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IM-LABEL: fold_urem_vec_2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lhu a2, 0(a1)
; RV64IM-NEXT: lui a3, %hi(.LCPI1_0)
; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3)
; RV64IM-NEXT: lhu a4, 24(a1)
; RV64IM-NEXT: lhu a5, 16(a1)
; RV64IM-NEXT: lhu a1, 8(a1)
; RV64IM-NEXT: mulhu a6, a2, a3
; RV64IM-NEXT: li a7, 95
; RV64IM-NEXT: mul a6, a6, a7
; RV64IM-NEXT: subw a2, a2, a6
; RV64IM-NEXT: mulhu a6, a1, a3
; RV64IM-NEXT: mul a6, a6, a7
; RV64IM-NEXT: subw a1, a1, a6
; RV64IM-NEXT: mulhu a6, a5, a3
; RV64IM-NEXT: mul a6, a6, a7
; RV64IM-NEXT: subw a5, a5, a6
; RV64IM-NEXT: mulhu a3, a4, a3
; RV64IM-NEXT: mul a3, a3, a7
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: sh a4, 6(a0)
; RV64IM-NEXT: sh a5, 4(a0)
; RV64IM-NEXT: sh a1, 2(a0)
; RV64IM-NEXT: sh a2, 0(a0)
; RV64IM-NEXT: ret
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  ret <4 x i16> %1
}

; Don't fold if we can combine urem with udiv.
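; With M, the expected lowering computes mulhu once per lane and feeds it to
; both results: the quotient comes straight from the multiply-high, and the
; remainder is x - 95 * (x / 95), so urem + udiv should cost one extra
; mul/add/sub per lane rather than a second division sequence.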
define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV32I-LABEL: combine_urem_udiv:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -48
; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lhu s1, 0(a1)
; RV32I-NEXT: lhu s2, 4(a1)
; RV32I-NEXT: lhu s3, 8(a1)
; RV32I-NEXT: lhu s4, 12(a1)
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s4
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s5, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s6, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s7, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s8, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s4
; RV32I-NEXT: call __udivsi3
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: call __udivsi3
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __udivsi3
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __udivsi3
; RV32I-NEXT: add a0, s8, a0
; RV32I-NEXT: add s2, s7, s2
; RV32I-NEXT: add s3, s6, s3
; RV32I-NEXT: add s4, s5, s4
; RV32I-NEXT: sh s4, 6(s0)
; RV32I-NEXT: sh s3, 4(s0)
; RV32I-NEXT: sh s2, 2(s0)
; RV32I-NEXT: sh a0, 0(s0)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lhu a2, 0(a1)
; RV32IM-NEXT: lhu a3, 4(a1)
; RV32IM-NEXT: lhu a4, 12(a1)
; RV32IM-NEXT: lhu a1, 8(a1)
; RV32IM-NEXT: lui a5, 11038
; RV32IM-NEXT: addi a5, a5, -1465
; RV32IM-NEXT: mulhu a6, a4, a5
; RV32IM-NEXT: li a7, 95
; RV32IM-NEXT: mul t0, a6, a7
; RV32IM-NEXT: mulhu t1, a1, a5
; RV32IM-NEXT: mul t2, t1, a7
; RV32IM-NEXT: mulhu t3, a3, a5
; RV32IM-NEXT: mul t4, t3, a7
; RV32IM-NEXT: mulhu a5, a2, a5
; RV32IM-NEXT: mul a7, a5, a7
; RV32IM-NEXT: add a2, a2, a5
; RV32IM-NEXT: sub a2, a2, a7
; RV32IM-NEXT: add a3, a3, t3
; RV32IM-NEXT: sub a3, a3, t4
; RV32IM-NEXT: add a1, a1, t1
; RV32IM-NEXT: sub a1, a1, t2
; RV32IM-NEXT: add a4, a4, a6
; RV32IM-NEXT: sub a4, a4, t0
; RV32IM-NEXT: sh a4, 6(a0)
; RV32IM-NEXT: sh a1, 4(a0)
; RV32IM-NEXT: sh a3, 2(a0)
; RV32IM-NEXT: sh a2, 0(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -80
; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: lhu s1, 0(a1)
; RV64I-NEXT: lhu s2, 8(a1)
; RV64I-NEXT: lhu s3, 16(a1)
; RV64I-NEXT: lhu s4, 24(a1)
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s4
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s5, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s3
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s6, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s7, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s8, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s4
; RV64I-NEXT: call __udivdi3
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s3
; RV64I-NEXT: call __udivdi3
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __udivdi3
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __udivdi3
; RV64I-NEXT: add a0, s8, a0
; RV64I-NEXT: add s2, s7, s2
; RV64I-NEXT: add s3, s6, s3
; RV64I-NEXT: add s4, s5, s4
; RV64I-NEXT: sh s4, 6(s0)
; RV64I-NEXT: sh s3, 4(s0)
; RV64I-NEXT: sh s2, 2(s0)
; RV64I-NEXT: sh a0, 0(s0)
; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 80
; RV64I-NEXT: ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lhu a2, 24(a1)
; RV64IM-NEXT: lui a3, %hi(.LCPI2_0)
; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3)
; RV64IM-NEXT: lhu a4, 0(a1)
; RV64IM-NEXT: lhu a5, 8(a1)
; RV64IM-NEXT: lhu a1, 16(a1)
; RV64IM-NEXT: mulhu a6, a2, a3
; RV64IM-NEXT: li a7, 95
; RV64IM-NEXT: mul t0, a6, a7
; RV64IM-NEXT: mulhu t1, a1, a3
; RV64IM-NEXT: mul t2, t1, a7
; RV64IM-NEXT: mulhu t3, a5, a3
; RV64IM-NEXT: mul t4, t3, a7
; RV64IM-NEXT: mulhu a3, a4, a3
; RV64IM-NEXT: mul a7, a3, a7
; RV64IM-NEXT: add a3, a4, a3
; RV64IM-NEXT: subw a3, a3, a7
; RV64IM-NEXT: add a5, a5, t3
; RV64IM-NEXT: subw a4, a5, t4
; RV64IM-NEXT: add a1, a1, t1
; RV64IM-NEXT: subw a1, a1, t2
; RV64IM-NEXT: add a2, a2, a6
; RV64IM-NEXT: subw a2, a2, t0
; RV64IM-NEXT: sh a2, 6(a0)
; RV64IM-NEXT: sh a1, 4(a0)
; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a3, 0(a0)
; RV64IM-NEXT: ret
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %3 = add <4 x i16> %1, %2
  ret <4 x i16> %3
}

; Don't fold for divisors that are a power of two.
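; For 64, 32 and 8 the remainder reduces to masking with divisor - 1, so those
; lanes become andi with 63, 31 and 7; only the lane dividing by 95 still needs
; the libcall (without M) or the multiply-high sequence (with M).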
define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_power_of_two:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lhu s1, 8(a1)
; RV32I-NEXT: lhu s2, 4(a1)
; RV32I-NEXT: lhu s3, 0(a1)
; RV32I-NEXT: lhu a2, 12(a1)
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: andi a1, s3, 63
; RV32I-NEXT: andi a2, s2, 31
; RV32I-NEXT: andi s1, s1, 7
; RV32I-NEXT: sh a0, 6(s0)
; RV32I-NEXT: sh s1, 4(s0)
; RV32I-NEXT: sh a2, 2(s0)
; RV32I-NEXT: sh a1, 0(s0)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IM-LABEL: dont_fold_urem_power_of_two:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lhu a2, 8(a1)
; RV32IM-NEXT: lhu a3, 4(a1)
; RV32IM-NEXT: lhu a4, 12(a1)
; RV32IM-NEXT: lhu a1, 0(a1)
; RV32IM-NEXT: lui a5, 11038
; RV32IM-NEXT: addi a5, a5, -1465
; RV32IM-NEXT: mulhu a5, a4, a5
; RV32IM-NEXT: li a6, 95
; RV32IM-NEXT: mul a5, a5, a6
; RV32IM-NEXT: sub a4, a4, a5
; RV32IM-NEXT: andi a1, a1, 63
; RV32IM-NEXT: andi a3, a3, 31
; RV32IM-NEXT: andi a2, a2, 7
; RV32IM-NEXT: sh a2, 4(a0)
; RV32IM-NEXT: sh a3, 2(a0)
; RV32IM-NEXT: sh a1, 0(a0)
; RV32IM-NEXT: sh a4, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_urem_power_of_two:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: lhu s1, 16(a1)
; RV64I-NEXT: lhu s2, 8(a1)
; RV64I-NEXT: lhu s3, 0(a1)
; RV64I-NEXT: lhu a2, 24(a1)
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: andi a1, s3, 63
; RV64I-NEXT: andi a2, s2, 31
; RV64I-NEXT: andi s1, s1, 7
; RV64I-NEXT: sh a0, 6(s0)
; RV64I-NEXT: sh s1, 4(s0)
; RV64I-NEXT: sh a2, 2(s0)
; RV64I-NEXT: sh a1, 0(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IM-LABEL: dont_fold_urem_power_of_two:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lhu a2, 24(a1)
; RV64IM-NEXT: lui a3, %hi(.LCPI3_0)
; RV64IM-NEXT: ld a3, %lo(.LCPI3_0)(a3)
; RV64IM-NEXT: lhu a4, 16(a1)
; RV64IM-NEXT: lhu a5, 8(a1)
; RV64IM-NEXT: lhu a1, 0(a1)
; RV64IM-NEXT: mulhu a3, a2, a3
; RV64IM-NEXT: li a6, 95
; RV64IM-NEXT: mul a3, a3, a6
; RV64IM-NEXT: subw a2, a2, a3
; RV64IM-NEXT: andi a1, a1, 63
; RV64IM-NEXT: andi a5, a5, 31
; RV64IM-NEXT: andi a4, a4, 7
; RV64IM-NEXT: sh a4, 4(a0)
; RV64IM-NEXT: sh a5, 2(a0)
; RV64IM-NEXT: sh a1, 0(a0)
; RV64IM-NEXT: sh a2, 6(a0)
; RV64IM-NEXT: ret
  %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
  ret <4 x i16> %1
}

; Don't fold if the divisor is one.
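; x urem 1 is always 0, so no code is emitted for the first lane; every
; configuration simply stores zero into element 0.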
define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lhu s0, 12(a1)
; RV32I-NEXT: lhu s1, 8(a1)
; RV32I-NEXT: lhu a2, 4(a1)
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 654
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 23
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a1, a0, 1327
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: sh a0, 6(s2)
; RV32I-NEXT: sh s1, 4(s2)
; RV32I-NEXT: sh s3, 2(s2)
; RV32I-NEXT: sh zero, 0(s2)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IM-LABEL: dont_fold_urem_one:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lhu a2, 12(a1)
; RV32IM-NEXT: lhu a3, 4(a1)
; RV32IM-NEXT: lhu a1, 8(a1)
; RV32IM-NEXT: lui a4, 1603
; RV32IM-NEXT: addi a4, a4, 1341
; RV32IM-NEXT: mulhu a4, a3, a4
; RV32IM-NEXT: li a5, 654
; RV32IM-NEXT: mul a4, a4, a5
; RV32IM-NEXT: sub a3, a3, a4
; RV32IM-NEXT: lui a4, 45590
; RV32IM-NEXT: addi a4, a4, 1069
; RV32IM-NEXT: mulhu a4, a1, a4
; RV32IM-NEXT: li a5, 23
; RV32IM-NEXT: mul a4, a4, a5
; RV32IM-NEXT: sub a1, a1, a4
; RV32IM-NEXT: lui a4, 193
; RV32IM-NEXT: addi a4, a4, 1464
; RV32IM-NEXT: mulhu a4, a2, a4
; RV32IM-NEXT: lui a5, 1
; RV32IM-NEXT: addi a5, a5, 1327
; RV32IM-NEXT: mul a4, a4, a5
; RV32IM-NEXT: sub a2, a2, a4
; RV32IM-NEXT: sh zero, 0(a0)
; RV32IM-NEXT: sh a2, 6(a0)
; RV32IM-NEXT: sh a1, 4(a0)
; RV32IM-NEXT: sh a3, 2(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_urem_one:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: lhu s0, 24(a1)
; RV64I-NEXT: lhu s1, 16(a1)
; RV64I-NEXT: lhu a2, 8(a1)
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 654
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 23
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: sh a0, 6(s2)
; RV64I-NEXT: sh s1, 4(s2)
; RV64I-NEXT: sh s3, 2(s2)
; RV64I-NEXT: sh zero, 0(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IM-LABEL: dont_fold_urem_one:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lhu a2, 8(a1)
; RV64IM-NEXT: lui a3, %hi(.LCPI4_0)
; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3)
; RV64IM-NEXT: lhu a4, 24(a1)
; RV64IM-NEXT: lhu a1, 16(a1)
; RV64IM-NEXT: mulhu a3, a2, a3
; RV64IM-NEXT: lui a5, %hi(.LCPI4_1)
; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5)
; RV64IM-NEXT: li a6, 654
; RV64IM-NEXT: mul a3, a3, a6
; RV64IM-NEXT: subw a2, a2, a3
; RV64IM-NEXT: mulhu a3, a1, a5
; RV64IM-NEXT: lui a5, %hi(.LCPI4_2)
; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5)
; RV64IM-NEXT: li a6, 23
; RV64IM-NEXT: mul a3, a3, a6
; RV64IM-NEXT: subw a1, a1, a3
; RV64IM-NEXT: mulhu a3, a4, a5
; RV64IM-NEXT: lui a5, 1
; RV64IM-NEXT: addi a5, a5, 1327
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: sh zero, 0(a0)
; RV64IM-NEXT: sh a4, 6(a0)
; RV64IM-NEXT: sh a1, 4(a0)
; RV64IM-NEXT: sh a2, 2(a0)
; RV64IM-NEXT: ret
  %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold if the divisor is 2^16.
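; All four configurations are expected to emit the same code for this case,
; which is why the assertions below use the shared CHECK prefix rather than the
; per-configuration prefixes.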
define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; CHECK-LABEL: dont_fold_urem_i16_smax:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold i64 urem.
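; RV32 has no instruction for the high half of a 64x64-bit multiply, so the i64
; elements still go through __umoddi3 even when M is enabled (including the
; divisor-1 lane); only RV64IM uses the reciprocal multiply sequence here.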
define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -48
; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw s0, 24(a1)
; RV32I-NEXT: lw s1, 28(a1)
; RV32I-NEXT: lw s2, 16(a1)
; RV32I-NEXT: lw s3, 20(a1)
; RV32I-NEXT: lw s4, 8(a1)
; RV32I-NEXT: lw s5, 12(a1)
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: mv s6, a0
; RV32I-NEXT: li a2, 1
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
; RV32I-NEXT: mv s7, a0
; RV32I-NEXT: mv s8, a1
; RV32I-NEXT: li a2, 654
; RV32I-NEXT: mv a0, s4
; RV32I-NEXT: mv a1, s5
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv s5, a1
; RV32I-NEXT: li a2, 23
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv s3, a1
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a2, a0, 1327
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
; RV32I-NEXT: sw a1, 28(s6)
; RV32I-NEXT: sw a0, 24(s6)
; RV32I-NEXT: sw s3, 20(s6)
; RV32I-NEXT: sw s2, 16(s6)
; RV32I-NEXT: sw s5, 12(s6)
; RV32I-NEXT: sw s4, 8(s6)
; RV32I-NEXT: sw s8, 4(s6)
; RV32I-NEXT: sw s7, 0(s6)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM: # %bb.0:
; RV32IM-NEXT: addi sp, sp, -48
; RV32IM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
; RV32IM-NEXT: lw s0, 24(a1)
; RV32IM-NEXT: lw s1, 28(a1)
; RV32IM-NEXT: lw s2, 16(a1)
; RV32IM-NEXT: lw s3, 20(a1)
; RV32IM-NEXT: lw s4, 8(a1)
; RV32IM-NEXT: lw s5, 12(a1)
; RV32IM-NEXT: lw a3, 0(a1)
; RV32IM-NEXT: lw a1, 4(a1)
; RV32IM-NEXT: mv s6, a0
; RV32IM-NEXT: li a2, 1
; RV32IM-NEXT: mv a0, a3
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
; RV32IM-NEXT: mv s7, a0
; RV32IM-NEXT: mv s8, a1
; RV32IM-NEXT: li a2, 654
; RV32IM-NEXT: mv a0, s4
; RV32IM-NEXT: mv a1, s5
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
; RV32IM-NEXT: mv s4, a0
; RV32IM-NEXT: mv s5, a1
; RV32IM-NEXT: li a2, 23
; RV32IM-NEXT: mv a0, s2
; RV32IM-NEXT: mv a1, s3
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
; RV32IM-NEXT: mv s2, a0
; RV32IM-NEXT: mv s3, a1
; RV32IM-NEXT: lui a0, 1
; RV32IM-NEXT: addi a2, a0, 1327
; RV32IM-NEXT: mv a0, s0
; RV32IM-NEXT: mv a1, s1
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
; RV32IM-NEXT: sw a1, 28(s6)
; RV32IM-NEXT: sw a0, 24(s6)
; RV32IM-NEXT: sw s3, 20(s6)
; RV32IM-NEXT: sw s2, 16(s6)
; RV32IM-NEXT: sw s5, 12(s6)
; RV32IM-NEXT: sw s4, 8(s6)
; RV32IM-NEXT: sw s8, 4(s6)
; RV32IM-NEXT: sw s7, 0(s6)
; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
; RV32IM-NEXT: addi sp, sp, 48
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: ld s0, 24(a1)
; RV64I-NEXT: ld s1, 16(a1)
; RV64I-NEXT: ld a2, 8(a1)
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 654
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 23
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: sd a0, 24(s2)
; RV64I-NEXT: sd s1, 16(s2)
; RV64I-NEXT: sd s3, 8(s2)
; RV64I-NEXT: sd zero, 0(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM: # %bb.0:
; RV64IM-NEXT: ld a2, 16(a1)
; RV64IM-NEXT: lui a3, %hi(.LCPI6_0)
; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3)
; RV64IM-NEXT: ld a4, 24(a1)
; RV64IM-NEXT: ld a1, 8(a1)
; RV64IM-NEXT: mulhu a3, a2, a3
; RV64IM-NEXT: sub a5, a2, a3
; RV64IM-NEXT: srli a5, a5, 1
; RV64IM-NEXT: add a3, a5, a3
; RV64IM-NEXT: srli a3, a3, 4
; RV64IM-NEXT: li a5, 23
; RV64IM-NEXT: lui a6, %hi(.LCPI6_1)
; RV64IM-NEXT: ld a6, %lo(.LCPI6_1)(a6)
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: sub a2, a2, a3
; RV64IM-NEXT: srli a3, a1, 1
; RV64IM-NEXT: mulhu a3, a3, a6
; RV64IM-NEXT: srli a3, a3, 7
; RV64IM-NEXT: lui a5, %hi(.LCPI6_2)
; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5)
; RV64IM-NEXT: li a6, 654
; RV64IM-NEXT: mul a3, a3, a6
; RV64IM-NEXT: sub a1, a1, a3
; RV64IM-NEXT: mulhu a3, a4, a5
; RV64IM-NEXT: srli a3, a3, 12
; RV64IM-NEXT: lui a5, 1
; RV64IM-NEXT: addiw a5, a5, 1327
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: sub a4, a4, a3
; RV64IM-NEXT: sd zero, 0(a0)
; RV64IM-NEXT: sd a4, 24(a0)
; RV64IM-NEXT: sd a1, 8(a0)
; RV64IM-NEXT: sd a2, 16(a0)
; RV64IM-NEXT: ret
  %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}