; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s
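
; The runs below exercise SelectionDAG's strength reduction of unsigned
; remainder by a constant: with the M extension available, x urem C is
; rewritten as x - (x udiv C) * C, and the udiv itself becomes a
; multiply-high (mulhu) with a fixed-point "magic" reciprocal, following
; Granlund & Montgomery, "Division by Invariant Integers using
; Multiplication". Without M there is no mulhu, so the base-ISA runs fall
; back to the __umodsi3/__umoddi3 libcalls. Worked examples of the magic
; arithmetic follow some of the functions below.
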
define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
; RV32I-LABEL: fold_urem_vec_1:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp)
; RV32I-NEXT:    sw s0, 24(sp)
; RV32I-NEXT:    sw s1, 20(sp)
; RV32I-NEXT:    sw s2, 16(sp)
; RV32I-NEXT:    sw s3, 12(sp)
; RV32I-NEXT:    sw s4, 8(sp)
; RV32I-NEXT:    sw s5, 4(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    .cfi_offset s3, -20
; RV32I-NEXT:    .cfi_offset s4, -24
; RV32I-NEXT:    .cfi_offset s5, -28
; RV32I-NEXT:    lhu s2, 12(a1)
; RV32I-NEXT:    lhu s3, 8(a1)
; RV32I-NEXT:    lhu s0, 4(a1)
; RV32I-NEXT:    lhu a2, 0(a1)
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    addi a1, zero, 124
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s5, a0
; RV32I-NEXT:    addi a1, zero, 98
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a1, zero, 1003
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh a0, 6(s1)
; RV32I-NEXT:    sh s0, 4(s1)
; RV32I-NEXT:    sh s5, 2(s1)
; RV32I-NEXT:    sh s4, 0(s1)
; RV32I-NEXT:    lw s5, 4(sp)
; RV32I-NEXT:    lw s4, 8(sp)
; RV32I-NEXT:    lw s3, 12(sp)
; RV32I-NEXT:    lw s2, 16(sp)
; RV32I-NEXT:    lw s1, 20(sp)
; RV32I-NEXT:    lw s0, 24(sp)
; RV32I-NEXT:    lw ra, 28(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    .cfi_restore s3
; RV32I-NEXT:    .cfi_restore s4
; RV32I-NEXT:    .cfi_restore s5
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_vec_1:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a6, 12(a1)
; RV32IM-NEXT:    lhu a3, 8(a1)
; RV32IM-NEXT:    lhu a4, 0(a1)
; RV32IM-NEXT:    lhu a1, 4(a1)
; RV32IM-NEXT:    lui a5, 364242
; RV32IM-NEXT:    addi a5, a5, 777
; RV32IM-NEXT:    mulhu a5, a4, a5
; RV32IM-NEXT:    sub a2, a4, a5
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a2, a2, a5
; RV32IM-NEXT:    srli a2, a2, 6
; RV32IM-NEXT:    addi a5, zero, 95
; RV32IM-NEXT:    mul a2, a2, a5
; RV32IM-NEXT:    sub a2, a4, a2
; RV32IM-NEXT:    srli a4, a1, 2
; RV32IM-NEXT:    lui a5, 135300
; RV32IM-NEXT:    addi a5, a5, 529
; RV32IM-NEXT:    mulhu a4, a4, a5
; RV32IM-NEXT:    srli a4, a4, 2
; RV32IM-NEXT:    addi a5, zero, 124
; RV32IM-NEXT:    mul a4, a4, a5
; RV32IM-NEXT:    sub a1, a1, a4
; RV32IM-NEXT:    lui a4, 342392
; RV32IM-NEXT:    addi a4, a4, 669
; RV32IM-NEXT:    mulhu a4, a3, a4
; RV32IM-NEXT:    srli a4, a4, 5
; RV32IM-NEXT:    addi a5, zero, 98
; RV32IM-NEXT:    mul a4, a4, a5
; RV32IM-NEXT:    sub a3, a3, a4
; RV32IM-NEXT:    lui a4, 267633
; RV32IM-NEXT:    addi a4, a4, -1809
; RV32IM-NEXT:    mulhu a4, a6, a4
; RV32IM-NEXT:    srli a4, a4, 8
; RV32IM-NEXT:    addi a5, zero, 1003
; RV32IM-NEXT:    mul a4, a4, a5
; RV32IM-NEXT:    sub a4, a6, a4
; RV32IM-NEXT:    sh a4, 6(a0)
; RV32IM-NEXT:    sh a3, 4(a0)
; RV32IM-NEXT:    sh a1, 2(a0)
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_vec_1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -64
; RV64I-NEXT:    .cfi_def_cfa_offset 64
; RV64I-NEXT:    sd ra, 56(sp)
; RV64I-NEXT:    sd s0, 48(sp)
; RV64I-NEXT:    sd s1, 40(sp)
; RV64I-NEXT:    sd s2, 32(sp)
; RV64I-NEXT:    sd s3, 24(sp)
; RV64I-NEXT:    sd s4, 16(sp)
; RV64I-NEXT:    sd s5, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    .cfi_offset s3, -40
; RV64I-NEXT:    .cfi_offset s4, -48
; RV64I-NEXT:    .cfi_offset s5, -56
; RV64I-NEXT:    lhu s2, 24(a1)
; RV64I-NEXT:    lhu s3, 16(a1)
; RV64I-NEXT:    lhu s0, 8(a1)
; RV64I-NEXT:    lhu a2, 0(a1)
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    addi a1, zero, 124
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s5, a0
; RV64I-NEXT:    addi a1, zero, 98
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    addi a1, zero, 1003
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh a0, 6(s1)
; RV64I-NEXT:    sh s0, 4(s1)
; RV64I-NEXT:    sh s5, 2(s1)
; RV64I-NEXT:    sh s4, 0(s1)
; RV64I-NEXT:    ld s5, 8(sp)
; RV64I-NEXT:    ld s4, 16(sp)
; RV64I-NEXT:    ld s3, 24(sp)
; RV64I-NEXT:    ld s2, 32(sp)
; RV64I-NEXT:    ld s1, 40(sp)
; RV64I-NEXT:    ld s0, 48(sp)
; RV64I-NEXT:    ld ra, 56(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    .cfi_restore s3
; RV64I-NEXT:    .cfi_restore s4
; RV64I-NEXT:    .cfi_restore s5
; RV64I-NEXT:    addi sp, sp, 64
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_vec_1:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a6, 24(a1)
; RV64IM-NEXT:    lhu a3, 16(a1)
; RV64IM-NEXT:    lhu a4, 8(a1)
; RV64IM-NEXT:    lhu a1, 0(a1)
; RV64IM-NEXT:    lui a5, 1423
; RV64IM-NEXT:    addiw a5, a5, -733
; RV64IM-NEXT:    slli a5, a5, 15
; RV64IM-NEXT:    addi a5, a5, 1035
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, -1811
; RV64IM-NEXT:    slli a5, a5, 12
; RV64IM-NEXT:    addi a5, a5, 561
; RV64IM-NEXT:    mulhu a5, a1, a5
; RV64IM-NEXT:    sub a2, a1, a5
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a2, a2, a5
; RV64IM-NEXT:    srli a2, a2, 6
; RV64IM-NEXT:    addi a5, zero, 95
; RV64IM-NEXT:    mul a2, a2, a5
; RV64IM-NEXT:    sub a1, a1, a2
; RV64IM-NEXT:    srli a2, a4, 2
; RV64IM-NEXT:    lui a5, 264
; RV64IM-NEXT:    addiw a5, a5, 1057
; RV64IM-NEXT:    slli a5, a5, 15
; RV64IM-NEXT:    addi a5, a5, 1057
; RV64IM-NEXT:    slli a5, a5, 15
; RV64IM-NEXT:    addi a5, a5, 1057
; RV64IM-NEXT:    slli a5, a5, 12
; RV64IM-NEXT:    addi a5, a5, 133
; RV64IM-NEXT:    mulhu a2, a2, a5
; RV64IM-NEXT:    srli a2, a2, 3
; RV64IM-NEXT:    addi a5, zero, 124
; RV64IM-NEXT:    mul a2, a2, a5
; RV64IM-NEXT:    sub a2, a4, a2
; RV64IM-NEXT:    srli a4, a3, 1
; RV64IM-NEXT:    lui a5, 2675
; RV64IM-NEXT:    addiw a5, a5, -251
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, 1839
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, 167
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, 1505
; RV64IM-NEXT:    mulhu a4, a4, a5
; RV64IM-NEXT:    srli a4, a4, 4
; RV64IM-NEXT:    addi a5, zero, 98
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    sub a3, a3, a4
; RV64IM-NEXT:    lui a4, 8364
; RV64IM-NEXT:    addiw a4, a4, -1977
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, 1907
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, 453
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, 1213
; RV64IM-NEXT:    mulhu a4, a6, a4
; RV64IM-NEXT:    srli a4, a4, 7
; RV64IM-NEXT:    addi a5, zero, 1003
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    sub a4, a6, a4
; RV64IM-NEXT:    sh a4, 6(a0)
; RV64IM-NEXT:    sh a3, 4(a0)
; RV64IM-NEXT:    sh a2, 2(a0)
; RV64IM-NEXT:    sh a1, 0(a0)
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
  ret <4 x i16> %1
}
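
; A worked example of the RV32IM lowering of fold_urem_vec_1 above, for the
; divisor 95 and a hypothetical lane value x = 1000: the constant
; materialized by lui/addi is m = 364242 * 2^12 + 777 = 1491936009, chosen
; so that 2^32 + m = 5786903305 = ceil(2^39 / 95). Then
;   q0 = mulhu(x, m) = floor(1000 * m / 2^32) = 347
;   q  = (((x - q0) >> 1) + q0) >> 6 = floor((1000 + 347) / 2^7) = 10
;   r  = x - q * 95 = 1000 - 950 = 50 = 1000 urem 95
; The sub/srli/add fixup is needed because the exact multiplier 2^32 + m is
; 33 bits wide and does not fit in a register.
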
define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
; RV32I-LABEL: fold_urem_vec_2:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp)
; RV32I-NEXT:    sw s0, 24(sp)
; RV32I-NEXT:    sw s1, 20(sp)
; RV32I-NEXT:    sw s2, 16(sp)
; RV32I-NEXT:    sw s3, 12(sp)
; RV32I-NEXT:    sw s4, 8(sp)
; RV32I-NEXT:    sw s5, 4(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    .cfi_offset s3, -20
; RV32I-NEXT:    .cfi_offset s4, -24
; RV32I-NEXT:    .cfi_offset s5, -28
; RV32I-NEXT:    lhu s2, 12(a1)
; RV32I-NEXT:    lhu s3, 8(a1)
; RV32I-NEXT:    lhu s0, 4(a1)
; RV32I-NEXT:    lhu a2, 0(a1)
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s5, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh a0, 6(s1)
; RV32I-NEXT:    sh s0, 4(s1)
; RV32I-NEXT:    sh s5, 2(s1)
; RV32I-NEXT:    sh s4, 0(s1)
; RV32I-NEXT:    lw s5, 4(sp)
; RV32I-NEXT:    lw s4, 8(sp)
; RV32I-NEXT:    lw s3, 12(sp)
; RV32I-NEXT:    lw s2, 16(sp)
; RV32I-NEXT:    lw s1, 20(sp)
; RV32I-NEXT:    lw s0, 24(sp)
; RV32I-NEXT:    lw ra, 28(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    .cfi_restore s3
; RV32I-NEXT:    .cfi_restore s4
; RV32I-NEXT:    .cfi_restore s5
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_vec_2:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a6, 12(a1)
; RV32IM-NEXT:    lhu a7, 8(a1)
; RV32IM-NEXT:    lhu a4, 0(a1)
; RV32IM-NEXT:    lhu a1, 4(a1)
; RV32IM-NEXT:    lui a5, 364242
; RV32IM-NEXT:    addi a5, a5, 777
; RV32IM-NEXT:    mulhu a2, a4, a5
; RV32IM-NEXT:    sub a3, a4, a2
; RV32IM-NEXT:    srli a3, a3, 1
; RV32IM-NEXT:    add a2, a3, a2
; RV32IM-NEXT:    srli a2, a2, 6
; RV32IM-NEXT:    addi a3, zero, 95
; RV32IM-NEXT:    mul a2, a2, a3
; RV32IM-NEXT:    sub t0, a4, a2
; RV32IM-NEXT:    mulhu a4, a1, a5
; RV32IM-NEXT:    sub a2, a1, a4
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a2, a2, a4
; RV32IM-NEXT:    srli a2, a2, 6
; RV32IM-NEXT:    mul a2, a2, a3
; RV32IM-NEXT:    sub a1, a1, a2
; RV32IM-NEXT:    mulhu a2, a7, a5
; RV32IM-NEXT:    sub a4, a7, a2
; RV32IM-NEXT:    srli a4, a4, 1
; RV32IM-NEXT:    add a2, a4, a2
; RV32IM-NEXT:    srli a2, a2, 6
; RV32IM-NEXT:    mul a2, a2, a3
; RV32IM-NEXT:    sub a2, a7, a2
; RV32IM-NEXT:    mulhu a4, a6, a5
; RV32IM-NEXT:    sub a5, a6, a4
; RV32IM-NEXT:    srli a5, a5, 1
; RV32IM-NEXT:    add a4, a5, a4
; RV32IM-NEXT:    srli a4, a4, 6
; RV32IM-NEXT:    mul a3, a4, a3
; RV32IM-NEXT:    sub a3, a6, a3
; RV32IM-NEXT:    sh a3, 6(a0)
; RV32IM-NEXT:    sh a2, 4(a0)
; RV32IM-NEXT:    sh a1, 2(a0)
; RV32IM-NEXT:    sh t0, 0(a0)
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_vec_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -64
; RV64I-NEXT:    .cfi_def_cfa_offset 64
; RV64I-NEXT:    sd ra, 56(sp)
; RV64I-NEXT:    sd s0, 48(sp)
; RV64I-NEXT:    sd s1, 40(sp)
; RV64I-NEXT:    sd s2, 32(sp)
; RV64I-NEXT:    sd s3, 24(sp)
; RV64I-NEXT:    sd s4, 16(sp)
; RV64I-NEXT:    sd s5, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    .cfi_offset s3, -40
; RV64I-NEXT:    .cfi_offset s4, -48
; RV64I-NEXT:    .cfi_offset s5, -56
; RV64I-NEXT:    lhu s2, 24(a1)
; RV64I-NEXT:    lhu s3, 16(a1)
; RV64I-NEXT:    lhu s0, 8(a1)
; RV64I-NEXT:    lhu a2, 0(a1)
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s5, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh a0, 6(s1)
; RV64I-NEXT:    sh s0, 4(s1)
; RV64I-NEXT:    sh s5, 2(s1)
; RV64I-NEXT:    sh s4, 0(s1)
; RV64I-NEXT:    ld s5, 8(sp)
; RV64I-NEXT:    ld s4, 16(sp)
; RV64I-NEXT:    ld s3, 24(sp)
; RV64I-NEXT:    ld s2, 32(sp)
; RV64I-NEXT:    ld s1, 40(sp)
; RV64I-NEXT:    ld s0, 48(sp)
; RV64I-NEXT:    ld ra, 56(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    .cfi_restore s3
; RV64I-NEXT:    .cfi_restore s4
; RV64I-NEXT:    .cfi_restore s5
; RV64I-NEXT:    addi sp, sp, 64
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_vec_2:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a6, 24(a1)
; RV64IM-NEXT:    lhu a7, 16(a1)
; RV64IM-NEXT:    lhu a4, 8(a1)
; RV64IM-NEXT:    lhu a1, 0(a1)
; RV64IM-NEXT:    lui a5, 1423
; RV64IM-NEXT:    addiw a5, a5, -733
; RV64IM-NEXT:    slli a5, a5, 15
; RV64IM-NEXT:    addi a5, a5, 1035
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, -1811
; RV64IM-NEXT:    slli a5, a5, 12
; RV64IM-NEXT:    addi a5, a5, 561
; RV64IM-NEXT:    mulhu a2, a1, a5
; RV64IM-NEXT:    sub a3, a1, a2
; RV64IM-NEXT:    srli a3, a3, 1
; RV64IM-NEXT:    add a2, a3, a2
; RV64IM-NEXT:    srli a2, a2, 6
; RV64IM-NEXT:    addi a3, zero, 95
; RV64IM-NEXT:    mul a2, a2, a3
; RV64IM-NEXT:    sub t0, a1, a2
; RV64IM-NEXT:    mulhu a2, a4, a5
; RV64IM-NEXT:    sub a1, a4, a2
; RV64IM-NEXT:    srli a1, a1, 1
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    mul a1, a1, a3
; RV64IM-NEXT:    sub a1, a4, a1
; RV64IM-NEXT:    mulhu a2, a7, a5
; RV64IM-NEXT:    sub a4, a7, a2
; RV64IM-NEXT:    srli a4, a4, 1
; RV64IM-NEXT:    add a2, a4, a2
; RV64IM-NEXT:    srli a2, a2, 6
; RV64IM-NEXT:    mul a2, a2, a3
; RV64IM-NEXT:    sub a2, a7, a2
; RV64IM-NEXT:    mulhu a4, a6, a5
; RV64IM-NEXT:    sub a5, a6, a4
; RV64IM-NEXT:    srli a5, a5, 1
; RV64IM-NEXT:    add a4, a5, a4
; RV64IM-NEXT:    srli a4, a4, 6
; RV64IM-NEXT:    mul a3, a4, a3
; RV64IM-NEXT:    sub a3, a6, a3
; RV64IM-NEXT:    sh a3, 6(a0)
; RV64IM-NEXT:    sh a2, 4(a0)
; RV64IM-NEXT:    sh a1, 2(a0)
; RV64IM-NEXT:    sh t0, 0(a0)
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  ret <4 x i16> %1
}
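
; With the splat divisor in fold_urem_vec_2 above, a single magic constant
; (the one for 95) is shared by all four lanes and only the mulhu/fixup
; sequence is repeated per element. Each lane is the scalar equivalent of
; (for a hypothetical lane value %v):
;   %q = udiv i16 %v, 95
;   %p = mul i16 %q, 95
;   %r = sub i16 %v, %p
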
; Don't fold if we can combine urem with udiv.
define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
; RV32I-LABEL: combine_urem_udiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -48
; RV32I-NEXT:    .cfi_def_cfa_offset 48
; RV32I-NEXT:    sw ra, 44(sp)
; RV32I-NEXT:    sw s0, 40(sp)
; RV32I-NEXT:    sw s1, 36(sp)
; RV32I-NEXT:    sw s2, 32(sp)
; RV32I-NEXT:    sw s3, 28(sp)
; RV32I-NEXT:    sw s4, 24(sp)
; RV32I-NEXT:    sw s5, 20(sp)
; RV32I-NEXT:    sw s6, 16(sp)
; RV32I-NEXT:    sw s7, 12(sp)
; RV32I-NEXT:    sw s8, 8(sp)
; RV32I-NEXT:    sw s9, 4(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    .cfi_offset s3, -20
; RV32I-NEXT:    .cfi_offset s4, -24
; RV32I-NEXT:    .cfi_offset s5, -28
; RV32I-NEXT:    .cfi_offset s6, -32
; RV32I-NEXT:    .cfi_offset s7, -36
; RV32I-NEXT:    .cfi_offset s8, -40
; RV32I-NEXT:    .cfi_offset s9, -44
; RV32I-NEXT:    lhu s2, 0(a1)
; RV32I-NEXT:    lhu s3, 4(a1)
; RV32I-NEXT:    lhu s4, 8(a1)
; RV32I-NEXT:    lhu s1, 12(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s5, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s4
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s6, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s7, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s8, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s9, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s4
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    add a0, s8, a0
; RV32I-NEXT:    add a1, s7, s1
; RV32I-NEXT:    add a2, s6, s4
; RV32I-NEXT:    add a3, s5, s9
; RV32I-NEXT:    sh a3, 6(s0)
; RV32I-NEXT:    sh a2, 4(s0)
; RV32I-NEXT:    sh a1, 2(s0)
; RV32I-NEXT:    sh a0, 0(s0)
; RV32I-NEXT:    lw s9, 4(sp)
; RV32I-NEXT:    lw s8, 8(sp)
; RV32I-NEXT:    lw s7, 12(sp)
; RV32I-NEXT:    lw s6, 16(sp)
; RV32I-NEXT:    lw s5, 20(sp)
; RV32I-NEXT:    lw s4, 24(sp)
; RV32I-NEXT:    lw s3, 28(sp)
; RV32I-NEXT:    lw s2, 32(sp)
; RV32I-NEXT:    lw s1, 36(sp)
; RV32I-NEXT:    lw s0, 40(sp)
; RV32I-NEXT:    lw ra, 44(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    .cfi_restore s3
; RV32I-NEXT:    .cfi_restore s4
; RV32I-NEXT:    .cfi_restore s5
; RV32I-NEXT:    .cfi_restore s6
; RV32I-NEXT:    .cfi_restore s7
; RV32I-NEXT:    .cfi_restore s8
; RV32I-NEXT:    .cfi_restore s9
; RV32I-NEXT:    addi sp, sp, 48
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a6, 0(a1)
; RV32IM-NEXT:    lhu a7, 4(a1)
; RV32IM-NEXT:    lhu a4, 12(a1)
; RV32IM-NEXT:    lhu a1, 8(a1)
; RV32IM-NEXT:    lui a5, 364242
; RV32IM-NEXT:    addi a5, a5, 777
; RV32IM-NEXT:    mulhu a2, a4, a5
; RV32IM-NEXT:    sub a3, a4, a2
; RV32IM-NEXT:    srli a3, a3, 1
; RV32IM-NEXT:    add a2, a3, a2
; RV32IM-NEXT:    srli t3, a2, 6
; RV32IM-NEXT:    addi t0, zero, 95
; RV32IM-NEXT:    mul a3, t3, t0
; RV32IM-NEXT:    sub t1, a4, a3
; RV32IM-NEXT:    mulhu a4, a1, a5
; RV32IM-NEXT:    sub a3, a1, a4
; RV32IM-NEXT:    srli a3, a3, 1
; RV32IM-NEXT:    add a3, a3, a4
; RV32IM-NEXT:    srli a3, a3, 6
; RV32IM-NEXT:    mul a4, a3, t0
; RV32IM-NEXT:    sub t2, a1, a4
; RV32IM-NEXT:    mulhu a4, a7, a5
; RV32IM-NEXT:    sub a1, a7, a4
; RV32IM-NEXT:    srli a1, a1, 1
; RV32IM-NEXT:    add a1, a1, a4
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    mul a4, a1, t0
; RV32IM-NEXT:    sub a4, a7, a4
; RV32IM-NEXT:    mulhu a5, a6, a5
; RV32IM-NEXT:    sub a2, a6, a5
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a2, a2, a5
; RV32IM-NEXT:    srli a2, a2, 6
; RV32IM-NEXT:    mul a5, a2, t0
; RV32IM-NEXT:    sub a5, a6, a5
; RV32IM-NEXT:    add a2, a5, a2
; RV32IM-NEXT:    add a1, a4, a1
; RV32IM-NEXT:    add a3, t2, a3
; RV32IM-NEXT:    add a4, t1, t3
; RV32IM-NEXT:    sh a4, 6(a0)
; RV32IM-NEXT:    sh a3, 4(a0)
; RV32IM-NEXT:    sh a1, 2(a0)
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -96
; RV64I-NEXT:    .cfi_def_cfa_offset 96
; RV64I-NEXT:    sd ra, 88(sp)
; RV64I-NEXT:    sd s0, 80(sp)
; RV64I-NEXT:    sd s1, 72(sp)
; RV64I-NEXT:    sd s2, 64(sp)
; RV64I-NEXT:    sd s3, 56(sp)
; RV64I-NEXT:    sd s4, 48(sp)
; RV64I-NEXT:    sd s5, 40(sp)
; RV64I-NEXT:    sd s6, 32(sp)
; RV64I-NEXT:    sd s7, 24(sp)
; RV64I-NEXT:    sd s8, 16(sp)
; RV64I-NEXT:    sd s9, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    .cfi_offset s3, -40
; RV64I-NEXT:    .cfi_offset s4, -48
; RV64I-NEXT:    .cfi_offset s5, -56
; RV64I-NEXT:    .cfi_offset s6, -64
; RV64I-NEXT:    .cfi_offset s7, -72
; RV64I-NEXT:    .cfi_offset s8, -80
; RV64I-NEXT:    .cfi_offset s9, -88
; RV64I-NEXT:    lhu s2, 0(a1)
; RV64I-NEXT:    lhu s3, 8(a1)
; RV64I-NEXT:    lhu s4, 16(a1)
; RV64I-NEXT:    lhu s1, 24(a1)
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s5, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s4
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s6, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s7, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s8, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s9, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s4
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    add a0, s8, a0
; RV64I-NEXT:    add a1, s7, s1
; RV64I-NEXT:    add a2, s6, s4
; RV64I-NEXT:    add a3, s5, s9
; RV64I-NEXT:    sh a3, 6(s0)
; RV64I-NEXT:    sh a2, 4(s0)
; RV64I-NEXT:    sh a1, 2(s0)
; RV64I-NEXT:    sh a0, 0(s0)
; RV64I-NEXT:    ld s9, 8(sp)
; RV64I-NEXT:    ld s8, 16(sp)
; RV64I-NEXT:    ld s7, 24(sp)
; RV64I-NEXT:    ld s6, 32(sp)
; RV64I-NEXT:    ld s5, 40(sp)
; RV64I-NEXT:    ld s4, 48(sp)
; RV64I-NEXT:    ld s3, 56(sp)
; RV64I-NEXT:    ld s2, 64(sp)
; RV64I-NEXT:    ld s1, 72(sp)
; RV64I-NEXT:    ld s0, 80(sp)
; RV64I-NEXT:    ld ra, 88(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    .cfi_restore s3
; RV64I-NEXT:    .cfi_restore s4
; RV64I-NEXT:    .cfi_restore s5
; RV64I-NEXT:    .cfi_restore s6
; RV64I-NEXT:    .cfi_restore s7
; RV64I-NEXT:    .cfi_restore s8
; RV64I-NEXT:    .cfi_restore s9
; RV64I-NEXT:    addi sp, sp, 96
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a6, 0(a1)
; RV64IM-NEXT:    lhu a7, 8(a1)
; RV64IM-NEXT:    lhu a4, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    lui a5, 1423
; RV64IM-NEXT:    addiw a5, a5, -733
; RV64IM-NEXT:    slli a5, a5, 15
; RV64IM-NEXT:    addi a5, a5, 1035
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, -1811
; RV64IM-NEXT:    slli a5, a5, 12
; RV64IM-NEXT:    addi a5, a5, 561
; RV64IM-NEXT:    mulhu a2, a1, a5
; RV64IM-NEXT:    sub a3, a1, a2
; RV64IM-NEXT:    srli a3, a3, 1
; RV64IM-NEXT:    add a2, a3, a2
; RV64IM-NEXT:    srli t3, a2, 6
; RV64IM-NEXT:    addi t0, zero, 95
; RV64IM-NEXT:    mul a3, t3, t0
; RV64IM-NEXT:    sub t1, a1, a3
; RV64IM-NEXT:    mulhu a3, a4, a5
; RV64IM-NEXT:    sub a1, a4, a3
; RV64IM-NEXT:    srli a1, a1, 1
; RV64IM-NEXT:    add a1, a1, a3
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    mul a3, a1, t0
; RV64IM-NEXT:    sub t2, a4, a3
; RV64IM-NEXT:    mulhu a4, a7, a5
; RV64IM-NEXT:    sub a3, a7, a4
; RV64IM-NEXT:    srli a3, a3, 1
; RV64IM-NEXT:    add a3, a3, a4
; RV64IM-NEXT:    srli a3, a3, 6
; RV64IM-NEXT:    mul a4, a3, t0
; RV64IM-NEXT:    sub a4, a7, a4
; RV64IM-NEXT:    mulhu a5, a6, a5
; RV64IM-NEXT:    sub a2, a6, a5
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a2, a2, a5
; RV64IM-NEXT:    srli a2, a2, 6
; RV64IM-NEXT:    mul a5, a2, t0
; RV64IM-NEXT:    sub a5, a6, a5
; RV64IM-NEXT:    add a2, a5, a2
; RV64IM-NEXT:    add a3, a4, a3
; RV64IM-NEXT:    add a1, t2, a1
; RV64IM-NEXT:    add a4, t1, t3
; RV64IM-NEXT:    sh a4, 6(a0)
; RV64IM-NEXT:    sh a1, 4(a0)
; RV64IM-NEXT:    sh a3, 2(a0)
; RV64IM-NEXT:    sh a2, 0(a0)
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %3 = add <4 x i16> %1, %2
  ret <4 x i16> %3
}
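
; In combine_urem_udiv above the expansion already computes the quotient, so
; no second division is needed: each srli 6 result feeds both the mul that
; forms the remainder and the trailing add. Per lane the result is
; (x - q*95) + q; e.g. for a hypothetical lane value of 1000, q = 10 and
; r = 50, so the stored lane is 60.
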
; Don't fold for divisors that are a power of two.
define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
; RV32I-LABEL: dont_fold_urem_power_of_two:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp)
; RV32I-NEXT:    sw s0, 24(sp)
; RV32I-NEXT:    sw s1, 20(sp)
; RV32I-NEXT:    sw s2, 16(sp)
; RV32I-NEXT:    sw s3, 12(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    .cfi_offset s3, -20
; RV32I-NEXT:    lhu s2, 8(a1)
; RV32I-NEXT:    lhu s3, 4(a1)
; RV32I-NEXT:    lhu s1, 0(a1)
; RV32I-NEXT:    lhu a2, 12(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    andi a1, s1, 63
; RV32I-NEXT:    andi a2, s3, 31
; RV32I-NEXT:    andi a3, s2, 7
; RV32I-NEXT:    sh a0, 6(s0)
; RV32I-NEXT:    sh a3, 4(s0)
; RV32I-NEXT:    sh a2, 2(s0)
; RV32I-NEXT:    sh a1, 0(s0)
; RV32I-NEXT:    lw s3, 12(sp)
; RV32I-NEXT:    lw s2, 16(sp)
; RV32I-NEXT:    lw s1, 20(sp)
; RV32I-NEXT:    lw s0, 24(sp)
; RV32I-NEXT:    lw ra, 28(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    .cfi_restore s3
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_power_of_two:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a6, 8(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 12(a1)
; RV32IM-NEXT:    lhu a1, 0(a1)
; RV32IM-NEXT:    lui a5, 364242
; RV32IM-NEXT:    addi a5, a5, 777
; RV32IM-NEXT:    mulhu a5, a4, a5
; RV32IM-NEXT:    sub a2, a4, a5
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a2, a2, a5
; RV32IM-NEXT:    srli a2, a2, 6
; RV32IM-NEXT:    addi a5, zero, 95
; RV32IM-NEXT:    mul a2, a2, a5
; RV32IM-NEXT:    sub a2, a4, a2
; RV32IM-NEXT:    andi a1, a1, 63
; RV32IM-NEXT:    andi a3, a3, 31
; RV32IM-NEXT:    andi a4, a6, 7
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a1, 0(a0)
; RV32IM-NEXT:    sh a2, 6(a0)
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_power_of_two:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp)
; RV64I-NEXT:    sd s0, 32(sp)
; RV64I-NEXT:    sd s1, 24(sp)
; RV64I-NEXT:    sd s2, 16(sp)
; RV64I-NEXT:    sd s3, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    .cfi_offset s3, -40
; RV64I-NEXT:    lhu s2, 16(a1)
; RV64I-NEXT:    lhu s3, 8(a1)
; RV64I-NEXT:    lhu s1, 0(a1)
; RV64I-NEXT:    lhu a2, 24(a1)
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    andi a1, s1, 63
; RV64I-NEXT:    andi a2, s3, 31
; RV64I-NEXT:    andi a3, s2, 7
; RV64I-NEXT:    sh a0, 6(s0)
; RV64I-NEXT:    sh a3, 4(s0)
; RV64I-NEXT:    sh a2, 2(s0)
; RV64I-NEXT:    sh a1, 0(s0)
; RV64I-NEXT:    ld s3, 8(sp)
; RV64I-NEXT:    ld s2, 16(sp)
; RV64I-NEXT:    ld s1, 24(sp)
; RV64I-NEXT:    ld s0, 32(sp)
; RV64I-NEXT:    ld ra, 40(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    .cfi_restore s3
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_power_of_two:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a6, 16(a1)
; RV64IM-NEXT:    lhu a3, 8(a1)
; RV64IM-NEXT:    lhu a4, 0(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    lui a5, 1423
; RV64IM-NEXT:    addiw a5, a5, -733
; RV64IM-NEXT:    slli a5, a5, 15
; RV64IM-NEXT:    addi a5, a5, 1035
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, -1811
; RV64IM-NEXT:    slli a5, a5, 12
; RV64IM-NEXT:    addi a5, a5, 561
; RV64IM-NEXT:    mulhu a5, a1, a5
; RV64IM-NEXT:    sub a2, a1, a5
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a2, a2, a5
; RV64IM-NEXT:    srli a2, a2, 6
; RV64IM-NEXT:    addi a5, zero, 95
; RV64IM-NEXT:    mul a2, a2, a5
; RV64IM-NEXT:    sub a1, a1, a2
; RV64IM-NEXT:    andi a2, a4, 63
; RV64IM-NEXT:    andi a3, a3, 31
; RV64IM-NEXT:    andi a4, a6, 7
; RV64IM-NEXT:    sh a4, 4(a0)
; RV64IM-NEXT:    sh a3, 2(a0)
; RV64IM-NEXT:    sh a2, 0(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
  ret <4 x i16> %1
}
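
; For the power-of-two lanes in dont_fold_urem_power_of_two above, x urem 2^k
; is just a mask of the low k bits, so the divisors 64, 32 and 8 become andi
; with 63, 31 and 7. E.g. for a hypothetical lane value of 1000:
; 1000 urem 64 = 1000 & 63 = 40, since 1000 = 15 * 64 + 40. Only the fourth
; lane (divisor 95) still needs the mulhu expansion or a libcall.
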
; Don't fold if the divisor is one.
define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; RV32I-LABEL: dont_fold_urem_one:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp)
; RV32I-NEXT:    sw s0, 24(sp)
; RV32I-NEXT:    sw s1, 20(sp)
; RV32I-NEXT:    sw s2, 16(sp)
; RV32I-NEXT:    sw s3, 12(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    .cfi_offset s3, -20
; RV32I-NEXT:    lhu s2, 12(a1)
; RV32I-NEXT:    lhu s1, 8(a1)
; RV32I-NEXT:    lhu a2, 4(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a1, zero, 654
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    addi a1, zero, 23
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a1, a0, 1327
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh zero, 0(s0)
; RV32I-NEXT:    sh a0, 6(s0)
; RV32I-NEXT:    sh s1, 4(s0)
; RV32I-NEXT:    sh s3, 2(s0)
; RV32I-NEXT:    lw s3, 12(sp)
; RV32I-NEXT:    lw s2, 16(sp)
; RV32I-NEXT:    lw s1, 20(sp)
; RV32I-NEXT:    lw s0, 24(sp)
; RV32I-NEXT:    lw ra, 28(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    .cfi_restore s3
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_one:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 4(a1)
; RV32IM-NEXT:    lhu a3, 12(a1)
; RV32IM-NEXT:    lhu a1, 8(a1)
; RV32IM-NEXT:    srli a4, a2, 1
; RV32IM-NEXT:    lui a5, 820904
; RV32IM-NEXT:    addi a5, a5, -1903
; RV32IM-NEXT:    mulhu a4, a4, a5
; RV32IM-NEXT:    srli a4, a4, 8
; RV32IM-NEXT:    addi a5, zero, 654
; RV32IM-NEXT:    mul a4, a4, a5
; RV32IM-NEXT:    sub a2, a2, a4
; RV32IM-NEXT:    lui a4, 729444
; RV32IM-NEXT:    addi a4, a4, 713
; RV32IM-NEXT:    mulhu a4, a1, a4
; RV32IM-NEXT:    srli a4, a4, 4
; RV32IM-NEXT:    addi a5, zero, 23
; RV32IM-NEXT:    mul a4, a4, a5
; RV32IM-NEXT:    sub a1, a1, a4
; RV32IM-NEXT:    lui a4, 395996
; RV32IM-NEXT:    addi a4, a4, -2009
; RV32IM-NEXT:    mulhu a4, a3, a4
; RV32IM-NEXT:    srli a4, a4, 11
; RV32IM-NEXT:    lui a5, 1
; RV32IM-NEXT:    addi a5, a5, 1327
; RV32IM-NEXT:    mul a4, a4, a5
; RV32IM-NEXT:    sub a3, a3, a4
; RV32IM-NEXT:    sh zero, 0(a0)
; RV32IM-NEXT:    sh a3, 6(a0)
; RV32IM-NEXT:    sh a1, 4(a0)
; RV32IM-NEXT:    sh a2, 2(a0)
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_one:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp)
; RV64I-NEXT:    sd s0, 32(sp)
; RV64I-NEXT:    sd s1, 24(sp)
; RV64I-NEXT:    sd s2, 16(sp)
; RV64I-NEXT:    sd s3, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    .cfi_offset s3, -40
; RV64I-NEXT:    lhu s2, 24(a1)
; RV64I-NEXT:    lhu s1, 16(a1)
; RV64I-NEXT:    lhu a2, 8(a1)
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    addi a1, zero, 654
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    addi a1, zero, 23
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a1, a0, 1327
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh zero, 0(s0)
; RV64I-NEXT:    sh a0, 6(s0)
; RV64I-NEXT:    sh s1, 4(s0)
; RV64I-NEXT:    sh s3, 2(s0)
; RV64I-NEXT:    ld s3, 8(sp)
; RV64I-NEXT:    ld s2, 16(sp)
; RV64I-NEXT:    ld s1, 24(sp)
; RV64I-NEXT:    ld s0, 32(sp)
; RV64I-NEXT:    ld ra, 40(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    .cfi_restore s3
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_one:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 24(a1)
; RV64IM-NEXT:    lhu a3, 8(a1)
; RV64IM-NEXT:    lhu a1, 16(a1)
; RV64IM-NEXT:    lui a4, 3206
; RV64IM-NEXT:    addiw a4, a4, -1781
; RV64IM-NEXT:    slli a4, a4, 13
; RV64IM-NEXT:    addi a4, a4, 1069
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, -1959
; RV64IM-NEXT:    slli a4, a4, 14
; RV64IM-NEXT:    addi a4, a4, 713
; RV64IM-NEXT:    mulhu a4, a1, a4
; RV64IM-NEXT:    sub a5, a1, a4
; RV64IM-NEXT:    srli a5, a5, 1
; RV64IM-NEXT:    add a4, a5, a4
; RV64IM-NEXT:    srli a4, a4, 4
; RV64IM-NEXT:    addi a5, zero, 23
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    sub a1, a1, a4
; RV64IM-NEXT:    srli a4, a3, 1
; RV64IM-NEXT:    lui a5, 6413
; RV64IM-NEXT:    addiw a5, a5, 1265
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, 1027
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, 1077
; RV64IM-NEXT:    slli a5, a5, 12
; RV64IM-NEXT:    addi a5, a5, 965
; RV64IM-NEXT:    mulhu a4, a4, a5
; RV64IM-NEXT:    srli a4, a4, 7
; RV64IM-NEXT:    addi a5, zero, 654
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    sub a3, a3, a4
; RV64IM-NEXT:    lui a4, 1044567
; RV64IM-NEXT:    addiw a4, a4, -575
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, 883
; RV64IM-NEXT:    slli a4, a4, 14
; RV64IM-NEXT:    addi a4, a4, -861
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, -179
; RV64IM-NEXT:    mulhu a4, a2, a4
; RV64IM-NEXT:    srli a4, a4, 12
; RV64IM-NEXT:    lui a5, 1
; RV64IM-NEXT:    addiw a5, a5, 1327
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    sub a2, a2, a4
; RV64IM-NEXT:    sh zero, 0(a0)
; RV64IM-NEXT:    sh a2, 6(a0)
; RV64IM-NEXT:    sh a3, 2(a0)
; RV64IM-NEXT:    sh a1, 4(a0)
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
  ret <4 x i16> %1
}
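
; In dont_fold_urem_one above the first lane folds away entirely: x urem 1 is
; always 0, so every configuration stores a constant zero (sh zero) and only
; the divisors 654, 23 and 5423 are computed. 5423 = 4096 + 1327, hence the
; lui 1 / addi(w) 1327 materialization of that divisor.
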
; Don't fold if the divisor is 2^16.
define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
; CHECK-LABEL: dont_fold_urem_i16_smax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423>
  ret <4 x i16> %1
}
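
; The constant i16 65536 appears to be truncated to 0 by the IR parser, so
; the second lane is a urem by zero; the whole operation then folds to
; poison and every configuration emits an empty body, which is why the four
; prefixes share the single CHECK block above.
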
; Don't fold i64 urem.
define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -48
; RV32I-NEXT:    .cfi_def_cfa_offset 48
; RV32I-NEXT:    sw ra, 44(sp)
; RV32I-NEXT:    sw s0, 40(sp)
; RV32I-NEXT:    sw s1, 36(sp)
; RV32I-NEXT:    sw s2, 32(sp)
; RV32I-NEXT:    sw s3, 28(sp)
; RV32I-NEXT:    sw s4, 24(sp)
; RV32I-NEXT:    sw s5, 20(sp)
; RV32I-NEXT:    sw s6, 16(sp)
; RV32I-NEXT:    sw s7, 12(sp)
; RV32I-NEXT:    sw s8, 8(sp)
; RV32I-NEXT:    sw s9, 4(sp)
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    .cfi_offset s3, -20
; RV32I-NEXT:    .cfi_offset s4, -24
; RV32I-NEXT:    .cfi_offset s5, -28
; RV32I-NEXT:    .cfi_offset s6, -32
; RV32I-NEXT:    .cfi_offset s7, -36
; RV32I-NEXT:    .cfi_offset s8, -40
; RV32I-NEXT:    .cfi_offset s9, -44
; RV32I-NEXT:    lw s2, 24(a1)
; RV32I-NEXT:    lw s3, 28(a1)
; RV32I-NEXT:    lw s4, 16(a1)
; RV32I-NEXT:    lw s5, 20(a1)
; RV32I-NEXT:    lw s6, 8(a1)
; RV32I-NEXT:    lw s1, 12(a1)
; RV32I-NEXT:    lw a3, 0(a1)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a2, zero, 1
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    mv a3, zero
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s7, a0
; RV32I-NEXT:    mv s8, a1
; RV32I-NEXT:    addi a2, zero, 654
; RV32I-NEXT:    mv a0, s6
; RV32I-NEXT:    mv a1, s1
; RV32I-NEXT:    mv a3, zero
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s6, a0
; RV32I-NEXT:    mv s9, a1
; RV32I-NEXT:    addi a2, zero, 23
; RV32I-NEXT:    mv a0, s4
; RV32I-NEXT:    mv a1, s5
; RV32I-NEXT:    mv a3, zero
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a2, a0, 1327
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    mv a1, s3
; RV32I-NEXT:    mv a3, zero
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    sw a1, 28(s0)
; RV32I-NEXT:    sw a0, 24(s0)
; RV32I-NEXT:    sw s1, 20(s0)
; RV32I-NEXT:    sw s4, 16(s0)
; RV32I-NEXT:    sw s9, 12(s0)
; RV32I-NEXT:    sw s6, 8(s0)
; RV32I-NEXT:    sw s8, 4(s0)
; RV32I-NEXT:    sw s7, 0(s0)
; RV32I-NEXT:    lw s9, 4(sp)
; RV32I-NEXT:    lw s8, 8(sp)
; RV32I-NEXT:    lw s7, 12(sp)
; RV32I-NEXT:    lw s6, 16(sp)
; RV32I-NEXT:    lw s5, 20(sp)
; RV32I-NEXT:    lw s4, 24(sp)
; RV32I-NEXT:    lw s3, 28(sp)
; RV32I-NEXT:    lw s2, 32(sp)
; RV32I-NEXT:    lw s1, 36(sp)
; RV32I-NEXT:    lw s0, 40(sp)
; RV32I-NEXT:    lw ra, 44(sp)
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    .cfi_restore s3
; RV32I-NEXT:    .cfi_restore s4
; RV32I-NEXT:    .cfi_restore s5
; RV32I-NEXT:    .cfi_restore s6
; RV32I-NEXT:    .cfi_restore s7
; RV32I-NEXT:    .cfi_restore s8
; RV32I-NEXT:    .cfi_restore s9
; RV32I-NEXT:    addi sp, sp, 48
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -48
; RV32IM-NEXT:    .cfi_def_cfa_offset 48
; RV32IM-NEXT:    sw ra, 44(sp)
; RV32IM-NEXT:    sw s0, 40(sp)
; RV32IM-NEXT:    sw s1, 36(sp)
; RV32IM-NEXT:    sw s2, 32(sp)
; RV32IM-NEXT:    sw s3, 28(sp)
; RV32IM-NEXT:    sw s4, 24(sp)
; RV32IM-NEXT:    sw s5, 20(sp)
; RV32IM-NEXT:    sw s6, 16(sp)
; RV32IM-NEXT:    sw s7, 12(sp)
; RV32IM-NEXT:    sw s8, 8(sp)
; RV32IM-NEXT:    sw s9, 4(sp)
; RV32IM-NEXT:    .cfi_offset ra, -4
; RV32IM-NEXT:    .cfi_offset s0, -8
; RV32IM-NEXT:    .cfi_offset s1, -12
; RV32IM-NEXT:    .cfi_offset s2, -16
; RV32IM-NEXT:    .cfi_offset s3, -20
; RV32IM-NEXT:    .cfi_offset s4, -24
; RV32IM-NEXT:    .cfi_offset s5, -28
; RV32IM-NEXT:    .cfi_offset s6, -32
; RV32IM-NEXT:    .cfi_offset s7, -36
; RV32IM-NEXT:    .cfi_offset s8, -40
; RV32IM-NEXT:    .cfi_offset s9, -44
; RV32IM-NEXT:    lw s2, 24(a1)
; RV32IM-NEXT:    lw s3, 28(a1)
; RV32IM-NEXT:    lw s4, 16(a1)
; RV32IM-NEXT:    lw s5, 20(a1)
; RV32IM-NEXT:    lw s6, 8(a1)
; RV32IM-NEXT:    lw s1, 12(a1)
; RV32IM-NEXT:    lw a3, 0(a1)
; RV32IM-NEXT:    lw a1, 4(a1)
; RV32IM-NEXT:    mv s0, a0
; RV32IM-NEXT:    addi a2, zero, 1
; RV32IM-NEXT:    mv a0, a3
; RV32IM-NEXT:    mv a3, zero
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s7, a0
; RV32IM-NEXT:    mv s8, a1
; RV32IM-NEXT:    addi a2, zero, 654
; RV32IM-NEXT:    mv a0, s6
; RV32IM-NEXT:    mv a1, s1
; RV32IM-NEXT:    mv a3, zero
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s6, a0
; RV32IM-NEXT:    mv s9, a1
; RV32IM-NEXT:    addi a2, zero, 23
; RV32IM-NEXT:    mv a0, s4
; RV32IM-NEXT:    mv a1, s5
; RV32IM-NEXT:    mv a3, zero
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s4, a0
; RV32IM-NEXT:    mv s1, a1
; RV32IM-NEXT:    lui a0, 1
; RV32IM-NEXT:    addi a2, a0, 1327
; RV32IM-NEXT:    mv a0, s2
; RV32IM-NEXT:    mv a1, s3
; RV32IM-NEXT:    mv a3, zero
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    sw a1, 28(s0)
; RV32IM-NEXT:    sw a0, 24(s0)
; RV32IM-NEXT:    sw s1, 20(s0)
; RV32IM-NEXT:    sw s4, 16(s0)
; RV32IM-NEXT:    sw s9, 12(s0)
; RV32IM-NEXT:    sw s6, 8(s0)
; RV32IM-NEXT:    sw s8, 4(s0)
; RV32IM-NEXT:    sw s7, 0(s0)
; RV32IM-NEXT:    lw s9, 4(sp)
; RV32IM-NEXT:    lw s8, 8(sp)
; RV32IM-NEXT:    lw s7, 12(sp)
; RV32IM-NEXT:    lw s6, 16(sp)
; RV32IM-NEXT:    lw s5, 20(sp)
; RV32IM-NEXT:    lw s4, 24(sp)
; RV32IM-NEXT:    lw s3, 28(sp)
; RV32IM-NEXT:    lw s2, 32(sp)
; RV32IM-NEXT:    lw s1, 36(sp)
; RV32IM-NEXT:    lw s0, 40(sp)
; RV32IM-NEXT:    lw ra, 44(sp)
; RV32IM-NEXT:    .cfi_restore ra
; RV32IM-NEXT:    .cfi_restore s0
; RV32IM-NEXT:    .cfi_restore s1
; RV32IM-NEXT:    .cfi_restore s2
; RV32IM-NEXT:    .cfi_restore s3
; RV32IM-NEXT:    .cfi_restore s4
; RV32IM-NEXT:    .cfi_restore s5
; RV32IM-NEXT:    .cfi_restore s6
; RV32IM-NEXT:    .cfi_restore s7
; RV32IM-NEXT:    .cfi_restore s8
; RV32IM-NEXT:    .cfi_restore s9
; RV32IM-NEXT:    addi sp, sp, 48
; RV32IM-NEXT:    .cfi_def_cfa_offset 0
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp)
; RV64I-NEXT:    sd s0, 32(sp)
; RV64I-NEXT:    sd s1, 24(sp)
; RV64I-NEXT:    sd s2, 16(sp)
; RV64I-NEXT:    sd s3, 8(sp)
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    .cfi_offset s3, -40
; RV64I-NEXT:    ld s2, 24(a1)
; RV64I-NEXT:    ld s1, 16(a1)
; RV64I-NEXT:    ld a2, 8(a1)
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    addi a1, zero, 654
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    addi a1, zero, 23
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a1, a0, 1327
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sd zero, 0(s0)
; RV64I-NEXT:    sd a0, 24(s0)
; RV64I-NEXT:    sd s1, 16(s0)
; RV64I-NEXT:    sd s3, 8(s0)
; RV64I-NEXT:    ld s3, 8(sp)
; RV64I-NEXT:    ld s2, 16(sp)
; RV64I-NEXT:    ld s1, 24(sp)
; RV64I-NEXT:    ld s0, 32(sp)
; RV64I-NEXT:    ld ra, 40(sp)
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    .cfi_restore s3
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    ld a2, 24(a1)
; RV64IM-NEXT:    ld a3, 8(a1)
; RV64IM-NEXT:    ld a1, 16(a1)
; RV64IM-NEXT:    lui a4, 3206
; RV64IM-NEXT:    addiw a4, a4, -1781
; RV64IM-NEXT:    slli a4, a4, 13
; RV64IM-NEXT:    addi a4, a4, 1069
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, -1959
; RV64IM-NEXT:    slli a4, a4, 14
; RV64IM-NEXT:    addi a4, a4, 713
; RV64IM-NEXT:    mulhu a4, a1, a4
; RV64IM-NEXT:    sub a5, a1, a4
; RV64IM-NEXT:    srli a5, a5, 1
; RV64IM-NEXT:    add a4, a5, a4
; RV64IM-NEXT:    srli a4, a4, 4
; RV64IM-NEXT:    addi a5, zero, 23
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    sub a1, a1, a4
; RV64IM-NEXT:    srli a4, a3, 1
; RV64IM-NEXT:    lui a5, 6413
; RV64IM-NEXT:    addiw a5, a5, 1265
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, 1027
; RV64IM-NEXT:    slli a5, a5, 13
; RV64IM-NEXT:    addi a5, a5, 1077
; RV64IM-NEXT:    slli a5, a5, 12
; RV64IM-NEXT:    addi a5, a5, 965
; RV64IM-NEXT:    mulhu a4, a4, a5
; RV64IM-NEXT:    srli a4, a4, 7
; RV64IM-NEXT:    addi a5, zero, 654
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    sub a3, a3, a4
; RV64IM-NEXT:    lui a4, 1044567
; RV64IM-NEXT:    addiw a4, a4, -575
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, 883
; RV64IM-NEXT:    slli a4, a4, 14
; RV64IM-NEXT:    addi a4, a4, -861
; RV64IM-NEXT:    slli a4, a4, 12
; RV64IM-NEXT:    addi a4, a4, -179
; RV64IM-NEXT:    mulhu a4, a2, a4
; RV64IM-NEXT:    srli a4, a4, 12
; RV64IM-NEXT:    lui a5, 1
; RV64IM-NEXT:    addiw a5, a5, 1327
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    sub a2, a2, a4
; RV64IM-NEXT:    sd zero, 0(a0)
; RV64IM-NEXT:    sd a2, 24(a0)
; RV64IM-NEXT:    sd a3, 8(a0)
; RV64IM-NEXT:    sd a1, 16(a0)
; RV64IM-NEXT:    .cfi_def_cfa_offset 0
; RV64IM-NEXT:    ret
  %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}
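
; On RV64IM the same magic-reciprocal expansion applies to dont_fold_urem_i64
; above, but each 64-bit constant costs a lui plus alternating slli/addi
; steps to materialize. E.g. the constant built for the divisor 23 is
;   (((3206 * 2^12 - 1781) * 2^13 + 1069) * 2^12 - 1959) * 2^14 + 713
;     = 7218291159277650633,
; and 2^64 + 7218291159277650633 = ceil(2^69 / 23). RV64 also folds the
; divisor-1 lane to a zero store (sd zero), while RV32 has no 64-bit
; multiply-high at all, so both RV32 runs call __umoddi3 for every lane,
; including the divisor-1 lane.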