1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck -check-prefix=RV32I %s
4 ; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
5 ; RUN: | FileCheck -check-prefix=RV32IM %s
6 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
7 ; RUN: | FileCheck -check-prefix=RV64I %s
8 ; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
9 ; RUN: | FileCheck -check-prefix=RV64IM %s
; Signed remainder of a <4 x i16> vector where every lane has a different
; constant divisor (95, -124, 98, -1003).
; Without the M extension the expected code issues one __modsi3 (rv32) or
; __moddi3 (rv64) libcall per lane, passing the divisor in a1.
; With +m each lane is expected to be expanded inline: mulh by a per-divisor
; multiplier, a sign-bit/shift fixup to form the quotient, then mul and sub to
; recover the remainder.
11 define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
12 ; RV32I-LABEL: fold_srem_vec_1:
14 ; RV32I-NEXT: addi sp, sp, -32
15 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
16 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
17 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
18 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
19 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
20 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
21 ; RV32I-NEXT: lh s0, 12(a1)
22 ; RV32I-NEXT: lh s1, 8(a1)
23 ; RV32I-NEXT: lh s2, 4(a1)
24 ; RV32I-NEXT: lh a2, 0(a1)
25 ; RV32I-NEXT: mv s3, a0
26 ; RV32I-NEXT: li a1, 95
27 ; RV32I-NEXT: mv a0, a2
28 ; RV32I-NEXT: call __modsi3
29 ; RV32I-NEXT: mv s4, a0
30 ; RV32I-NEXT: li a1, -124
31 ; RV32I-NEXT: mv a0, s2
32 ; RV32I-NEXT: call __modsi3
33 ; RV32I-NEXT: mv s2, a0
34 ; RV32I-NEXT: li a1, 98
35 ; RV32I-NEXT: mv a0, s1
36 ; RV32I-NEXT: call __modsi3
37 ; RV32I-NEXT: mv s1, a0
38 ; RV32I-NEXT: li a1, -1003
39 ; RV32I-NEXT: mv a0, s0
40 ; RV32I-NEXT: call __modsi3
41 ; RV32I-NEXT: sh a0, 6(s3)
42 ; RV32I-NEXT: sh s1, 4(s3)
43 ; RV32I-NEXT: sh s2, 2(s3)
44 ; RV32I-NEXT: sh s4, 0(s3)
45 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
46 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
47 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
48 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
49 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
50 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
51 ; RV32I-NEXT: addi sp, sp, 32
54 ; RV32IM-LABEL: fold_srem_vec_1:
56 ; RV32IM-NEXT: lh a2, 12(a1)
57 ; RV32IM-NEXT: lh a3, 8(a1)
58 ; RV32IM-NEXT: lh a4, 0(a1)
59 ; RV32IM-NEXT: lh a1, 4(a1)
60 ; RV32IM-NEXT: lui a5, 706409
61 ; RV32IM-NEXT: addi a5, a5, 389
62 ; RV32IM-NEXT: mulh a5, a4, a5
63 ; RV32IM-NEXT: add a5, a5, a4
64 ; RV32IM-NEXT: srli a6, a5, 31
65 ; RV32IM-NEXT: srli a5, a5, 6
66 ; RV32IM-NEXT: add a5, a5, a6
67 ; RV32IM-NEXT: li a6, 95
68 ; RV32IM-NEXT: mul a5, a5, a6
69 ; RV32IM-NEXT: sub a4, a4, a5
70 ; RV32IM-NEXT: lui a5, 507375
71 ; RV32IM-NEXT: addi a5, a5, 1981
72 ; RV32IM-NEXT: mulh a5, a1, a5
73 ; RV32IM-NEXT: sub a5, a5, a1
74 ; RV32IM-NEXT: srli a6, a5, 31
75 ; RV32IM-NEXT: srli a5, a5, 6
76 ; RV32IM-NEXT: add a5, a5, a6
77 ; RV32IM-NEXT: li a6, -124
78 ; RV32IM-NEXT: mul a5, a5, a6
79 ; RV32IM-NEXT: sub a1, a1, a5
80 ; RV32IM-NEXT: lui a5, 342392
81 ; RV32IM-NEXT: addi a5, a5, 669
82 ; RV32IM-NEXT: mulh a5, a3, a5
83 ; RV32IM-NEXT: srli a6, a5, 31
84 ; RV32IM-NEXT: srli a5, a5, 5
85 ; RV32IM-NEXT: add a5, a5, a6
86 ; RV32IM-NEXT: li a6, 98
87 ; RV32IM-NEXT: mul a5, a5, a6
88 ; RV32IM-NEXT: sub a3, a3, a5
89 ; RV32IM-NEXT: lui a5, 780943
90 ; RV32IM-NEXT: addi a5, a5, 1809
91 ; RV32IM-NEXT: mulh a5, a2, a5
92 ; RV32IM-NEXT: srli a6, a5, 31
93 ; RV32IM-NEXT: srli a5, a5, 8
94 ; RV32IM-NEXT: add a5, a5, a6
95 ; RV32IM-NEXT: li a6, -1003
96 ; RV32IM-NEXT: mul a5, a5, a6
97 ; RV32IM-NEXT: sub a2, a2, a5
98 ; RV32IM-NEXT: sh a2, 6(a0)
99 ; RV32IM-NEXT: sh a3, 4(a0)
100 ; RV32IM-NEXT: sh a1, 2(a0)
101 ; RV32IM-NEXT: sh a4, 0(a0)
104 ; RV64I-LABEL: fold_srem_vec_1:
106 ; RV64I-NEXT: addi sp, sp, -48
107 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
108 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
109 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
110 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
111 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
112 ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
113 ; RV64I-NEXT: lh s0, 24(a1)
114 ; RV64I-NEXT: lh s1, 16(a1)
115 ; RV64I-NEXT: lh s2, 8(a1)
116 ; RV64I-NEXT: lh a2, 0(a1)
117 ; RV64I-NEXT: mv s3, a0
118 ; RV64I-NEXT: li a1, 95
119 ; RV64I-NEXT: mv a0, a2
120 ; RV64I-NEXT: call __moddi3
121 ; RV64I-NEXT: mv s4, a0
122 ; RV64I-NEXT: li a1, -124
123 ; RV64I-NEXT: mv a0, s2
124 ; RV64I-NEXT: call __moddi3
125 ; RV64I-NEXT: mv s2, a0
126 ; RV64I-NEXT: li a1, 98
127 ; RV64I-NEXT: mv a0, s1
128 ; RV64I-NEXT: call __moddi3
129 ; RV64I-NEXT: mv s1, a0
130 ; RV64I-NEXT: li a1, -1003
131 ; RV64I-NEXT: mv a0, s0
132 ; RV64I-NEXT: call __moddi3
133 ; RV64I-NEXT: sh a0, 6(s3)
134 ; RV64I-NEXT: sh s1, 4(s3)
135 ; RV64I-NEXT: sh s2, 2(s3)
136 ; RV64I-NEXT: sh s4, 0(s3)
137 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
138 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
139 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
140 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
141 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
142 ; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
143 ; RV64I-NEXT: addi sp, sp, 48
146 ; RV64IM-LABEL: fold_srem_vec_1:
148 ; RV64IM-NEXT: lh a2, 0(a1)
149 ; RV64IM-NEXT: lui a3, %hi(.LCPI0_0)
150 ; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3)
151 ; RV64IM-NEXT: lh a4, 24(a1)
152 ; RV64IM-NEXT: lh a5, 16(a1)
153 ; RV64IM-NEXT: lh a1, 8(a1)
154 ; RV64IM-NEXT: mulh a3, a2, a3
155 ; RV64IM-NEXT: add a3, a3, a2
156 ; RV64IM-NEXT: srli a6, a3, 63
157 ; RV64IM-NEXT: srli a3, a3, 6
158 ; RV64IM-NEXT: add a3, a3, a6
159 ; RV64IM-NEXT: lui a6, %hi(.LCPI0_1)
160 ; RV64IM-NEXT: ld a6, %lo(.LCPI0_1)(a6)
161 ; RV64IM-NEXT: li a7, 95
162 ; RV64IM-NEXT: mul a3, a3, a7
163 ; RV64IM-NEXT: subw a2, a2, a3
164 ; RV64IM-NEXT: mulh a3, a1, a6
165 ; RV64IM-NEXT: sub a3, a3, a1
166 ; RV64IM-NEXT: srli a6, a3, 63
167 ; RV64IM-NEXT: srli a3, a3, 6
168 ; RV64IM-NEXT: add a3, a3, a6
169 ; RV64IM-NEXT: lui a6, %hi(.LCPI0_2)
170 ; RV64IM-NEXT: ld a6, %lo(.LCPI0_2)(a6)
171 ; RV64IM-NEXT: li a7, -124
172 ; RV64IM-NEXT: mul a3, a3, a7
173 ; RV64IM-NEXT: subw a1, a1, a3
174 ; RV64IM-NEXT: mulh a3, a5, a6
175 ; RV64IM-NEXT: srli a6, a3, 63
176 ; RV64IM-NEXT: srli a3, a3, 5
177 ; RV64IM-NEXT: add a3, a3, a6
178 ; RV64IM-NEXT: lui a6, %hi(.LCPI0_3)
179 ; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6)
180 ; RV64IM-NEXT: li a7, 98
181 ; RV64IM-NEXT: mul a3, a3, a7
182 ; RV64IM-NEXT: subw a5, a5, a3
183 ; RV64IM-NEXT: mulh a3, a4, a6
184 ; RV64IM-NEXT: srli a6, a3, 63
185 ; RV64IM-NEXT: srli a3, a3, 7
186 ; RV64IM-NEXT: add a3, a3, a6
187 ; RV64IM-NEXT: li a6, -1003
188 ; RV64IM-NEXT: mul a3, a3, a6
189 ; RV64IM-NEXT: subw a4, a4, a3
190 ; RV64IM-NEXT: sh a4, 6(a0)
191 ; RV64IM-NEXT: sh a5, 4(a0)
192 ; RV64IM-NEXT: sh a1, 2(a0)
193 ; RV64IM-NEXT: sh a2, 0(a0)
195 %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
; Signed remainder of a <4 x i16> vector where all four lanes share the same
; divisor (95).
; Without the M extension the expected code is four libcalls (__modsi3 on
; rv32, __moddi3 on rv64), each with a1 = 95.
; With +m the multiplier and the constant 95 are expected to be materialized
; once and reused by all four per-lane mulh / fixup / mul / sub sequences.
199 define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
200 ; RV32I-LABEL: fold_srem_vec_2:
202 ; RV32I-NEXT: addi sp, sp, -32
203 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
204 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
205 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
206 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
207 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
208 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
209 ; RV32I-NEXT: lh s0, 12(a1)
210 ; RV32I-NEXT: lh s1, 8(a1)
211 ; RV32I-NEXT: lh s2, 4(a1)
212 ; RV32I-NEXT: lh a2, 0(a1)
213 ; RV32I-NEXT: mv s3, a0
214 ; RV32I-NEXT: li a1, 95
215 ; RV32I-NEXT: mv a0, a2
216 ; RV32I-NEXT: call __modsi3
217 ; RV32I-NEXT: mv s4, a0
218 ; RV32I-NEXT: li a1, 95
219 ; RV32I-NEXT: mv a0, s2
220 ; RV32I-NEXT: call __modsi3
221 ; RV32I-NEXT: mv s2, a0
222 ; RV32I-NEXT: li a1, 95
223 ; RV32I-NEXT: mv a0, s1
224 ; RV32I-NEXT: call __modsi3
225 ; RV32I-NEXT: mv s1, a0
226 ; RV32I-NEXT: li a1, 95
227 ; RV32I-NEXT: mv a0, s0
228 ; RV32I-NEXT: call __modsi3
229 ; RV32I-NEXT: sh a0, 6(s3)
230 ; RV32I-NEXT: sh s1, 4(s3)
231 ; RV32I-NEXT: sh s2, 2(s3)
232 ; RV32I-NEXT: sh s4, 0(s3)
233 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
234 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
235 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
236 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
237 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
238 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
239 ; RV32I-NEXT: addi sp, sp, 32
242 ; RV32IM-LABEL: fold_srem_vec_2:
244 ; RV32IM-NEXT: lh a2, 12(a1)
245 ; RV32IM-NEXT: lh a3, 8(a1)
246 ; RV32IM-NEXT: lh a4, 0(a1)
247 ; RV32IM-NEXT: lh a1, 4(a1)
248 ; RV32IM-NEXT: lui a5, 706409
249 ; RV32IM-NEXT: addi a5, a5, 389
250 ; RV32IM-NEXT: mulh a6, a4, a5
251 ; RV32IM-NEXT: add a6, a6, a4
252 ; RV32IM-NEXT: srli a7, a6, 31
253 ; RV32IM-NEXT: srli a6, a6, 6
254 ; RV32IM-NEXT: add a6, a6, a7
255 ; RV32IM-NEXT: li a7, 95
256 ; RV32IM-NEXT: mul a6, a6, a7
257 ; RV32IM-NEXT: sub a4, a4, a6
258 ; RV32IM-NEXT: mulh a6, a1, a5
259 ; RV32IM-NEXT: add a6, a6, a1
260 ; RV32IM-NEXT: srli t0, a6, 31
261 ; RV32IM-NEXT: srli a6, a6, 6
262 ; RV32IM-NEXT: add a6, a6, t0
263 ; RV32IM-NEXT: mul a6, a6, a7
264 ; RV32IM-NEXT: sub a1, a1, a6
265 ; RV32IM-NEXT: mulh a6, a3, a5
266 ; RV32IM-NEXT: add a6, a6, a3
267 ; RV32IM-NEXT: srli t0, a6, 31
268 ; RV32IM-NEXT: srli a6, a6, 6
269 ; RV32IM-NEXT: add a6, a6, t0
270 ; RV32IM-NEXT: mul a6, a6, a7
271 ; RV32IM-NEXT: sub a3, a3, a6
272 ; RV32IM-NEXT: mulh a5, a2, a5
273 ; RV32IM-NEXT: add a5, a5, a2
274 ; RV32IM-NEXT: srli a6, a5, 31
275 ; RV32IM-NEXT: srli a5, a5, 6
276 ; RV32IM-NEXT: add a5, a5, a6
277 ; RV32IM-NEXT: mul a5, a5, a7
278 ; RV32IM-NEXT: sub a2, a2, a5
279 ; RV32IM-NEXT: sh a2, 6(a0)
280 ; RV32IM-NEXT: sh a3, 4(a0)
281 ; RV32IM-NEXT: sh a1, 2(a0)
282 ; RV32IM-NEXT: sh a4, 0(a0)
285 ; RV64I-LABEL: fold_srem_vec_2:
287 ; RV64I-NEXT: addi sp, sp, -48
288 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
289 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
290 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
291 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
292 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
293 ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
294 ; RV64I-NEXT: lh s0, 24(a1)
295 ; RV64I-NEXT: lh s1, 16(a1)
296 ; RV64I-NEXT: lh s2, 8(a1)
297 ; RV64I-NEXT: lh a2, 0(a1)
298 ; RV64I-NEXT: mv s3, a0
299 ; RV64I-NEXT: li a1, 95
300 ; RV64I-NEXT: mv a0, a2
301 ; RV64I-NEXT: call __moddi3
302 ; RV64I-NEXT: mv s4, a0
303 ; RV64I-NEXT: li a1, 95
304 ; RV64I-NEXT: mv a0, s2
305 ; RV64I-NEXT: call __moddi3
306 ; RV64I-NEXT: mv s2, a0
307 ; RV64I-NEXT: li a1, 95
308 ; RV64I-NEXT: mv a0, s1
309 ; RV64I-NEXT: call __moddi3
310 ; RV64I-NEXT: mv s1, a0
311 ; RV64I-NEXT: li a1, 95
312 ; RV64I-NEXT: mv a0, s0
313 ; RV64I-NEXT: call __moddi3
314 ; RV64I-NEXT: sh a0, 6(s3)
315 ; RV64I-NEXT: sh s1, 4(s3)
316 ; RV64I-NEXT: sh s2, 2(s3)
317 ; RV64I-NEXT: sh s4, 0(s3)
318 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
319 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
320 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
321 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
322 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
323 ; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
324 ; RV64I-NEXT: addi sp, sp, 48
327 ; RV64IM-LABEL: fold_srem_vec_2:
329 ; RV64IM-NEXT: lh a2, 0(a1)
330 ; RV64IM-NEXT: lui a3, %hi(.LCPI1_0)
331 ; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3)
332 ; RV64IM-NEXT: lh a4, 24(a1)
333 ; RV64IM-NEXT: lh a5, 16(a1)
334 ; RV64IM-NEXT: lh a1, 8(a1)
335 ; RV64IM-NEXT: mulh a6, a2, a3
336 ; RV64IM-NEXT: add a6, a6, a2
337 ; RV64IM-NEXT: srli a7, a6, 63
338 ; RV64IM-NEXT: srli a6, a6, 6
339 ; RV64IM-NEXT: add a6, a6, a7
340 ; RV64IM-NEXT: li a7, 95
341 ; RV64IM-NEXT: mul a6, a6, a7
342 ; RV64IM-NEXT: subw a2, a2, a6
343 ; RV64IM-NEXT: mulh a6, a1, a3
344 ; RV64IM-NEXT: add a6, a6, a1
345 ; RV64IM-NEXT: srli t0, a6, 63
346 ; RV64IM-NEXT: srli a6, a6, 6
347 ; RV64IM-NEXT: add a6, a6, t0
348 ; RV64IM-NEXT: mul a6, a6, a7
349 ; RV64IM-NEXT: subw a1, a1, a6
350 ; RV64IM-NEXT: mulh a6, a5, a3
351 ; RV64IM-NEXT: add a6, a6, a5
352 ; RV64IM-NEXT: srli t0, a6, 63
353 ; RV64IM-NEXT: srli a6, a6, 6
354 ; RV64IM-NEXT: add a6, a6, t0
355 ; RV64IM-NEXT: mul a6, a6, a7
356 ; RV64IM-NEXT: subw a5, a5, a6
357 ; RV64IM-NEXT: mulh a3, a4, a3
358 ; RV64IM-NEXT: add a3, a3, a4
359 ; RV64IM-NEXT: srli a6, a3, 63
360 ; RV64IM-NEXT: srli a3, a3, 6
361 ; RV64IM-NEXT: add a3, a3, a6
362 ; RV64IM-NEXT: mul a3, a3, a7
363 ; RV64IM-NEXT: subw a4, a4, a3
364 ; RV64IM-NEXT: sh a4, 6(a0)
365 ; RV64IM-NEXT: sh a5, 4(a0)
366 ; RV64IM-NEXT: sh a1, 2(a0)
367 ; RV64IM-NEXT: sh a2, 0(a0)
369 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
374 ; Don't fold if we can combine srem with sdiv.
; The same input vector is both srem'd and sdiv'd by 95, and the two results
; are added lane by lane.
; Without the M extension the expected code makes separate remainder and
; quotient libcalls for every lane (__modsi3 and __divsi3 on rv32, __moddi3
; and __divdi3 on rv64) and then adds the pairs.
; With +m each lane's quotient is expected to be computed once (mulh, add,
; srli/srai fixup); the final value is then formed as x + q - q*95, so the
; remainder reuses the quotient instead of being expanded independently.
375 define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
376 ; RV32I-LABEL: combine_srem_sdiv:
378 ; RV32I-NEXT: addi sp, sp, -48
379 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
380 ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
381 ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
382 ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
383 ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
384 ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
385 ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
386 ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
387 ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
388 ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
389 ; RV32I-NEXT: lh s1, 0(a1)
390 ; RV32I-NEXT: lh s2, 4(a1)
391 ; RV32I-NEXT: lh s3, 8(a1)
392 ; RV32I-NEXT: lh s4, 12(a1)
393 ; RV32I-NEXT: mv s0, a0
394 ; RV32I-NEXT: li a1, 95
395 ; RV32I-NEXT: mv a0, s4
396 ; RV32I-NEXT: call __modsi3
397 ; RV32I-NEXT: mv s5, a0
398 ; RV32I-NEXT: li a1, 95
399 ; RV32I-NEXT: mv a0, s3
400 ; RV32I-NEXT: call __modsi3
401 ; RV32I-NEXT: mv s6, a0
402 ; RV32I-NEXT: li a1, 95
403 ; RV32I-NEXT: mv a0, s2
404 ; RV32I-NEXT: call __modsi3
405 ; RV32I-NEXT: mv s7, a0
406 ; RV32I-NEXT: li a1, 95
407 ; RV32I-NEXT: mv a0, s1
408 ; RV32I-NEXT: call __modsi3
409 ; RV32I-NEXT: mv s8, a0
410 ; RV32I-NEXT: li a1, 95
411 ; RV32I-NEXT: mv a0, s4
412 ; RV32I-NEXT: call __divsi3
413 ; RV32I-NEXT: mv s4, a0
414 ; RV32I-NEXT: li a1, 95
415 ; RV32I-NEXT: mv a0, s3
416 ; RV32I-NEXT: call __divsi3
417 ; RV32I-NEXT: mv s3, a0
418 ; RV32I-NEXT: li a1, 95
419 ; RV32I-NEXT: mv a0, s2
420 ; RV32I-NEXT: call __divsi3
421 ; RV32I-NEXT: mv s2, a0
422 ; RV32I-NEXT: li a1, 95
423 ; RV32I-NEXT: mv a0, s1
424 ; RV32I-NEXT: call __divsi3
425 ; RV32I-NEXT: add a0, s8, a0
426 ; RV32I-NEXT: add s2, s7, s2
427 ; RV32I-NEXT: add s3, s6, s3
428 ; RV32I-NEXT: add s4, s5, s4
429 ; RV32I-NEXT: sh s4, 6(s0)
430 ; RV32I-NEXT: sh s3, 4(s0)
431 ; RV32I-NEXT: sh s2, 2(s0)
432 ; RV32I-NEXT: sh a0, 0(s0)
433 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
434 ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
435 ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
436 ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
437 ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
438 ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
439 ; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
440 ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
441 ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
442 ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
443 ; RV32I-NEXT: addi sp, sp, 48
446 ; RV32IM-LABEL: combine_srem_sdiv:
448 ; RV32IM-NEXT: lh a2, 0(a1)
449 ; RV32IM-NEXT: lh a3, 4(a1)
450 ; RV32IM-NEXT: lh a4, 12(a1)
451 ; RV32IM-NEXT: lh a1, 8(a1)
452 ; RV32IM-NEXT: lui a5, 706409
453 ; RV32IM-NEXT: addi a5, a5, 389
454 ; RV32IM-NEXT: mulh a6, a4, a5
455 ; RV32IM-NEXT: add a6, a6, a4
456 ; RV32IM-NEXT: srli a7, a6, 31
457 ; RV32IM-NEXT: srai a6, a6, 6
458 ; RV32IM-NEXT: add a6, a6, a7
459 ; RV32IM-NEXT: li a7, 95
460 ; RV32IM-NEXT: mul t0, a6, a7
461 ; RV32IM-NEXT: mulh t1, a1, a5
462 ; RV32IM-NEXT: add t1, t1, a1
463 ; RV32IM-NEXT: srli t2, t1, 31
464 ; RV32IM-NEXT: srai t1, t1, 6
465 ; RV32IM-NEXT: add t1, t1, t2
466 ; RV32IM-NEXT: mul t2, t1, a7
467 ; RV32IM-NEXT: mulh t3, a3, a5
468 ; RV32IM-NEXT: add t3, t3, a3
469 ; RV32IM-NEXT: srli t4, t3, 31
470 ; RV32IM-NEXT: srai t3, t3, 6
471 ; RV32IM-NEXT: add t3, t3, t4
472 ; RV32IM-NEXT: mul t4, t3, a7
473 ; RV32IM-NEXT: mulh a5, a2, a5
474 ; RV32IM-NEXT: add a5, a5, a2
475 ; RV32IM-NEXT: srli t5, a5, 31
476 ; RV32IM-NEXT: srai a5, a5, 6
477 ; RV32IM-NEXT: add a5, a5, t5
478 ; RV32IM-NEXT: mul a7, a5, a7
479 ; RV32IM-NEXT: add a2, a2, a5
480 ; RV32IM-NEXT: sub a2, a2, a7
481 ; RV32IM-NEXT: add a3, a3, t3
482 ; RV32IM-NEXT: sub a3, a3, t4
483 ; RV32IM-NEXT: add a1, a1, t1
484 ; RV32IM-NEXT: sub a1, a1, t2
485 ; RV32IM-NEXT: add a4, a4, a6
486 ; RV32IM-NEXT: sub a4, a4, t0
487 ; RV32IM-NEXT: sh a4, 6(a0)
488 ; RV32IM-NEXT: sh a1, 4(a0)
489 ; RV32IM-NEXT: sh a3, 2(a0)
490 ; RV32IM-NEXT: sh a2, 0(a0)
493 ; RV64I-LABEL: combine_srem_sdiv:
495 ; RV64I-NEXT: addi sp, sp, -80
496 ; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
497 ; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
498 ; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
499 ; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
500 ; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
501 ; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
502 ; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
503 ; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
504 ; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
505 ; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill
506 ; RV64I-NEXT: lh s1, 0(a1)
507 ; RV64I-NEXT: lh s2, 8(a1)
508 ; RV64I-NEXT: lh s3, 16(a1)
509 ; RV64I-NEXT: lh s4, 24(a1)
510 ; RV64I-NEXT: mv s0, a0
511 ; RV64I-NEXT: li a1, 95
512 ; RV64I-NEXT: mv a0, s4
513 ; RV64I-NEXT: call __moddi3
514 ; RV64I-NEXT: mv s5, a0
515 ; RV64I-NEXT: li a1, 95
516 ; RV64I-NEXT: mv a0, s3
517 ; RV64I-NEXT: call __moddi3
518 ; RV64I-NEXT: mv s6, a0
519 ; RV64I-NEXT: li a1, 95
520 ; RV64I-NEXT: mv a0, s2
521 ; RV64I-NEXT: call __moddi3
522 ; RV64I-NEXT: mv s7, a0
523 ; RV64I-NEXT: li a1, 95
524 ; RV64I-NEXT: mv a0, s1
525 ; RV64I-NEXT: call __moddi3
526 ; RV64I-NEXT: mv s8, a0
527 ; RV64I-NEXT: li a1, 95
528 ; RV64I-NEXT: mv a0, s4
529 ; RV64I-NEXT: call __divdi3
530 ; RV64I-NEXT: mv s4, a0
531 ; RV64I-NEXT: li a1, 95
532 ; RV64I-NEXT: mv a0, s3
533 ; RV64I-NEXT: call __divdi3
534 ; RV64I-NEXT: mv s3, a0
535 ; RV64I-NEXT: li a1, 95
536 ; RV64I-NEXT: mv a0, s2
537 ; RV64I-NEXT: call __divdi3
538 ; RV64I-NEXT: mv s2, a0
539 ; RV64I-NEXT: li a1, 95
540 ; RV64I-NEXT: mv a0, s1
541 ; RV64I-NEXT: call __divdi3
542 ; RV64I-NEXT: add a0, s8, a0
543 ; RV64I-NEXT: add s2, s7, s2
544 ; RV64I-NEXT: add s3, s6, s3
545 ; RV64I-NEXT: add s4, s5, s4
546 ; RV64I-NEXT: sh s4, 6(s0)
547 ; RV64I-NEXT: sh s3, 4(s0)
548 ; RV64I-NEXT: sh s2, 2(s0)
549 ; RV64I-NEXT: sh a0, 0(s0)
550 ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
551 ; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
552 ; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
553 ; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
554 ; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
555 ; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
556 ; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
557 ; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
558 ; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
559 ; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload
560 ; RV64I-NEXT: addi sp, sp, 80
563 ; RV64IM-LABEL: combine_srem_sdiv:
565 ; RV64IM-NEXT: lh a2, 24(a1)
566 ; RV64IM-NEXT: lui a3, %hi(.LCPI2_0)
567 ; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3)
568 ; RV64IM-NEXT: lh a4, 0(a1)
569 ; RV64IM-NEXT: lh a5, 8(a1)
570 ; RV64IM-NEXT: lh a1, 16(a1)
571 ; RV64IM-NEXT: mulh a6, a2, a3
572 ; RV64IM-NEXT: add a6, a6, a2
573 ; RV64IM-NEXT: srli a7, a6, 63
574 ; RV64IM-NEXT: srai a6, a6, 6
575 ; RV64IM-NEXT: add a6, a6, a7
576 ; RV64IM-NEXT: li a7, 95
577 ; RV64IM-NEXT: mul t0, a6, a7
578 ; RV64IM-NEXT: mulh t1, a1, a3
579 ; RV64IM-NEXT: add t1, t1, a1
580 ; RV64IM-NEXT: srli t2, t1, 63
581 ; RV64IM-NEXT: srai t1, t1, 6
582 ; RV64IM-NEXT: add t1, t1, t2
583 ; RV64IM-NEXT: mul t2, t1, a7
584 ; RV64IM-NEXT: mulh t3, a5, a3
585 ; RV64IM-NEXT: add t3, t3, a5
586 ; RV64IM-NEXT: srli t4, t3, 63
587 ; RV64IM-NEXT: srai t3, t3, 6
588 ; RV64IM-NEXT: add t3, t3, t4
589 ; RV64IM-NEXT: mul t4, t3, a7
590 ; RV64IM-NEXT: mulh a3, a4, a3
591 ; RV64IM-NEXT: add a3, a3, a4
592 ; RV64IM-NEXT: srli t5, a3, 63
593 ; RV64IM-NEXT: srai a3, a3, 6
594 ; RV64IM-NEXT: add a3, a3, t5
595 ; RV64IM-NEXT: mul a7, a3, a7
596 ; RV64IM-NEXT: add a3, a4, a3
597 ; RV64IM-NEXT: subw a3, a3, a7
598 ; RV64IM-NEXT: add a5, a5, t3
599 ; RV64IM-NEXT: subw a4, a5, t4
600 ; RV64IM-NEXT: add a1, a1, t1
601 ; RV64IM-NEXT: subw a1, a1, t2
602 ; RV64IM-NEXT: add a2, a2, a6
603 ; RV64IM-NEXT: subw a2, a2, t0
604 ; RV64IM-NEXT: sh a2, 6(a0)
605 ; RV64IM-NEXT: sh a1, 4(a0)
606 ; RV64IM-NEXT: sh a4, 2(a0)
607 ; RV64IM-NEXT: sh a3, 0(a0)
609 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
610 %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
611 %3 = add <4 x i16> %1, %2
615 ; Don't fold for divisors that are a power of two.
; Divisors are 64, 32, 8 and 95. In every configuration the three
; power-of-two lanes are expected to use a short srli / add / andi / sub
; sequence (mask values -64, -32, -8) instead of a libcall or a mulh
; expansion. Only the last lane (divisor 95) is expected to need the generic
; path: a __modsi3 / __moddi3 call without M, or the mulh sequence with +m.
616 define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
617 ; RV32I-LABEL: dont_fold_srem_power_of_two:
619 ; RV32I-NEXT: addi sp, sp, -32
620 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
621 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
622 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
623 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
624 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
625 ; RV32I-NEXT: mv s0, a0
626 ; RV32I-NEXT: lh a2, 0(a1)
627 ; RV32I-NEXT: lh a0, 12(a1)
628 ; RV32I-NEXT: lh a3, 8(a1)
629 ; RV32I-NEXT: lh a1, 4(a1)
630 ; RV32I-NEXT: srli a4, a2, 26
631 ; RV32I-NEXT: add a4, a2, a4
632 ; RV32I-NEXT: andi a4, a4, -64
633 ; RV32I-NEXT: sub s1, a2, a4
634 ; RV32I-NEXT: srli a2, a1, 27
635 ; RV32I-NEXT: add a2, a1, a2
636 ; RV32I-NEXT: andi a2, a2, -32
637 ; RV32I-NEXT: sub s2, a1, a2
638 ; RV32I-NEXT: srli a1, a3, 29
639 ; RV32I-NEXT: add a1, a3, a1
640 ; RV32I-NEXT: andi a1, a1, -8
641 ; RV32I-NEXT: sub s3, a3, a1
642 ; RV32I-NEXT: li a1, 95
643 ; RV32I-NEXT: call __modsi3
644 ; RV32I-NEXT: sh a0, 6(s0)
645 ; RV32I-NEXT: sh s3, 4(s0)
646 ; RV32I-NEXT: sh s2, 2(s0)
647 ; RV32I-NEXT: sh s1, 0(s0)
648 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
649 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
650 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
651 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
652 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
653 ; RV32I-NEXT: addi sp, sp, 32
656 ; RV32IM-LABEL: dont_fold_srem_power_of_two:
658 ; RV32IM-NEXT: lh a2, 8(a1)
659 ; RV32IM-NEXT: lh a3, 4(a1)
660 ; RV32IM-NEXT: lh a4, 12(a1)
661 ; RV32IM-NEXT: lh a1, 0(a1)
662 ; RV32IM-NEXT: lui a5, 706409
663 ; RV32IM-NEXT: addi a5, a5, 389
664 ; RV32IM-NEXT: mulh a5, a4, a5
665 ; RV32IM-NEXT: add a5, a5, a4
666 ; RV32IM-NEXT: srli a6, a5, 31
667 ; RV32IM-NEXT: srli a5, a5, 6
668 ; RV32IM-NEXT: add a5, a5, a6
669 ; RV32IM-NEXT: li a6, 95
670 ; RV32IM-NEXT: mul a5, a5, a6
671 ; RV32IM-NEXT: sub a4, a4, a5
672 ; RV32IM-NEXT: srli a5, a1, 26
673 ; RV32IM-NEXT: add a5, a1, a5
674 ; RV32IM-NEXT: andi a5, a5, -64
675 ; RV32IM-NEXT: sub a1, a1, a5
676 ; RV32IM-NEXT: srli a5, a3, 27
677 ; RV32IM-NEXT: add a5, a3, a5
678 ; RV32IM-NEXT: andi a5, a5, -32
679 ; RV32IM-NEXT: sub a3, a3, a5
680 ; RV32IM-NEXT: srli a5, a2, 29
681 ; RV32IM-NEXT: add a5, a2, a5
682 ; RV32IM-NEXT: andi a5, a5, -8
683 ; RV32IM-NEXT: sub a2, a2, a5
684 ; RV32IM-NEXT: sh a2, 4(a0)
685 ; RV32IM-NEXT: sh a3, 2(a0)
686 ; RV32IM-NEXT: sh a1, 0(a0)
687 ; RV32IM-NEXT: sh a4, 6(a0)
690 ; RV64I-LABEL: dont_fold_srem_power_of_two:
692 ; RV64I-NEXT: addi sp, sp, -48
693 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
694 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
695 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
696 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
697 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
698 ; RV64I-NEXT: mv s0, a0
699 ; RV64I-NEXT: lh a2, 0(a1)
700 ; RV64I-NEXT: lh a0, 24(a1)
701 ; RV64I-NEXT: lh a3, 16(a1)
702 ; RV64I-NEXT: lh a1, 8(a1)
703 ; RV64I-NEXT: srli a4, a2, 58
704 ; RV64I-NEXT: add a4, a2, a4
705 ; RV64I-NEXT: andi a4, a4, -64
706 ; RV64I-NEXT: subw s1, a2, a4
707 ; RV64I-NEXT: srli a2, a1, 59
708 ; RV64I-NEXT: add a2, a1, a2
709 ; RV64I-NEXT: andi a2, a2, -32
710 ; RV64I-NEXT: subw s2, a1, a2
711 ; RV64I-NEXT: srli a1, a3, 61
712 ; RV64I-NEXT: add a1, a3, a1
713 ; RV64I-NEXT: andi a1, a1, -8
714 ; RV64I-NEXT: subw s3, a3, a1
715 ; RV64I-NEXT: li a1, 95
716 ; RV64I-NEXT: call __moddi3
717 ; RV64I-NEXT: sh a0, 6(s0)
718 ; RV64I-NEXT: sh s3, 4(s0)
719 ; RV64I-NEXT: sh s2, 2(s0)
720 ; RV64I-NEXT: sh s1, 0(s0)
721 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
722 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
723 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
724 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
725 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
726 ; RV64I-NEXT: addi sp, sp, 48
729 ; RV64IM-LABEL: dont_fold_srem_power_of_two:
731 ; RV64IM-NEXT: lh a2, 24(a1)
732 ; RV64IM-NEXT: lui a3, %hi(.LCPI3_0)
733 ; RV64IM-NEXT: ld a3, %lo(.LCPI3_0)(a3)
734 ; RV64IM-NEXT: lh a4, 16(a1)
735 ; RV64IM-NEXT: lh a5, 8(a1)
736 ; RV64IM-NEXT: lh a1, 0(a1)
737 ; RV64IM-NEXT: mulh a3, a2, a3
738 ; RV64IM-NEXT: add a3, a3, a2
739 ; RV64IM-NEXT: srli a6, a3, 63
740 ; RV64IM-NEXT: srli a3, a3, 6
741 ; RV64IM-NEXT: add a3, a3, a6
742 ; RV64IM-NEXT: li a6, 95
743 ; RV64IM-NEXT: mul a3, a3, a6
744 ; RV64IM-NEXT: subw a2, a2, a3
745 ; RV64IM-NEXT: srli a3, a1, 58
746 ; RV64IM-NEXT: add a3, a1, a3
747 ; RV64IM-NEXT: andi a3, a3, -64
748 ; RV64IM-NEXT: subw a1, a1, a3
749 ; RV64IM-NEXT: srli a3, a5, 59
750 ; RV64IM-NEXT: add a3, a5, a3
751 ; RV64IM-NEXT: andi a3, a3, -32
752 ; RV64IM-NEXT: subw a5, a5, a3
753 ; RV64IM-NEXT: srli a3, a4, 61
754 ; RV64IM-NEXT: add a3, a4, a3
755 ; RV64IM-NEXT: andi a3, a3, -8
756 ; RV64IM-NEXT: subw a4, a4, a3
757 ; RV64IM-NEXT: sh a4, 4(a0)
758 ; RV64IM-NEXT: sh a5, 2(a0)
759 ; RV64IM-NEXT: sh a1, 0(a0)
760 ; RV64IM-NEXT: sh a2, 6(a0)
762 %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
766 ; Don't fold if the divisor is one.
; Divisors are 1, 654, 23 and 5423. Lane 0 (x srem 1) is expected to be
; emitted as a plain store of zero ("sh zero, 0(...)"); the expected code
; never loads that input element.
; The other three lanes take the generic path: a __modsi3 / __moddi3 call
; without M, or the mulh / fixup / mul / sub sequence with +m. The divisor
; 5423 does not fit a single li and is built as lui 1 plus 1327.
767 define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
768 ; RV32I-LABEL: dont_fold_srem_one:
770 ; RV32I-NEXT: addi sp, sp, -32
771 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
772 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
773 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
774 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
775 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
776 ; RV32I-NEXT: lh s0, 12(a1)
777 ; RV32I-NEXT: lh s1, 8(a1)
778 ; RV32I-NEXT: lh a2, 4(a1)
779 ; RV32I-NEXT: mv s2, a0
780 ; RV32I-NEXT: li a1, 654
781 ; RV32I-NEXT: mv a0, a2
782 ; RV32I-NEXT: call __modsi3
783 ; RV32I-NEXT: mv s3, a0
784 ; RV32I-NEXT: li a1, 23
785 ; RV32I-NEXT: mv a0, s1
786 ; RV32I-NEXT: call __modsi3
787 ; RV32I-NEXT: mv s1, a0
788 ; RV32I-NEXT: lui a0, 1
789 ; RV32I-NEXT: addi a1, a0, 1327
790 ; RV32I-NEXT: mv a0, s0
791 ; RV32I-NEXT: call __modsi3
792 ; RV32I-NEXT: sh a0, 6(s2)
793 ; RV32I-NEXT: sh s1, 4(s2)
794 ; RV32I-NEXT: sh s3, 2(s2)
795 ; RV32I-NEXT: sh zero, 0(s2)
796 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
797 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
798 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
799 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
800 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
801 ; RV32I-NEXT: addi sp, sp, 32
804 ; RV32IM-LABEL: dont_fold_srem_one:
806 ; RV32IM-NEXT: lh a2, 12(a1)
807 ; RV32IM-NEXT: lh a3, 4(a1)
808 ; RV32IM-NEXT: lh a1, 8(a1)
809 ; RV32IM-NEXT: lui a4, 820904
810 ; RV32IM-NEXT: addi a4, a4, -1903
811 ; RV32IM-NEXT: mulh a4, a3, a4
812 ; RV32IM-NEXT: add a4, a4, a3
813 ; RV32IM-NEXT: srli a5, a4, 31
814 ; RV32IM-NEXT: srli a4, a4, 9
815 ; RV32IM-NEXT: add a4, a4, a5
816 ; RV32IM-NEXT: li a5, 654
817 ; RV32IM-NEXT: mul a4, a4, a5
818 ; RV32IM-NEXT: sub a3, a3, a4
819 ; RV32IM-NEXT: lui a4, 729444
820 ; RV32IM-NEXT: addi a4, a4, 713
821 ; RV32IM-NEXT: mulh a4, a1, a4
822 ; RV32IM-NEXT: add a4, a4, a1
823 ; RV32IM-NEXT: srli a5, a4, 31
824 ; RV32IM-NEXT: srli a4, a4, 4
825 ; RV32IM-NEXT: add a4, a4, a5
826 ; RV32IM-NEXT: li a5, 23
827 ; RV32IM-NEXT: mul a4, a4, a5
828 ; RV32IM-NEXT: sub a1, a1, a4
829 ; RV32IM-NEXT: lui a4, 395996
830 ; RV32IM-NEXT: addi a4, a4, -2009
831 ; RV32IM-NEXT: mulh a4, a2, a4
832 ; RV32IM-NEXT: srli a5, a4, 31
833 ; RV32IM-NEXT: srli a4, a4, 11
834 ; RV32IM-NEXT: add a4, a4, a5
835 ; RV32IM-NEXT: lui a5, 1
836 ; RV32IM-NEXT: addi a5, a5, 1327
837 ; RV32IM-NEXT: mul a4, a4, a5
838 ; RV32IM-NEXT: sub a2, a2, a4
839 ; RV32IM-NEXT: sh zero, 0(a0)
840 ; RV32IM-NEXT: sh a2, 6(a0)
841 ; RV32IM-NEXT: sh a1, 4(a0)
842 ; RV32IM-NEXT: sh a3, 2(a0)
845 ; RV64I-LABEL: dont_fold_srem_one:
847 ; RV64I-NEXT: addi sp, sp, -48
848 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
849 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
850 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
851 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
852 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
853 ; RV64I-NEXT: lh s0, 24(a1)
854 ; RV64I-NEXT: lh s1, 16(a1)
855 ; RV64I-NEXT: lh a2, 8(a1)
856 ; RV64I-NEXT: mv s2, a0
857 ; RV64I-NEXT: li a1, 654
858 ; RV64I-NEXT: mv a0, a2
859 ; RV64I-NEXT: call __moddi3
860 ; RV64I-NEXT: mv s3, a0
861 ; RV64I-NEXT: li a1, 23
862 ; RV64I-NEXT: mv a0, s1
863 ; RV64I-NEXT: call __moddi3
864 ; RV64I-NEXT: mv s1, a0
865 ; RV64I-NEXT: lui a0, 1
866 ; RV64I-NEXT: addiw a1, a0, 1327
867 ; RV64I-NEXT: mv a0, s0
868 ; RV64I-NEXT: call __moddi3
869 ; RV64I-NEXT: sh a0, 6(s2)
870 ; RV64I-NEXT: sh s1, 4(s2)
871 ; RV64I-NEXT: sh s3, 2(s2)
872 ; RV64I-NEXT: sh zero, 0(s2)
873 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
874 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
875 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
876 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
877 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
878 ; RV64I-NEXT: addi sp, sp, 48
881 ; RV64IM-LABEL: dont_fold_srem_one:
883 ; RV64IM-NEXT: lh a2, 16(a1)
884 ; RV64IM-NEXT: lui a3, %hi(.LCPI4_0)
885 ; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3)
886 ; RV64IM-NEXT: lh a4, 24(a1)
887 ; RV64IM-NEXT: lh a1, 8(a1)
888 ; RV64IM-NEXT: mulh a3, a2, a3
889 ; RV64IM-NEXT: add a3, a3, a2
890 ; RV64IM-NEXT: srli a5, a3, 63
891 ; RV64IM-NEXT: srli a3, a3, 4
892 ; RV64IM-NEXT: add a3, a3, a5
893 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_1)
894 ; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5)
895 ; RV64IM-NEXT: li a6, 23
896 ; RV64IM-NEXT: mul a3, a3, a6
897 ; RV64IM-NEXT: subw a2, a2, a3
898 ; RV64IM-NEXT: mulh a3, a1, a5
899 ; RV64IM-NEXT: srli a5, a3, 63
900 ; RV64IM-NEXT: srli a3, a3, 8
901 ; RV64IM-NEXT: add a3, a3, a5
902 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_2)
903 ; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5)
904 ; RV64IM-NEXT: li a6, 654
905 ; RV64IM-NEXT: mul a3, a3, a6
906 ; RV64IM-NEXT: subw a1, a1, a3
907 ; RV64IM-NEXT: mulh a3, a4, a5
908 ; RV64IM-NEXT: srli a5, a3, 63
909 ; RV64IM-NEXT: srli a3, a3, 11
910 ; RV64IM-NEXT: add a3, a3, a5
911 ; RV64IM-NEXT: lui a5, 1
912 ; RV64IM-NEXT: addi a5, a5, 1327
913 ; RV64IM-NEXT: mul a3, a3, a5
914 ; RV64IM-NEXT: subw a4, a4, a3
915 ; RV64IM-NEXT: sh zero, 0(a0)
916 ; RV64IM-NEXT: sh a4, 6(a0)
917 ; RV64IM-NEXT: sh a1, 2(a0)
918 ; RV64IM-NEXT: sh a2, 4(a0)
920 %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
924 ; Don't fold if the divisor is 2^15.
;
; The IR under test is a single srem of <4 x i16> %x by the constant vector
; <1, 32768, 23, 5423>. What the recorded output shows for each lane:
;   - divisor 1: every run stores zero to offset 0 of the result.
;   - divisor 32768: every run uses the same shift/add/and/subtract sequence
;     (shift right by 17 on riscv32 or by 49 on riscv64, then mask with
;     "lui 8", i.e. 0x8000); no libcall and no multiply appears for this lane.
;   - divisors 23 and 5423 (built as "lui 1" + 1327): the runs without
;     -mattr=+m call __modsi3 (riscv32) or __moddi3 (riscv64); the +m runs use
;     a mulh-based sequence followed by a multiply and a subtract.
; NOTE(review): the name says "urem" but the operation under test is srem, and
; i16 32768 has the bit pattern 0x8000 (-2^15 as a signed i16, not smax) --
; confirm whether the name and the title above are intentional.
; The assertions below are autogenerated (see the note at the top of the
; file); regenerate them with the update script rather than editing by hand.
925 define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
926 ; RV32I-LABEL: dont_fold_urem_i16_smax:
928 ; RV32I-NEXT: addi sp, sp, -32
929 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
930 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
931 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
932 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
933 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
934 ; RV32I-NEXT: lh a2, 4(a1)
935 ; RV32I-NEXT: mv s0, a0
936 ; RV32I-NEXT: lh s1, 12(a1)
937 ; RV32I-NEXT: lh a0, 8(a1)
938 ; RV32I-NEXT: srli a1, a2, 17
939 ; RV32I-NEXT: add a1, a2, a1
940 ; RV32I-NEXT: lui a3, 8
941 ; RV32I-NEXT: and a1, a1, a3
942 ; RV32I-NEXT: sub s3, a2, a1
943 ; RV32I-NEXT: li a1, 23
944 ; RV32I-NEXT: call __modsi3
945 ; RV32I-NEXT: mv s2, a0
946 ; RV32I-NEXT: lui a0, 1
947 ; RV32I-NEXT: addi a1, a0, 1327
948 ; RV32I-NEXT: mv a0, s1
949 ; RV32I-NEXT: call __modsi3
950 ; RV32I-NEXT: sh a0, 6(s0)
951 ; RV32I-NEXT: sh s2, 4(s0)
952 ; RV32I-NEXT: sh zero, 0(s0)
953 ; RV32I-NEXT: sh s3, 2(s0)
954 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
955 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
956 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
957 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
958 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
959 ; RV32I-NEXT: addi sp, sp, 32
962 ; RV32IM-LABEL: dont_fold_urem_i16_smax:
964 ; RV32IM-NEXT: lh a2, 4(a1)
965 ; RV32IM-NEXT: lh a3, 8(a1)
966 ; RV32IM-NEXT: lh a1, 12(a1)
967 ; RV32IM-NEXT: lui a4, 729444
968 ; RV32IM-NEXT: addi a4, a4, 713
969 ; RV32IM-NEXT: mulh a4, a3, a4
970 ; RV32IM-NEXT: add a4, a4, a3
971 ; RV32IM-NEXT: srli a5, a4, 31
972 ; RV32IM-NEXT: srli a4, a4, 4
973 ; RV32IM-NEXT: add a4, a4, a5
974 ; RV32IM-NEXT: li a5, 23
975 ; RV32IM-NEXT: mul a4, a4, a5
976 ; RV32IM-NEXT: sub a3, a3, a4
977 ; RV32IM-NEXT: lui a4, 395996
978 ; RV32IM-NEXT: addi a4, a4, -2009
979 ; RV32IM-NEXT: mulh a4, a1, a4
980 ; RV32IM-NEXT: srli a5, a4, 31
981 ; RV32IM-NEXT: srli a4, a4, 11
982 ; RV32IM-NEXT: add a4, a4, a5
983 ; RV32IM-NEXT: lui a5, 1
984 ; RV32IM-NEXT: addi a5, a5, 1327
985 ; RV32IM-NEXT: mul a4, a4, a5
986 ; RV32IM-NEXT: sub a1, a1, a4
987 ; RV32IM-NEXT: srli a4, a2, 17
988 ; RV32IM-NEXT: add a4, a2, a4
989 ; RV32IM-NEXT: lui a5, 8
990 ; RV32IM-NEXT: and a4, a4, a5
991 ; RV32IM-NEXT: sub a2, a2, a4
992 ; RV32IM-NEXT: sh zero, 0(a0)
993 ; RV32IM-NEXT: sh a2, 2(a0)
994 ; RV32IM-NEXT: sh a1, 6(a0)
995 ; RV32IM-NEXT: sh a3, 4(a0)
998 ; RV64I-LABEL: dont_fold_urem_i16_smax:
1000 ; RV64I-NEXT: addi sp, sp, -48
1001 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1002 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1003 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1004 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1005 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1006 ; RV64I-NEXT: lh a2, 8(a1)
1007 ; RV64I-NEXT: mv s0, a0
1008 ; RV64I-NEXT: lh s1, 24(a1)
1009 ; RV64I-NEXT: lh a0, 16(a1)
1010 ; RV64I-NEXT: srli a1, a2, 49
1011 ; RV64I-NEXT: add a1, a2, a1
1012 ; RV64I-NEXT: lui a3, 8
1013 ; RV64I-NEXT: and a1, a1, a3
1014 ; RV64I-NEXT: subw s3, a2, a1
1015 ; RV64I-NEXT: li a1, 23
1016 ; RV64I-NEXT: call __moddi3
1017 ; RV64I-NEXT: mv s2, a0
1018 ; RV64I-NEXT: lui a0, 1
1019 ; RV64I-NEXT: addiw a1, a0, 1327
1020 ; RV64I-NEXT: mv a0, s1
1021 ; RV64I-NEXT: call __moddi3
1022 ; RV64I-NEXT: sh a0, 6(s0)
1023 ; RV64I-NEXT: sh s2, 4(s0)
1024 ; RV64I-NEXT: sh zero, 0(s0)
1025 ; RV64I-NEXT: sh s3, 2(s0)
1026 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1027 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1028 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1029 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1030 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1031 ; RV64I-NEXT: addi sp, sp, 48
1034 ; RV64IM-LABEL: dont_fold_urem_i16_smax:
1036 ; RV64IM-NEXT: lh a2, 16(a1)
1037 ; RV64IM-NEXT: lui a3, %hi(.LCPI5_0)
1038 ; RV64IM-NEXT: ld a3, %lo(.LCPI5_0)(a3)
1039 ; RV64IM-NEXT: lh a4, 24(a1)
1040 ; RV64IM-NEXT: mulh a3, a2, a3
1041 ; RV64IM-NEXT: add a3, a3, a2
1042 ; RV64IM-NEXT: srli a5, a3, 63
1043 ; RV64IM-NEXT: srli a3, a3, 4
1044 ; RV64IM-NEXT: add a3, a3, a5
1045 ; RV64IM-NEXT: li a5, 23
1046 ; RV64IM-NEXT: lui a6, %hi(.LCPI5_1)
1047 ; RV64IM-NEXT: ld a6, %lo(.LCPI5_1)(a6)
1048 ; RV64IM-NEXT: mul a3, a3, a5
1049 ; RV64IM-NEXT: lh a1, 8(a1)
1050 ; RV64IM-NEXT: subw a2, a2, a3
1051 ; RV64IM-NEXT: mulh a3, a4, a6
1052 ; RV64IM-NEXT: srli a5, a3, 63
1053 ; RV64IM-NEXT: srli a3, a3, 11
1054 ; RV64IM-NEXT: add a3, a3, a5
1055 ; RV64IM-NEXT: lui a5, 1
1056 ; RV64IM-NEXT: addi a5, a5, 1327
1057 ; RV64IM-NEXT: mul a3, a3, a5
1058 ; RV64IM-NEXT: subw a4, a4, a3
1059 ; RV64IM-NEXT: srli a3, a1, 49
1060 ; RV64IM-NEXT: add a3, a1, a3
1061 ; RV64IM-NEXT: lui a5, 8
1062 ; RV64IM-NEXT: and a3, a3, a5
1063 ; RV64IM-NEXT: subw a1, a1, a3
1064 ; RV64IM-NEXT: sh zero, 0(a0)
1065 ; RV64IM-NEXT: sh a1, 2(a0)
1066 ; RV64IM-NEXT: sh a4, 6(a0)
1067 ; RV64IM-NEXT: sh a2, 4(a0)
1069 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
1073 ; Don't fold i64 srem.
;
; The IR under test is a single srem of <4 x i64> %x by <1, 654, 23, 5423>.
; Expected lowering per configuration, as recorded in the assertions below:
;   - riscv32, with or without -mattr=+m: four calls to __moddi3, one per lane
;     (the divisor-1 lane included); the dividend is passed in a0/a1 and the
;     divisor constant in a2 with a3 set to 0. The two riscv32 blocks are
;     identical apart from their check prefix.
;   - riscv64 without +m: three calls to __moddi3 (divisors 654, 23, 5423);
;     lane 0 is stored as zero with no call.
;   - riscv64 with +m: no calls; each remaining lane uses mulh with a constant
;     loaded from .LCPI6_0/.LCPI6_1/.LCPI6_2, then srli 63 + srai + add, then
;     a multiply and a subtract; lane 0 is stored as zero.
; NOTE(review): the riscv64 +m output is a multiply-based expansion, so the
; "don't fold" in the title appears to describe the riscv32 runs only --
; confirm the intent.
1074 define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
1075 ; RV32I-LABEL: dont_fold_srem_i64:
1077 ; RV32I-NEXT: addi sp, sp, -48
1078 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
1079 ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
1080 ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
1081 ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
1082 ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
1083 ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
1084 ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
1085 ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
1086 ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
1087 ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
1088 ; RV32I-NEXT: lw s0, 24(a1)
1089 ; RV32I-NEXT: lw s1, 28(a1)
1090 ; RV32I-NEXT: lw s2, 16(a1)
1091 ; RV32I-NEXT: lw s3, 20(a1)
1092 ; RV32I-NEXT: lw s4, 8(a1)
1093 ; RV32I-NEXT: lw s5, 12(a1)
1094 ; RV32I-NEXT: lw a3, 0(a1)
1095 ; RV32I-NEXT: lw a1, 4(a1)
1096 ; RV32I-NEXT: mv s6, a0
1097 ; RV32I-NEXT: li a2, 1
1098 ; RV32I-NEXT: mv a0, a3
1099 ; RV32I-NEXT: li a3, 0
1100 ; RV32I-NEXT: call __moddi3
1101 ; RV32I-NEXT: mv s7, a0
1102 ; RV32I-NEXT: mv s8, a1
1103 ; RV32I-NEXT: li a2, 654
1104 ; RV32I-NEXT: mv a0, s4
1105 ; RV32I-NEXT: mv a1, s5
1106 ; RV32I-NEXT: li a3, 0
1107 ; RV32I-NEXT: call __moddi3
1108 ; RV32I-NEXT: mv s4, a0
1109 ; RV32I-NEXT: mv s5, a1
1110 ; RV32I-NEXT: li a2, 23
1111 ; RV32I-NEXT: mv a0, s2
1112 ; RV32I-NEXT: mv a1, s3
1113 ; RV32I-NEXT: li a3, 0
1114 ; RV32I-NEXT: call __moddi3
1115 ; RV32I-NEXT: mv s2, a0
1116 ; RV32I-NEXT: mv s3, a1
1117 ; RV32I-NEXT: lui a0, 1
1118 ; RV32I-NEXT: addi a2, a0, 1327
1119 ; RV32I-NEXT: mv a0, s0
1120 ; RV32I-NEXT: mv a1, s1
1121 ; RV32I-NEXT: li a3, 0
1122 ; RV32I-NEXT: call __moddi3
1123 ; RV32I-NEXT: sw a1, 28(s6)
1124 ; RV32I-NEXT: sw a0, 24(s6)
1125 ; RV32I-NEXT: sw s3, 20(s6)
1126 ; RV32I-NEXT: sw s2, 16(s6)
1127 ; RV32I-NEXT: sw s5, 12(s6)
1128 ; RV32I-NEXT: sw s4, 8(s6)
1129 ; RV32I-NEXT: sw s8, 4(s6)
1130 ; RV32I-NEXT: sw s7, 0(s6)
1131 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
1132 ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
1133 ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
1134 ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
1135 ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
1136 ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
1137 ; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
1138 ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
1139 ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
1140 ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
1141 ; RV32I-NEXT: addi sp, sp, 48
1144 ; RV32IM-LABEL: dont_fold_srem_i64:
1146 ; RV32IM-NEXT: addi sp, sp, -48
1147 ; RV32IM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
1148 ; RV32IM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
1149 ; RV32IM-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
1150 ; RV32IM-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
1151 ; RV32IM-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
1152 ; RV32IM-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
1153 ; RV32IM-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
1154 ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
1155 ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
1156 ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
1157 ; RV32IM-NEXT: lw s0, 24(a1)
1158 ; RV32IM-NEXT: lw s1, 28(a1)
1159 ; RV32IM-NEXT: lw s2, 16(a1)
1160 ; RV32IM-NEXT: lw s3, 20(a1)
1161 ; RV32IM-NEXT: lw s4, 8(a1)
1162 ; RV32IM-NEXT: lw s5, 12(a1)
1163 ; RV32IM-NEXT: lw a3, 0(a1)
1164 ; RV32IM-NEXT: lw a1, 4(a1)
1165 ; RV32IM-NEXT: mv s6, a0
1166 ; RV32IM-NEXT: li a2, 1
1167 ; RV32IM-NEXT: mv a0, a3
1168 ; RV32IM-NEXT: li a3, 0
1169 ; RV32IM-NEXT: call __moddi3
1170 ; RV32IM-NEXT: mv s7, a0
1171 ; RV32IM-NEXT: mv s8, a1
1172 ; RV32IM-NEXT: li a2, 654
1173 ; RV32IM-NEXT: mv a0, s4
1174 ; RV32IM-NEXT: mv a1, s5
1175 ; RV32IM-NEXT: li a3, 0
1176 ; RV32IM-NEXT: call __moddi3
1177 ; RV32IM-NEXT: mv s4, a0
1178 ; RV32IM-NEXT: mv s5, a1
1179 ; RV32IM-NEXT: li a2, 23
1180 ; RV32IM-NEXT: mv a0, s2
1181 ; RV32IM-NEXT: mv a1, s3
1182 ; RV32IM-NEXT: li a3, 0
1183 ; RV32IM-NEXT: call __moddi3
1184 ; RV32IM-NEXT: mv s2, a0
1185 ; RV32IM-NEXT: mv s3, a1
1186 ; RV32IM-NEXT: lui a0, 1
1187 ; RV32IM-NEXT: addi a2, a0, 1327
1188 ; RV32IM-NEXT: mv a0, s0
1189 ; RV32IM-NEXT: mv a1, s1
1190 ; RV32IM-NEXT: li a3, 0
1191 ; RV32IM-NEXT: call __moddi3
1192 ; RV32IM-NEXT: sw a1, 28(s6)
1193 ; RV32IM-NEXT: sw a0, 24(s6)
1194 ; RV32IM-NEXT: sw s3, 20(s6)
1195 ; RV32IM-NEXT: sw s2, 16(s6)
1196 ; RV32IM-NEXT: sw s5, 12(s6)
1197 ; RV32IM-NEXT: sw s4, 8(s6)
1198 ; RV32IM-NEXT: sw s8, 4(s6)
1199 ; RV32IM-NEXT: sw s7, 0(s6)
1200 ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
1201 ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
1202 ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
1203 ; RV32IM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
1204 ; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
1205 ; RV32IM-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
1206 ; RV32IM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
1207 ; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
1208 ; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
1209 ; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
1210 ; RV32IM-NEXT: addi sp, sp, 48
1213 ; RV64I-LABEL: dont_fold_srem_i64:
1215 ; RV64I-NEXT: addi sp, sp, -48
1216 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1217 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1218 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1219 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1220 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1221 ; RV64I-NEXT: ld s0, 24(a1)
1222 ; RV64I-NEXT: ld s1, 16(a1)
1223 ; RV64I-NEXT: ld a2, 8(a1)
1224 ; RV64I-NEXT: mv s2, a0
1225 ; RV64I-NEXT: li a1, 654
1226 ; RV64I-NEXT: mv a0, a2
1227 ; RV64I-NEXT: call __moddi3
1228 ; RV64I-NEXT: mv s3, a0
1229 ; RV64I-NEXT: li a1, 23
1230 ; RV64I-NEXT: mv a0, s1
1231 ; RV64I-NEXT: call __moddi3
1232 ; RV64I-NEXT: mv s1, a0
1233 ; RV64I-NEXT: lui a0, 1
1234 ; RV64I-NEXT: addiw a1, a0, 1327
1235 ; RV64I-NEXT: mv a0, s0
1236 ; RV64I-NEXT: call __moddi3
1237 ; RV64I-NEXT: sd a0, 24(s2)
1238 ; RV64I-NEXT: sd s1, 16(s2)
1239 ; RV64I-NEXT: sd s3, 8(s2)
1240 ; RV64I-NEXT: sd zero, 0(s2)
1241 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1242 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1243 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1244 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1245 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1246 ; RV64I-NEXT: addi sp, sp, 48
1249 ; RV64IM-LABEL: dont_fold_srem_i64:
1251 ; RV64IM-NEXT: ld a2, 16(a1)
1252 ; RV64IM-NEXT: lui a3, %hi(.LCPI6_0)
1253 ; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3)
1254 ; RV64IM-NEXT: ld a4, 24(a1)
1255 ; RV64IM-NEXT: ld a1, 8(a1)
1256 ; RV64IM-NEXT: mulh a3, a2, a3
1257 ; RV64IM-NEXT: add a3, a3, a2
1258 ; RV64IM-NEXT: srli a5, a3, 63
1259 ; RV64IM-NEXT: srai a3, a3, 4
1260 ; RV64IM-NEXT: add a3, a3, a5
1261 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_1)
1262 ; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5)
1263 ; RV64IM-NEXT: li a6, 23
1264 ; RV64IM-NEXT: mul a3, a3, a6
1265 ; RV64IM-NEXT: sub a2, a2, a3
1266 ; RV64IM-NEXT: mulh a3, a1, a5
1267 ; RV64IM-NEXT: srli a5, a3, 63
1268 ; RV64IM-NEXT: srai a3, a3, 8
1269 ; RV64IM-NEXT: add a3, a3, a5
1270 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_2)
1271 ; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5)
1272 ; RV64IM-NEXT: li a6, 654
1273 ; RV64IM-NEXT: mul a3, a3, a6
1274 ; RV64IM-NEXT: sub a1, a1, a3
1275 ; RV64IM-NEXT: mulh a3, a4, a5
1276 ; RV64IM-NEXT: srli a5, a3, 63
1277 ; RV64IM-NEXT: srai a3, a3, 11
1278 ; RV64IM-NEXT: add a3, a3, a5
1279 ; RV64IM-NEXT: lui a5, 1
1280 ; RV64IM-NEXT: addiw a5, a5, 1327
1281 ; RV64IM-NEXT: mul a3, a3, a5
1282 ; RV64IM-NEXT: sub a4, a4, a3
1283 ; RV64IM-NEXT: sd zero, 0(a0)
1284 ; RV64IM-NEXT: sd a4, 24(a0)
1285 ; RV64IM-NEXT: sd a1, 8(a0)
1286 ; RV64IM-NEXT: sd a2, 16(a0)
1288 %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>