1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck -check-prefix=RV32I %s
4 ; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
5 ; RUN: | FileCheck -check-prefix=RV32IM %s
6 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
7 ; RUN: | FileCheck -check-prefix=RV64I %s
8 ; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
9 ; RUN: | FileCheck -check-prefix=RV64IM %s
; srem of a <4 x i16> vector by four different constants (95, -124, 98, -1003),
; none of which is a power of two.
; Expected code, per run line:
;  * riscv32 / riscv64 without +m: every lane is handed to a libcall
;    (__modsi3 on riscv32, __moddi3 on riscv64) with the divisor placed in a1.
;  * riscv32 / riscv64 with +m: no libcall; each lane is a mulh by a
;    per-divisor constant, a shift plus sign-bit add, then a mul by the divisor
;    and a subtract from the original lane value.
; In all four variants the lanes are read with lh relative to a1 and the
; results are written with sh to offsets 0, 2, 4 and 6 of the result pointer.
11 define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
12 ; RV32I-LABEL: fold_srem_vec_1:
14 ; RV32I-NEXT: addi sp, sp, -32
15 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
16 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
17 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
18 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
19 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
20 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
21 ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
22 ; RV32I-NEXT: lh s2, 12(a1)
23 ; RV32I-NEXT: lh s3, 8(a1)
24 ; RV32I-NEXT: lh s0, 4(a1)
25 ; RV32I-NEXT: lh a2, 0(a1)
26 ; RV32I-NEXT: mv s1, a0
27 ; RV32I-NEXT: addi a1, zero, 95
28 ; RV32I-NEXT: mv a0, a2
29 ; RV32I-NEXT: call __modsi3@plt
30 ; RV32I-NEXT: mv s4, a0
31 ; RV32I-NEXT: addi a1, zero, -124
32 ; RV32I-NEXT: mv a0, s0
33 ; RV32I-NEXT: call __modsi3@plt
34 ; RV32I-NEXT: mv s5, a0
35 ; RV32I-NEXT: addi a1, zero, 98
36 ; RV32I-NEXT: mv a0, s3
37 ; RV32I-NEXT: call __modsi3@plt
38 ; RV32I-NEXT: mv s0, a0
39 ; RV32I-NEXT: addi a1, zero, -1003
40 ; RV32I-NEXT: mv a0, s2
41 ; RV32I-NEXT: call __modsi3@plt
42 ; RV32I-NEXT: sh a0, 6(s1)
43 ; RV32I-NEXT: sh s0, 4(s1)
44 ; RV32I-NEXT: sh s5, 2(s1)
45 ; RV32I-NEXT: sh s4, 0(s1)
46 ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
47 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
48 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
49 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
50 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
51 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
52 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
53 ; RV32I-NEXT: addi sp, sp, 32
56 ; RV32IM-LABEL: fold_srem_vec_1:
58 ; RV32IM-NEXT: lh a6, 12(a1)
59 ; RV32IM-NEXT: lh a3, 8(a1)
60 ; RV32IM-NEXT: lh a4, 0(a1)
61 ; RV32IM-NEXT: lh a1, 4(a1)
62 ; RV32IM-NEXT: lui a5, 706409
63 ; RV32IM-NEXT: addi a5, a5, 389
64 ; RV32IM-NEXT: mulh a5, a4, a5
65 ; RV32IM-NEXT: add a5, a5, a4
66 ; RV32IM-NEXT: srli a2, a5, 31
67 ; RV32IM-NEXT: srli a5, a5, 6
68 ; RV32IM-NEXT: add a2, a5, a2
69 ; RV32IM-NEXT: addi a5, zero, 95
70 ; RV32IM-NEXT: mul a2, a2, a5
71 ; RV32IM-NEXT: sub a2, a4, a2
72 ; RV32IM-NEXT: lui a4, 507375
73 ; RV32IM-NEXT: addi a4, a4, 1981
74 ; RV32IM-NEXT: mulh a4, a1, a4
75 ; RV32IM-NEXT: sub a4, a4, a1
76 ; RV32IM-NEXT: srli a5, a4, 31
77 ; RV32IM-NEXT: srli a4, a4, 6
78 ; RV32IM-NEXT: add a4, a4, a5
79 ; RV32IM-NEXT: addi a5, zero, -124
80 ; RV32IM-NEXT: mul a4, a4, a5
81 ; RV32IM-NEXT: sub a1, a1, a4
82 ; RV32IM-NEXT: lui a4, 342392
83 ; RV32IM-NEXT: addi a4, a4, 669
84 ; RV32IM-NEXT: mulh a4, a3, a4
85 ; RV32IM-NEXT: srli a5, a4, 31
86 ; RV32IM-NEXT: srli a4, a4, 5
87 ; RV32IM-NEXT: add a4, a4, a5
88 ; RV32IM-NEXT: addi a5, zero, 98
89 ; RV32IM-NEXT: mul a4, a4, a5
90 ; RV32IM-NEXT: sub a3, a3, a4
91 ; RV32IM-NEXT: lui a4, 780943
92 ; RV32IM-NEXT: addi a4, a4, 1809
93 ; RV32IM-NEXT: mulh a4, a6, a4
94 ; RV32IM-NEXT: srli a5, a4, 31
95 ; RV32IM-NEXT: srli a4, a4, 8
96 ; RV32IM-NEXT: add a4, a4, a5
97 ; RV32IM-NEXT: addi a5, zero, -1003
98 ; RV32IM-NEXT: mul a4, a4, a5
99 ; RV32IM-NEXT: sub a4, a6, a4
100 ; RV32IM-NEXT: sh a4, 6(a0)
101 ; RV32IM-NEXT: sh a3, 4(a0)
102 ; RV32IM-NEXT: sh a1, 2(a0)
103 ; RV32IM-NEXT: sh a2, 0(a0)
106 ; RV64I-LABEL: fold_srem_vec_1:
108 ; RV64I-NEXT: addi sp, sp, -64
109 ; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
110 ; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
111 ; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
112 ; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
113 ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
114 ; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
115 ; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
116 ; RV64I-NEXT: lh s2, 24(a1)
117 ; RV64I-NEXT: lh s3, 16(a1)
118 ; RV64I-NEXT: lh s0, 8(a1)
119 ; RV64I-NEXT: lh a2, 0(a1)
120 ; RV64I-NEXT: mv s1, a0
121 ; RV64I-NEXT: addi a1, zero, 95
122 ; RV64I-NEXT: mv a0, a2
123 ; RV64I-NEXT: call __moddi3@plt
124 ; RV64I-NEXT: mv s4, a0
125 ; RV64I-NEXT: addi a1, zero, -124
126 ; RV64I-NEXT: mv a0, s0
127 ; RV64I-NEXT: call __moddi3@plt
128 ; RV64I-NEXT: mv s5, a0
129 ; RV64I-NEXT: addi a1, zero, 98
130 ; RV64I-NEXT: mv a0, s3
131 ; RV64I-NEXT: call __moddi3@plt
132 ; RV64I-NEXT: mv s0, a0
133 ; RV64I-NEXT: addi a1, zero, -1003
134 ; RV64I-NEXT: mv a0, s2
135 ; RV64I-NEXT: call __moddi3@plt
136 ; RV64I-NEXT: sh a0, 6(s1)
137 ; RV64I-NEXT: sh s0, 4(s1)
138 ; RV64I-NEXT: sh s5, 2(s1)
139 ; RV64I-NEXT: sh s4, 0(s1)
140 ; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
141 ; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
142 ; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
143 ; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
144 ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
145 ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
146 ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
147 ; RV64I-NEXT: addi sp, sp, 64
150 ; RV64IM-LABEL: fold_srem_vec_1:
152 ; RV64IM-NEXT: lh a6, 24(a1)
153 ; RV64IM-NEXT: lh a3, 16(a1)
154 ; RV64IM-NEXT: lh a4, 8(a1)
155 ; RV64IM-NEXT: lh a1, 0(a1)
156 ; RV64IM-NEXT: lui a5, 1045903
157 ; RV64IM-NEXT: addiw a5, a5, -733
158 ; RV64IM-NEXT: slli a5, a5, 15
159 ; RV64IM-NEXT: addi a5, a5, 1035
160 ; RV64IM-NEXT: slli a5, a5, 12
161 ; RV64IM-NEXT: addi a5, a5, -905
162 ; RV64IM-NEXT: slli a5, a5, 12
163 ; RV64IM-NEXT: addi a5, a5, -1767
164 ; RV64IM-NEXT: mulh a5, a1, a5
165 ; RV64IM-NEXT: add a5, a5, a1
166 ; RV64IM-NEXT: srli a2, a5, 63
167 ; RV64IM-NEXT: srli a5, a5, 6
168 ; RV64IM-NEXT: addw a2, a5, a2
169 ; RV64IM-NEXT: addi a5, zero, 95
170 ; RV64IM-NEXT: mulw a2, a2, a5
171 ; RV64IM-NEXT: subw a1, a1, a2
172 ; RV64IM-NEXT: lui a2, 777976
173 ; RV64IM-NEXT: addiw a2, a2, -1057
174 ; RV64IM-NEXT: slli a2, a2, 15
175 ; RV64IM-NEXT: addi a2, a2, -1057
176 ; RV64IM-NEXT: slli a2, a2, 14
177 ; RV64IM-NEXT: addi a2, a2, -529
178 ; RV64IM-NEXT: srli a2, a2, 1
179 ; RV64IM-NEXT: mulh a2, a4, a2
180 ; RV64IM-NEXT: sub a2, a2, a4
181 ; RV64IM-NEXT: srli a5, a2, 63
182 ; RV64IM-NEXT: srli a2, a2, 6
183 ; RV64IM-NEXT: addw a2, a2, a5
184 ; RV64IM-NEXT: addi a5, zero, -124
185 ; RV64IM-NEXT: mulw a2, a2, a5
186 ; RV64IM-NEXT: subw a2, a4, a2
187 ; RV64IM-NEXT: lui a4, 2675
188 ; RV64IM-NEXT: addiw a4, a4, -251
189 ; RV64IM-NEXT: slli a4, a4, 13
190 ; RV64IM-NEXT: addi a4, a4, 1839
191 ; RV64IM-NEXT: slli a4, a4, 13
192 ; RV64IM-NEXT: addi a4, a4, 167
193 ; RV64IM-NEXT: slli a4, a4, 13
194 ; RV64IM-NEXT: addi a4, a4, 1505
195 ; RV64IM-NEXT: mulh a4, a3, a4
196 ; RV64IM-NEXT: srli a5, a4, 63
197 ; RV64IM-NEXT: srli a4, a4, 5
198 ; RV64IM-NEXT: addw a4, a4, a5
199 ; RV64IM-NEXT: addi a5, zero, 98
200 ; RV64IM-NEXT: mulw a4, a4, a5
201 ; RV64IM-NEXT: subw a3, a3, a4
202 ; RV64IM-NEXT: lui a4, 1040212
203 ; RV64IM-NEXT: addiw a4, a4, 1977
204 ; RV64IM-NEXT: slli a4, a4, 12
205 ; RV64IM-NEXT: addi a4, a4, -1907
206 ; RV64IM-NEXT: slli a4, a4, 12
207 ; RV64IM-NEXT: addi a4, a4, -453
208 ; RV64IM-NEXT: slli a4, a4, 12
209 ; RV64IM-NEXT: addi a4, a4, -1213
210 ; RV64IM-NEXT: mulh a4, a6, a4
211 ; RV64IM-NEXT: srli a5, a4, 63
212 ; RV64IM-NEXT: srli a4, a4, 7
213 ; RV64IM-NEXT: addw a4, a4, a5
214 ; RV64IM-NEXT: addi a5, zero, -1003
215 ; RV64IM-NEXT: mulw a4, a4, a5
216 ; RV64IM-NEXT: subw a4, a6, a4
217 ; RV64IM-NEXT: sh a4, 6(a0)
218 ; RV64IM-NEXT: sh a3, 4(a0)
219 ; RV64IM-NEXT: sh a2, 2(a0)
220 ; RV64IM-NEXT: sh a1, 0(a0)
222 %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
; srem of a <4 x i16> vector by the same constant (95) in every lane.
; Without +m all four lanes still go through a libcall (__modsi3 on riscv32,
; __moddi3 on riscv64), each with 95 reloaded into a1.
; With +m the multiplier constant is built once (in a5) and the divisor 95 is
; materialised once; both registers are then reused by the mulh and mul of all
; four lanes instead of being rebuilt per lane.
226 define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
227 ; RV32I-LABEL: fold_srem_vec_2:
229 ; RV32I-NEXT: addi sp, sp, -32
230 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
231 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
232 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
233 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
234 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
235 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
236 ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
237 ; RV32I-NEXT: lh s2, 12(a1)
238 ; RV32I-NEXT: lh s3, 8(a1)
239 ; RV32I-NEXT: lh s0, 4(a1)
240 ; RV32I-NEXT: lh a2, 0(a1)
241 ; RV32I-NEXT: mv s1, a0
242 ; RV32I-NEXT: addi a1, zero, 95
243 ; RV32I-NEXT: mv a0, a2
244 ; RV32I-NEXT: call __modsi3@plt
245 ; RV32I-NEXT: mv s4, a0
246 ; RV32I-NEXT: addi a1, zero, 95
247 ; RV32I-NEXT: mv a0, s0
248 ; RV32I-NEXT: call __modsi3@plt
249 ; RV32I-NEXT: mv s5, a0
250 ; RV32I-NEXT: addi a1, zero, 95
251 ; RV32I-NEXT: mv a0, s3
252 ; RV32I-NEXT: call __modsi3@plt
253 ; RV32I-NEXT: mv s0, a0
254 ; RV32I-NEXT: addi a1, zero, 95
255 ; RV32I-NEXT: mv a0, s2
256 ; RV32I-NEXT: call __modsi3@plt
257 ; RV32I-NEXT: sh a0, 6(s1)
258 ; RV32I-NEXT: sh s0, 4(s1)
259 ; RV32I-NEXT: sh s5, 2(s1)
260 ; RV32I-NEXT: sh s4, 0(s1)
261 ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
262 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
263 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
264 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
265 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
266 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
267 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
268 ; RV32I-NEXT: addi sp, sp, 32
271 ; RV32IM-LABEL: fold_srem_vec_2:
273 ; RV32IM-NEXT: lh a6, 12(a1)
274 ; RV32IM-NEXT: lh a3, 8(a1)
275 ; RV32IM-NEXT: lh a4, 0(a1)
276 ; RV32IM-NEXT: lh a1, 4(a1)
277 ; RV32IM-NEXT: lui a5, 706409
278 ; RV32IM-NEXT: addi a5, a5, 389
279 ; RV32IM-NEXT: mulh a2, a4, a5
280 ; RV32IM-NEXT: add a2, a2, a4
281 ; RV32IM-NEXT: srli a7, a2, 31
282 ; RV32IM-NEXT: srli a2, a2, 6
283 ; RV32IM-NEXT: add a2, a2, a7
284 ; RV32IM-NEXT: addi a7, zero, 95
285 ; RV32IM-NEXT: mul a2, a2, a7
286 ; RV32IM-NEXT: sub t0, a4, a2
287 ; RV32IM-NEXT: mulh a4, a1, a5
288 ; RV32IM-NEXT: add a4, a4, a1
289 ; RV32IM-NEXT: srli a2, a4, 31
290 ; RV32IM-NEXT: srli a4, a4, 6
291 ; RV32IM-NEXT: add a2, a4, a2
292 ; RV32IM-NEXT: mul a2, a2, a7
293 ; RV32IM-NEXT: sub a1, a1, a2
294 ; RV32IM-NEXT: mulh a2, a3, a5
295 ; RV32IM-NEXT: add a2, a2, a3
296 ; RV32IM-NEXT: srli a4, a2, 31
297 ; RV32IM-NEXT: srli a2, a2, 6
298 ; RV32IM-NEXT: add a2, a2, a4
299 ; RV32IM-NEXT: mul a2, a2, a7
300 ; RV32IM-NEXT: sub a2, a3, a2
301 ; RV32IM-NEXT: mulh a3, a6, a5
302 ; RV32IM-NEXT: add a3, a3, a6
303 ; RV32IM-NEXT: srli a4, a3, 31
304 ; RV32IM-NEXT: srli a3, a3, 6
305 ; RV32IM-NEXT: add a3, a3, a4
306 ; RV32IM-NEXT: mul a3, a3, a7
307 ; RV32IM-NEXT: sub a3, a6, a3
308 ; RV32IM-NEXT: sh a3, 6(a0)
309 ; RV32IM-NEXT: sh a2, 4(a0)
310 ; RV32IM-NEXT: sh a1, 2(a0)
311 ; RV32IM-NEXT: sh t0, 0(a0)
314 ; RV64I-LABEL: fold_srem_vec_2:
316 ; RV64I-NEXT: addi sp, sp, -64
317 ; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
318 ; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
319 ; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
320 ; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
321 ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
322 ; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
323 ; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
324 ; RV64I-NEXT: lh s2, 24(a1)
325 ; RV64I-NEXT: lh s3, 16(a1)
326 ; RV64I-NEXT: lh s0, 8(a1)
327 ; RV64I-NEXT: lh a2, 0(a1)
328 ; RV64I-NEXT: mv s1, a0
329 ; RV64I-NEXT: addi a1, zero, 95
330 ; RV64I-NEXT: mv a0, a2
331 ; RV64I-NEXT: call __moddi3@plt
332 ; RV64I-NEXT: mv s4, a0
333 ; RV64I-NEXT: addi a1, zero, 95
334 ; RV64I-NEXT: mv a0, s0
335 ; RV64I-NEXT: call __moddi3@plt
336 ; RV64I-NEXT: mv s5, a0
337 ; RV64I-NEXT: addi a1, zero, 95
338 ; RV64I-NEXT: mv a0, s3
339 ; RV64I-NEXT: call __moddi3@plt
340 ; RV64I-NEXT: mv s0, a0
341 ; RV64I-NEXT: addi a1, zero, 95
342 ; RV64I-NEXT: mv a0, s2
343 ; RV64I-NEXT: call __moddi3@plt
344 ; RV64I-NEXT: sh a0, 6(s1)
345 ; RV64I-NEXT: sh s0, 4(s1)
346 ; RV64I-NEXT: sh s5, 2(s1)
347 ; RV64I-NEXT: sh s4, 0(s1)
348 ; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
349 ; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
350 ; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
351 ; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
352 ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
353 ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
354 ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
355 ; RV64I-NEXT: addi sp, sp, 64
358 ; RV64IM-LABEL: fold_srem_vec_2:
360 ; RV64IM-NEXT: lh a6, 24(a1)
361 ; RV64IM-NEXT: lh a7, 16(a1)
362 ; RV64IM-NEXT: lh a4, 8(a1)
363 ; RV64IM-NEXT: lh a1, 0(a1)
364 ; RV64IM-NEXT: lui a5, 1045903
365 ; RV64IM-NEXT: addiw a5, a5, -733
366 ; RV64IM-NEXT: slli a5, a5, 15
367 ; RV64IM-NEXT: addi a5, a5, 1035
368 ; RV64IM-NEXT: slli a5, a5, 12
369 ; RV64IM-NEXT: addi a5, a5, -905
370 ; RV64IM-NEXT: slli a5, a5, 12
371 ; RV64IM-NEXT: addi a5, a5, -1767
372 ; RV64IM-NEXT: mulh a2, a1, a5
373 ; RV64IM-NEXT: add a2, a2, a1
374 ; RV64IM-NEXT: srli a3, a2, 63
375 ; RV64IM-NEXT: srli a2, a2, 6
376 ; RV64IM-NEXT: addw a2, a2, a3
377 ; RV64IM-NEXT: addi a3, zero, 95
378 ; RV64IM-NEXT: mulw a2, a2, a3
379 ; RV64IM-NEXT: subw t0, a1, a2
380 ; RV64IM-NEXT: mulh a2, a4, a5
381 ; RV64IM-NEXT: add a2, a2, a4
382 ; RV64IM-NEXT: srli a1, a2, 63
383 ; RV64IM-NEXT: srli a2, a2, 6
384 ; RV64IM-NEXT: addw a1, a2, a1
385 ; RV64IM-NEXT: mulw a1, a1, a3
386 ; RV64IM-NEXT: subw a1, a4, a1
387 ; RV64IM-NEXT: mulh a2, a7, a5
388 ; RV64IM-NEXT: add a2, a2, a7
389 ; RV64IM-NEXT: srli a4, a2, 63
390 ; RV64IM-NEXT: srli a2, a2, 6
391 ; RV64IM-NEXT: addw a2, a2, a4
392 ; RV64IM-NEXT: mulw a2, a2, a3
393 ; RV64IM-NEXT: subw a2, a7, a2
394 ; RV64IM-NEXT: mulh a4, a6, a5
395 ; RV64IM-NEXT: add a4, a4, a6
396 ; RV64IM-NEXT: srli a5, a4, 63
397 ; RV64IM-NEXT: srli a4, a4, 6
398 ; RV64IM-NEXT: addw a4, a4, a5
399 ; RV64IM-NEXT: mulw a3, a4, a3
400 ; RV64IM-NEXT: subw a3, a6, a3
401 ; RV64IM-NEXT: sh a3, 6(a0)
402 ; RV64IM-NEXT: sh a2, 4(a0)
403 ; RV64IM-NEXT: sh a1, 2(a0)
404 ; RV64IM-NEXT: sh t0, 0(a0)
406 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
411 ; Don't fold if we can combine srem with sdiv.
; The same <4 x i16> input is divided by 95 with both srem and sdiv, and the
; two results are added lane-wise.
; Without +m this is expected to cost eight libcalls: four remainder calls
; (__modsi3 on riscv32, __moddi3 on riscv64) followed by four division calls
; (__divsi3 / __divdi3), with the partial results kept in s-registers.
; With +m the quotient of each lane is computed once (mulh, add, srli + srai,
; add) and is used twice: by the mul/sub that forms the remainder and as the
; second operand of the final add.
412 define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
413 ; RV32I-LABEL: combine_srem_sdiv:
415 ; RV32I-NEXT: addi sp, sp, -48
416 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
417 ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
418 ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
419 ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
420 ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
421 ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
422 ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
423 ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
424 ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
425 ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
426 ; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill
427 ; RV32I-NEXT: lh s2, 0(a1)
428 ; RV32I-NEXT: lh s3, 4(a1)
429 ; RV32I-NEXT: lh s4, 8(a1)
430 ; RV32I-NEXT: lh s1, 12(a1)
431 ; RV32I-NEXT: mv s0, a0
432 ; RV32I-NEXT: addi a1, zero, 95
433 ; RV32I-NEXT: mv a0, s1
434 ; RV32I-NEXT: call __modsi3@plt
435 ; RV32I-NEXT: mv s5, a0
436 ; RV32I-NEXT: addi a1, zero, 95
437 ; RV32I-NEXT: mv a0, s4
438 ; RV32I-NEXT: call __modsi3@plt
439 ; RV32I-NEXT: mv s6, a0
440 ; RV32I-NEXT: addi a1, zero, 95
441 ; RV32I-NEXT: mv a0, s3
442 ; RV32I-NEXT: call __modsi3@plt
443 ; RV32I-NEXT: mv s7, a0
444 ; RV32I-NEXT: addi a1, zero, 95
445 ; RV32I-NEXT: mv a0, s2
446 ; RV32I-NEXT: call __modsi3@plt
447 ; RV32I-NEXT: mv s8, a0
448 ; RV32I-NEXT: addi a1, zero, 95
449 ; RV32I-NEXT: mv a0, s1
450 ; RV32I-NEXT: call __divsi3@plt
451 ; RV32I-NEXT: mv s9, a0
452 ; RV32I-NEXT: addi a1, zero, 95
453 ; RV32I-NEXT: mv a0, s4
454 ; RV32I-NEXT: call __divsi3@plt
455 ; RV32I-NEXT: mv s4, a0
456 ; RV32I-NEXT: addi a1, zero, 95
457 ; RV32I-NEXT: mv a0, s3
458 ; RV32I-NEXT: call __divsi3@plt
459 ; RV32I-NEXT: mv s1, a0
460 ; RV32I-NEXT: addi a1, zero, 95
461 ; RV32I-NEXT: mv a0, s2
462 ; RV32I-NEXT: call __divsi3@plt
463 ; RV32I-NEXT: add a0, s8, a0
464 ; RV32I-NEXT: add a1, s7, s1
465 ; RV32I-NEXT: add a2, s6, s4
466 ; RV32I-NEXT: add a3, s5, s9
467 ; RV32I-NEXT: sh a3, 6(s0)
468 ; RV32I-NEXT: sh a2, 4(s0)
469 ; RV32I-NEXT: sh a1, 2(s0)
470 ; RV32I-NEXT: sh a0, 0(s0)
471 ; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
472 ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
473 ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
474 ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
475 ; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
476 ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
477 ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
478 ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
479 ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
480 ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
481 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
482 ; RV32I-NEXT: addi sp, sp, 48
485 ; RV32IM-LABEL: combine_srem_sdiv:
487 ; RV32IM-NEXT: lh a6, 0(a1)
488 ; RV32IM-NEXT: lh a3, 4(a1)
489 ; RV32IM-NEXT: lh a4, 12(a1)
490 ; RV32IM-NEXT: lh a1, 8(a1)
491 ; RV32IM-NEXT: lui a5, 706409
492 ; RV32IM-NEXT: addi a5, a5, 389
493 ; RV32IM-NEXT: mulh a2, a4, a5
494 ; RV32IM-NEXT: add a2, a2, a4
495 ; RV32IM-NEXT: srli a7, a2, 31
496 ; RV32IM-NEXT: srai a2, a2, 6
497 ; RV32IM-NEXT: add t0, a2, a7
498 ; RV32IM-NEXT: addi a7, zero, 95
499 ; RV32IM-NEXT: mul a2, t0, a7
500 ; RV32IM-NEXT: sub t1, a4, a2
501 ; RV32IM-NEXT: mulh a4, a1, a5
502 ; RV32IM-NEXT: add a4, a4, a1
503 ; RV32IM-NEXT: srli a2, a4, 31
504 ; RV32IM-NEXT: srai a4, a4, 6
505 ; RV32IM-NEXT: add a2, a4, a2
506 ; RV32IM-NEXT: mul a4, a2, a7
507 ; RV32IM-NEXT: sub t2, a1, a4
508 ; RV32IM-NEXT: mulh a4, a3, a5
509 ; RV32IM-NEXT: add a4, a4, a3
510 ; RV32IM-NEXT: srli a1, a4, 31
511 ; RV32IM-NEXT: srai a4, a4, 6
512 ; RV32IM-NEXT: add a1, a4, a1
513 ; RV32IM-NEXT: mul a4, a1, a7
514 ; RV32IM-NEXT: sub a3, a3, a4
515 ; RV32IM-NEXT: mulh a4, a6, a5
516 ; RV32IM-NEXT: add a4, a4, a6
517 ; RV32IM-NEXT: srli a5, a4, 31
518 ; RV32IM-NEXT: srai a4, a4, 6
519 ; RV32IM-NEXT: add a4, a4, a5
520 ; RV32IM-NEXT: mul a5, a4, a7
521 ; RV32IM-NEXT: sub a5, a6, a5
522 ; RV32IM-NEXT: add a4, a5, a4
523 ; RV32IM-NEXT: add a1, a3, a1
524 ; RV32IM-NEXT: add a2, t2, a2
525 ; RV32IM-NEXT: add a3, t1, t0
526 ; RV32IM-NEXT: sh a3, 6(a0)
527 ; RV32IM-NEXT: sh a2, 4(a0)
528 ; RV32IM-NEXT: sh a1, 2(a0)
529 ; RV32IM-NEXT: sh a4, 0(a0)
532 ; RV64I-LABEL: combine_srem_sdiv:
534 ; RV64I-NEXT: addi sp, sp, -96
535 ; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
536 ; RV64I-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
537 ; RV64I-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
538 ; RV64I-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
539 ; RV64I-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
540 ; RV64I-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
541 ; RV64I-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
542 ; RV64I-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
543 ; RV64I-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
544 ; RV64I-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
545 ; RV64I-NEXT: sd s9, 8(sp) # 8-byte Folded Spill
546 ; RV64I-NEXT: lh s2, 0(a1)
547 ; RV64I-NEXT: lh s3, 8(a1)
548 ; RV64I-NEXT: lh s4, 16(a1)
549 ; RV64I-NEXT: lh s1, 24(a1)
550 ; RV64I-NEXT: mv s0, a0
551 ; RV64I-NEXT: addi a1, zero, 95
552 ; RV64I-NEXT: mv a0, s1
553 ; RV64I-NEXT: call __moddi3@plt
554 ; RV64I-NEXT: mv s5, a0
555 ; RV64I-NEXT: addi a1, zero, 95
556 ; RV64I-NEXT: mv a0, s4
557 ; RV64I-NEXT: call __moddi3@plt
558 ; RV64I-NEXT: mv s6, a0
559 ; RV64I-NEXT: addi a1, zero, 95
560 ; RV64I-NEXT: mv a0, s3
561 ; RV64I-NEXT: call __moddi3@plt
562 ; RV64I-NEXT: mv s7, a0
563 ; RV64I-NEXT: addi a1, zero, 95
564 ; RV64I-NEXT: mv a0, s2
565 ; RV64I-NEXT: call __moddi3@plt
566 ; RV64I-NEXT: mv s8, a0
567 ; RV64I-NEXT: addi a1, zero, 95
568 ; RV64I-NEXT: mv a0, s1
569 ; RV64I-NEXT: call __divdi3@plt
570 ; RV64I-NEXT: mv s9, a0
571 ; RV64I-NEXT: addi a1, zero, 95
572 ; RV64I-NEXT: mv a0, s4
573 ; RV64I-NEXT: call __divdi3@plt
574 ; RV64I-NEXT: mv s4, a0
575 ; RV64I-NEXT: addi a1, zero, 95
576 ; RV64I-NEXT: mv a0, s3
577 ; RV64I-NEXT: call __divdi3@plt
578 ; RV64I-NEXT: mv s1, a0
579 ; RV64I-NEXT: addi a1, zero, 95
580 ; RV64I-NEXT: mv a0, s2
581 ; RV64I-NEXT: call __divdi3@plt
582 ; RV64I-NEXT: addw a0, s8, a0
583 ; RV64I-NEXT: addw a1, s7, s1
584 ; RV64I-NEXT: addw a2, s6, s4
585 ; RV64I-NEXT: addw a3, s5, s9
586 ; RV64I-NEXT: sh a3, 6(s0)
587 ; RV64I-NEXT: sh a2, 4(s0)
588 ; RV64I-NEXT: sh a1, 2(s0)
589 ; RV64I-NEXT: sh a0, 0(s0)
590 ; RV64I-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
591 ; RV64I-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
592 ; RV64I-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
593 ; RV64I-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
594 ; RV64I-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
595 ; RV64I-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
596 ; RV64I-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
597 ; RV64I-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
598 ; RV64I-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
599 ; RV64I-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
600 ; RV64I-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
601 ; RV64I-NEXT: addi sp, sp, 96
604 ; RV64IM-LABEL: combine_srem_sdiv:
606 ; RV64IM-NEXT: lh a6, 0(a1)
607 ; RV64IM-NEXT: lh a7, 8(a1)
608 ; RV64IM-NEXT: lh a4, 16(a1)
609 ; RV64IM-NEXT: lh a1, 24(a1)
610 ; RV64IM-NEXT: lui a5, 1045903
611 ; RV64IM-NEXT: addiw a5, a5, -733
612 ; RV64IM-NEXT: slli a5, a5, 15
613 ; RV64IM-NEXT: addi a5, a5, 1035
614 ; RV64IM-NEXT: slli a5, a5, 12
615 ; RV64IM-NEXT: addi a5, a5, -905
616 ; RV64IM-NEXT: slli a5, a5, 12
617 ; RV64IM-NEXT: addi a5, a5, -1767
618 ; RV64IM-NEXT: mulh a2, a1, a5
619 ; RV64IM-NEXT: add a2, a2, a1
620 ; RV64IM-NEXT: srli a3, a2, 63
621 ; RV64IM-NEXT: srai a2, a2, 6
622 ; RV64IM-NEXT: addw t3, a2, a3
623 ; RV64IM-NEXT: addi t0, zero, 95
624 ; RV64IM-NEXT: mulw a3, t3, t0
625 ; RV64IM-NEXT: subw t1, a1, a3
626 ; RV64IM-NEXT: mulh a3, a4, a5
627 ; RV64IM-NEXT: add a3, a3, a4
628 ; RV64IM-NEXT: srli a1, a3, 63
629 ; RV64IM-NEXT: srai a3, a3, 6
630 ; RV64IM-NEXT: addw a1, a3, a1
631 ; RV64IM-NEXT: mulw a3, a1, t0
632 ; RV64IM-NEXT: subw t2, a4, a3
633 ; RV64IM-NEXT: mulh a4, a7, a5
634 ; RV64IM-NEXT: add a4, a4, a7
635 ; RV64IM-NEXT: srli a3, a4, 63
636 ; RV64IM-NEXT: srai a4, a4, 6
637 ; RV64IM-NEXT: addw a3, a4, a3
638 ; RV64IM-NEXT: mulw a4, a3, t0
639 ; RV64IM-NEXT: subw a4, a7, a4
640 ; RV64IM-NEXT: mulh a5, a6, a5
641 ; RV64IM-NEXT: add a5, a5, a6
642 ; RV64IM-NEXT: srli a2, a5, 63
643 ; RV64IM-NEXT: srai a5, a5, 6
644 ; RV64IM-NEXT: addw a2, a5, a2
645 ; RV64IM-NEXT: mulw a5, a2, t0
646 ; RV64IM-NEXT: subw a5, a6, a5
647 ; RV64IM-NEXT: addw a2, a5, a2
648 ; RV64IM-NEXT: addw a3, a4, a3
649 ; RV64IM-NEXT: addw a1, t2, a1
650 ; RV64IM-NEXT: addw a4, t1, t3
651 ; RV64IM-NEXT: sh a4, 6(a0)
652 ; RV64IM-NEXT: sh a1, 4(a0)
653 ; RV64IM-NEXT: sh a3, 2(a0)
654 ; RV64IM-NEXT: sh a2, 0(a0)
656 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
657 %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
658 %3 = add <4 x i16> %1, %2
662 ; Don't fold for divisors that are a power of two.
; Lanes 0-2 use the power-of-two divisors 64, 32 and 8; lane 3 uses 95.
; The power-of-two lanes are expected as a srli/add/andi/sub sequence in every
; configuration (the andi masks are -64, -32 and -8). Consequently, without +m
; only the lane with divisor 95 needs a libcall (__modsi3 on riscv32,
; __moddi3 on riscv64), and with +m only that lane gets the mulh-based
; expansion.
663 define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
664 ; RV32I-LABEL: dont_fold_srem_power_of_two:
666 ; RV32I-NEXT: addi sp, sp, -32
667 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
668 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
669 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
670 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
671 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
672 ; RV32I-NEXT: mv s0, a0
673 ; RV32I-NEXT: lh a2, 0(a1)
674 ; RV32I-NEXT: lh a0, 12(a1)
675 ; RV32I-NEXT: lh a3, 8(a1)
676 ; RV32I-NEXT: lh a1, 4(a1)
677 ; RV32I-NEXT: srli a4, a2, 26
678 ; RV32I-NEXT: add a4, a2, a4
679 ; RV32I-NEXT: andi a4, a4, -64
680 ; RV32I-NEXT: sub s2, a2, a4
681 ; RV32I-NEXT: srli a2, a1, 27
682 ; RV32I-NEXT: add a2, a1, a2
683 ; RV32I-NEXT: andi a2, a2, -32
684 ; RV32I-NEXT: sub s3, a1, a2
685 ; RV32I-NEXT: srli a1, a3, 29
686 ; RV32I-NEXT: add a1, a3, a1
687 ; RV32I-NEXT: andi a1, a1, -8
688 ; RV32I-NEXT: sub s1, a3, a1
689 ; RV32I-NEXT: addi a1, zero, 95
690 ; RV32I-NEXT: call __modsi3@plt
691 ; RV32I-NEXT: sh a0, 6(s0)
692 ; RV32I-NEXT: sh s1, 4(s0)
693 ; RV32I-NEXT: sh s3, 2(s0)
694 ; RV32I-NEXT: sh s2, 0(s0)
695 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
696 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
697 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
698 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
699 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
700 ; RV32I-NEXT: addi sp, sp, 32
703 ; RV32IM-LABEL: dont_fold_srem_power_of_two:
705 ; RV32IM-NEXT: lh a2, 8(a1)
706 ; RV32IM-NEXT: lh a3, 4(a1)
707 ; RV32IM-NEXT: lh a4, 12(a1)
708 ; RV32IM-NEXT: lh a1, 0(a1)
709 ; RV32IM-NEXT: lui a5, 706409
710 ; RV32IM-NEXT: addi a5, a5, 389
711 ; RV32IM-NEXT: mulh a5, a4, a5
712 ; RV32IM-NEXT: add a5, a5, a4
713 ; RV32IM-NEXT: srli a6, a5, 31
714 ; RV32IM-NEXT: srli a5, a5, 6
715 ; RV32IM-NEXT: add a6, a5, a6
716 ; RV32IM-NEXT: addi a5, zero, 95
717 ; RV32IM-NEXT: mul a5, a6, a5
718 ; RV32IM-NEXT: sub a4, a4, a5
719 ; RV32IM-NEXT: srli a5, a1, 26
720 ; RV32IM-NEXT: add a5, a1, a5
721 ; RV32IM-NEXT: andi a5, a5, -64
722 ; RV32IM-NEXT: sub a1, a1, a5
723 ; RV32IM-NEXT: srli a5, a3, 27
724 ; RV32IM-NEXT: add a5, a3, a5
725 ; RV32IM-NEXT: andi a5, a5, -32
726 ; RV32IM-NEXT: sub a3, a3, a5
727 ; RV32IM-NEXT: srli a5, a2, 29
728 ; RV32IM-NEXT: add a5, a2, a5
729 ; RV32IM-NEXT: andi a5, a5, -8
730 ; RV32IM-NEXT: sub a2, a2, a5
731 ; RV32IM-NEXT: sh a2, 4(a0)
732 ; RV32IM-NEXT: sh a3, 2(a0)
733 ; RV32IM-NEXT: sh a1, 0(a0)
734 ; RV32IM-NEXT: sh a4, 6(a0)
737 ; RV64I-LABEL: dont_fold_srem_power_of_two:
739 ; RV64I-NEXT: addi sp, sp, -48
740 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
741 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
742 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
743 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
744 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
745 ; RV64I-NEXT: mv s0, a0
746 ; RV64I-NEXT: lh a2, 0(a1)
747 ; RV64I-NEXT: lh a0, 24(a1)
748 ; RV64I-NEXT: lh a3, 16(a1)
749 ; RV64I-NEXT: lh a1, 8(a1)
750 ; RV64I-NEXT: srli a4, a2, 58
751 ; RV64I-NEXT: add a4, a2, a4
752 ; RV64I-NEXT: andi a4, a4, -64
753 ; RV64I-NEXT: subw s2, a2, a4
754 ; RV64I-NEXT: srli a2, a1, 59
755 ; RV64I-NEXT: add a2, a1, a2
756 ; RV64I-NEXT: andi a2, a2, -32
757 ; RV64I-NEXT: subw s3, a1, a2
758 ; RV64I-NEXT: srli a1, a3, 61
759 ; RV64I-NEXT: add a1, a3, a1
760 ; RV64I-NEXT: andi a1, a1, -8
761 ; RV64I-NEXT: subw s1, a3, a1
762 ; RV64I-NEXT: addi a1, zero, 95
763 ; RV64I-NEXT: call __moddi3@plt
764 ; RV64I-NEXT: sh a0, 6(s0)
765 ; RV64I-NEXT: sh s1, 4(s0)
766 ; RV64I-NEXT: sh s3, 2(s0)
767 ; RV64I-NEXT: sh s2, 0(s0)
768 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
769 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
770 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
771 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
772 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
773 ; RV64I-NEXT: addi sp, sp, 48
776 ; RV64IM-LABEL: dont_fold_srem_power_of_two:
778 ; RV64IM-NEXT: lh a6, 16(a1)
779 ; RV64IM-NEXT: lh a3, 8(a1)
780 ; RV64IM-NEXT: lh a4, 0(a1)
781 ; RV64IM-NEXT: lh a1, 24(a1)
782 ; RV64IM-NEXT: lui a5, 1045903
783 ; RV64IM-NEXT: addiw a5, a5, -733
784 ; RV64IM-NEXT: slli a5, a5, 15
785 ; RV64IM-NEXT: addi a5, a5, 1035
786 ; RV64IM-NEXT: slli a5, a5, 12
787 ; RV64IM-NEXT: addi a5, a5, -905
788 ; RV64IM-NEXT: slli a5, a5, 12
789 ; RV64IM-NEXT: addi a5, a5, -1767
790 ; RV64IM-NEXT: mulh a5, a1, a5
791 ; RV64IM-NEXT: add a5, a5, a1
792 ; RV64IM-NEXT: srli a2, a5, 63
793 ; RV64IM-NEXT: srli a5, a5, 6
794 ; RV64IM-NEXT: addw a2, a5, a2
795 ; RV64IM-NEXT: addi a5, zero, 95
796 ; RV64IM-NEXT: mulw a2, a2, a5
797 ; RV64IM-NEXT: subw a1, a1, a2
798 ; RV64IM-NEXT: srli a2, a4, 58
799 ; RV64IM-NEXT: add a2, a4, a2
800 ; RV64IM-NEXT: andi a2, a2, -64
801 ; RV64IM-NEXT: subw a2, a4, a2
802 ; RV64IM-NEXT: srli a4, a3, 59
803 ; RV64IM-NEXT: add a4, a3, a4
804 ; RV64IM-NEXT: andi a4, a4, -32
805 ; RV64IM-NEXT: subw a3, a3, a4
806 ; RV64IM-NEXT: srli a4, a6, 61
807 ; RV64IM-NEXT: add a4, a6, a4
808 ; RV64IM-NEXT: andi a4, a4, -8
809 ; RV64IM-NEXT: subw a4, a6, a4
810 ; RV64IM-NEXT: sh a4, 4(a0)
811 ; RV64IM-NEXT: sh a3, 2(a0)
812 ; RV64IM-NEXT: sh a2, 0(a0)
813 ; RV64IM-NEXT: sh a1, 6(a0)
815 %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
819 ; Don't fold if the divisor is one.
820 define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
821 ; RV32I-LABEL: dont_fold_srem_one:
823 ; RV32I-NEXT: addi sp, sp, -32
824 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
825 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
826 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
827 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
828 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
829 ; RV32I-NEXT: lh s2, 12(a1)
830 ; RV32I-NEXT: lh s1, 8(a1)
831 ; RV32I-NEXT: lh a2, 4(a1)
832 ; RV32I-NEXT: mv s0, a0
833 ; RV32I-NEXT: addi a1, zero, 654
834 ; RV32I-NEXT: mv a0, a2
835 ; RV32I-NEXT: call __modsi3@plt
836 ; RV32I-NEXT: mv s3, a0
837 ; RV32I-NEXT: addi a1, zero, 23
838 ; RV32I-NEXT: mv a0, s1
839 ; RV32I-NEXT: call __modsi3@plt
840 ; RV32I-NEXT: mv s1, a0
841 ; RV32I-NEXT: lui a0, 1
842 ; RV32I-NEXT: addi a1, a0, 1327
843 ; RV32I-NEXT: mv a0, s2
844 ; RV32I-NEXT: call __modsi3@plt
845 ; RV32I-NEXT: sh zero, 0(s0)
846 ; RV32I-NEXT: sh a0, 6(s0)
847 ; RV32I-NEXT: sh s1, 4(s0)
848 ; RV32I-NEXT: sh s3, 2(s0)
849 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
850 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
851 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
852 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
853 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
854 ; RV32I-NEXT: addi sp, sp, 32
857 ; RV32IM-LABEL: dont_fold_srem_one:
859 ; RV32IM-NEXT: lh a2, 12(a1)
860 ; RV32IM-NEXT: lh a3, 4(a1)
861 ; RV32IM-NEXT: lh a1, 8(a1)
862 ; RV32IM-NEXT: lui a4, 820904
863 ; RV32IM-NEXT: addi a4, a4, -1903
864 ; RV32IM-NEXT: mulh a4, a3, a4
865 ; RV32IM-NEXT: add a4, a4, a3
866 ; RV32IM-NEXT: srli a5, a4, 31
867 ; RV32IM-NEXT: srli a4, a4, 9
868 ; RV32IM-NEXT: add a4, a4, a5
869 ; RV32IM-NEXT: addi a5, zero, 654
870 ; RV32IM-NEXT: mul a4, a4, a5
871 ; RV32IM-NEXT: sub a3, a3, a4
872 ; RV32IM-NEXT: lui a4, 729444
873 ; RV32IM-NEXT: addi a4, a4, 713
874 ; RV32IM-NEXT: mulh a4, a1, a4
875 ; RV32IM-NEXT: add a4, a4, a1
876 ; RV32IM-NEXT: srli a5, a4, 31
877 ; RV32IM-NEXT: srli a4, a4, 4
878 ; RV32IM-NEXT: add a4, a4, a5
879 ; RV32IM-NEXT: addi a5, zero, 23
880 ; RV32IM-NEXT: mul a4, a4, a5
881 ; RV32IM-NEXT: sub a1, a1, a4
882 ; RV32IM-NEXT: lui a4, 395996
883 ; RV32IM-NEXT: addi a4, a4, -2009
884 ; RV32IM-NEXT: mulh a4, a2, a4
885 ; RV32IM-NEXT: srli a5, a4, 31
886 ; RV32IM-NEXT: srli a4, a4, 11
887 ; RV32IM-NEXT: add a4, a4, a5
888 ; RV32IM-NEXT: lui a5, 1
889 ; RV32IM-NEXT: addi a5, a5, 1327
890 ; RV32IM-NEXT: mul a4, a4, a5
891 ; RV32IM-NEXT: sub a2, a2, a4
892 ; RV32IM-NEXT: sh zero, 0(a0)
893 ; RV32IM-NEXT: sh a2, 6(a0)
894 ; RV32IM-NEXT: sh a1, 4(a0)
895 ; RV32IM-NEXT: sh a3, 2(a0)
898 ; RV64I-LABEL: dont_fold_srem_one:
900 ; RV64I-NEXT: addi sp, sp, -48
901 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
902 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
903 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
904 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
905 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
906 ; RV64I-NEXT: lh s2, 24(a1)
907 ; RV64I-NEXT: lh s1, 16(a1)
908 ; RV64I-NEXT: lh a2, 8(a1)
909 ; RV64I-NEXT: mv s0, a0
910 ; RV64I-NEXT: addi a1, zero, 654
911 ; RV64I-NEXT: mv a0, a2
912 ; RV64I-NEXT: call __moddi3@plt
913 ; RV64I-NEXT: mv s3, a0
914 ; RV64I-NEXT: addi a1, zero, 23
915 ; RV64I-NEXT: mv a0, s1
916 ; RV64I-NEXT: call __moddi3@plt
917 ; RV64I-NEXT: mv s1, a0
918 ; RV64I-NEXT: lui a0, 1
919 ; RV64I-NEXT: addiw a1, a0, 1327
920 ; RV64I-NEXT: mv a0, s2
921 ; RV64I-NEXT: call __moddi3@plt
922 ; RV64I-NEXT: sh zero, 0(s0)
923 ; RV64I-NEXT: sh a0, 6(s0)
924 ; RV64I-NEXT: sh s1, 4(s0)
925 ; RV64I-NEXT: sh s3, 2(s0)
926 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
927 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
928 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
929 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
930 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
931 ; RV64I-NEXT: addi sp, sp, 48
934 ; RV64IM-LABEL: dont_fold_srem_one:
936 ; RV64IM-NEXT: lh a2, 24(a1)
937 ; RV64IM-NEXT: lh a3, 8(a1)
938 ; RV64IM-NEXT: lh a1, 16(a1)
939 ; RV64IM-NEXT: lui a4, 1043590
940 ; RV64IM-NEXT: addiw a4, a4, -1781
941 ; RV64IM-NEXT: slli a4, a4, 13
942 ; RV64IM-NEXT: addi a4, a4, 1069
943 ; RV64IM-NEXT: slli a4, a4, 12
944 ; RV64IM-NEXT: addi a4, a4, -1959
945 ; RV64IM-NEXT: slli a4, a4, 13
946 ; RV64IM-NEXT: addi a4, a4, 357
947 ; RV64IM-NEXT: mulh a4, a1, a4
948 ; RV64IM-NEXT: add a4, a4, a1
949 ; RV64IM-NEXT: srli a5, a4, 63
950 ; RV64IM-NEXT: srli a4, a4, 4
951 ; RV64IM-NEXT: addw a4, a4, a5
952 ; RV64IM-NEXT: addi a5, zero, 23
953 ; RV64IM-NEXT: mulw a4, a4, a5
954 ; RV64IM-NEXT: subw a1, a1, a4
955 ; RV64IM-NEXT: lui a4, 6413
956 ; RV64IM-NEXT: addiw a4, a4, 1265
957 ; RV64IM-NEXT: slli a4, a4, 13
958 ; RV64IM-NEXT: addi a4, a4, 1027
959 ; RV64IM-NEXT: slli a4, a4, 13
960 ; RV64IM-NEXT: addi a4, a4, 1077
961 ; RV64IM-NEXT: slli a4, a4, 12
962 ; RV64IM-NEXT: addi a4, a4, 965
963 ; RV64IM-NEXT: mulh a4, a3, a4
964 ; RV64IM-NEXT: srli a5, a4, 63
965 ; RV64IM-NEXT: srli a4, a4, 8
966 ; RV64IM-NEXT: addw a4, a4, a5
967 ; RV64IM-NEXT: addi a5, zero, 654
968 ; RV64IM-NEXT: mulw a4, a4, a5
969 ; RV64IM-NEXT: subw a3, a3, a4
970 ; RV64IM-NEXT: lui a4, 12375
971 ; RV64IM-NEXT: addiw a4, a4, -575
972 ; RV64IM-NEXT: slli a4, a4, 12
973 ; RV64IM-NEXT: addi a4, a4, 883
974 ; RV64IM-NEXT: slli a4, a4, 13
975 ; RV64IM-NEXT: addi a4, a4, -431
976 ; RV64IM-NEXT: slli a4, a4, 12
977 ; RV64IM-NEXT: addi a4, a4, 1959
978 ; RV64IM-NEXT: mulh a4, a2, a4
979 ; RV64IM-NEXT: srli a5, a4, 63
980 ; RV64IM-NEXT: srli a4, a4, 11
981 ; RV64IM-NEXT: addw a4, a4, a5
982 ; RV64IM-NEXT: lui a5, 1
983 ; RV64IM-NEXT: addiw a5, a5, 1327
984 ; RV64IM-NEXT: mulw a4, a4, a5
985 ; RV64IM-NEXT: subw a2, a2, a4
986 ; RV64IM-NEXT: sh zero, 0(a0)
987 ; RV64IM-NEXT: sh a2, 6(a0)
988 ; RV64IM-NEXT: sh a3, 2(a0)
989 ; RV64IM-NEXT: sh a1, 4(a0)
991 %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
995 ; Don't fold if the divisor is 2^15 (written below as i16 32768, i.e. bit pattern 0x8000).
;
; Takes the srem of a <4 x i16> by the constant vector <1, 32768, 23, 5423> and
; checks the code emitted for each of the four RUN configurations:
;   - lane 0 (divisor 1): every configuration simply stores zero at offset 0.
;   - lane 1 (divisor 32768): every configuration uses an inline
;     srli/add/and/sub sequence whose mask is built by "lui 8" (0x8000);
;     no libcall and no multiply is emitted for this lane.
;   - lanes 2 and 3 (divisors 23 and 5423): RV32I calls __modsi3 and RV64I calls
;     __moddi3; RV32IM and RV64IM use mulh by a materialized constant followed
;     by shifts, mul/mulw and sub/subw.
; NOTE(review): the function name says "urem" and "smax", but the operation is
; srem and 0x8000 is the i16 sign bit (-32768 when read as signed) rather than
; the signed maximum. The name is referenced by the four -LABEL lines, so it
; cannot be changed here without updating them as well.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
996 define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
997 ; RV32I-LABEL: dont_fold_urem_i16_smax:
999 ; RV32I-NEXT: addi sp, sp, -32
1000 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1001 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1002 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1003 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1004 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1005 ; RV32I-NEXT: lh a2, 4(a1)
1006 ; RV32I-NEXT: mv s0, a0
1007 ; RV32I-NEXT: lh s2, 12(a1)
1008 ; RV32I-NEXT: lh a0, 8(a1)
1009 ; RV32I-NEXT: srli a1, a2, 17
1010 ; RV32I-NEXT: add a1, a2, a1
1011 ; RV32I-NEXT: lui a3, 8
1012 ; RV32I-NEXT: and a1, a1, a3
1013 ; RV32I-NEXT: sub s3, a2, a1
1014 ; RV32I-NEXT: addi a1, zero, 23
1015 ; RV32I-NEXT: call __modsi3@plt
1016 ; RV32I-NEXT: mv s1, a0
1017 ; RV32I-NEXT: lui a0, 1
1018 ; RV32I-NEXT: addi a1, a0, 1327
1019 ; RV32I-NEXT: mv a0, s2
1020 ; RV32I-NEXT: call __modsi3@plt
1021 ; RV32I-NEXT: sh zero, 0(s0)
1022 ; RV32I-NEXT: sh a0, 6(s0)
1023 ; RV32I-NEXT: sh s1, 4(s0)
1024 ; RV32I-NEXT: sh s3, 2(s0)
1025 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1026 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1027 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1028 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1029 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1030 ; RV32I-NEXT: addi sp, sp, 32
1033 ; RV32IM-LABEL: dont_fold_urem_i16_smax:
1035 ; RV32IM-NEXT: lh a2, 4(a1)
1036 ; RV32IM-NEXT: lh a3, 8(a1)
1037 ; RV32IM-NEXT: lh a1, 12(a1)
1038 ; RV32IM-NEXT: lui a4, 729444
1039 ; RV32IM-NEXT: addi a4, a4, 713
1040 ; RV32IM-NEXT: mulh a4, a3, a4
1041 ; RV32IM-NEXT: add a4, a4, a3
1042 ; RV32IM-NEXT: srli a5, a4, 31
1043 ; RV32IM-NEXT: srli a4, a4, 4
1044 ; RV32IM-NEXT: add a4, a4, a5
1045 ; RV32IM-NEXT: addi a5, zero, 23
1046 ; RV32IM-NEXT: mul a4, a4, a5
1047 ; RV32IM-NEXT: sub a3, a3, a4
1048 ; RV32IM-NEXT: lui a4, 395996
1049 ; RV32IM-NEXT: addi a4, a4, -2009
1050 ; RV32IM-NEXT: mulh a4, a1, a4
1051 ; RV32IM-NEXT: srli a5, a4, 31
1052 ; RV32IM-NEXT: srli a4, a4, 11
1053 ; RV32IM-NEXT: add a4, a4, a5
1054 ; RV32IM-NEXT: lui a5, 1
1055 ; RV32IM-NEXT: addi a5, a5, 1327
1056 ; RV32IM-NEXT: mul a4, a4, a5
1057 ; RV32IM-NEXT: sub a1, a1, a4
1058 ; RV32IM-NEXT: srli a4, a2, 17
1059 ; RV32IM-NEXT: add a4, a2, a4
1060 ; RV32IM-NEXT: lui a5, 8
1061 ; RV32IM-NEXT: and a4, a4, a5
1062 ; RV32IM-NEXT: sub a2, a2, a4
1063 ; RV32IM-NEXT: sh zero, 0(a0)
1064 ; RV32IM-NEXT: sh a2, 2(a0)
1065 ; RV32IM-NEXT: sh a1, 6(a0)
1066 ; RV32IM-NEXT: sh a3, 4(a0)
1069 ; RV64I-LABEL: dont_fold_urem_i16_smax:
1071 ; RV64I-NEXT: addi sp, sp, -48
1072 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1073 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1074 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1075 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1076 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1077 ; RV64I-NEXT: lh a2, 8(a1)
1078 ; RV64I-NEXT: mv s0, a0
1079 ; RV64I-NEXT: lh s2, 24(a1)
1080 ; RV64I-NEXT: lh a0, 16(a1)
1081 ; RV64I-NEXT: srli a1, a2, 49
1082 ; RV64I-NEXT: add a1, a2, a1
1083 ; RV64I-NEXT: lui a3, 8
1084 ; RV64I-NEXT: and a1, a1, a3
1085 ; RV64I-NEXT: subw s3, a2, a1
1086 ; RV64I-NEXT: addi a1, zero, 23
1087 ; RV64I-NEXT: call __moddi3@plt
1088 ; RV64I-NEXT: mv s1, a0
1089 ; RV64I-NEXT: lui a0, 1
1090 ; RV64I-NEXT: addiw a1, a0, 1327
1091 ; RV64I-NEXT: mv a0, s2
1092 ; RV64I-NEXT: call __moddi3@plt
1093 ; RV64I-NEXT: sh zero, 0(s0)
1094 ; RV64I-NEXT: sh a0, 6(s0)
1095 ; RV64I-NEXT: sh s1, 4(s0)
1096 ; RV64I-NEXT: sh s3, 2(s0)
1097 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1098 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1099 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1100 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1101 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1102 ; RV64I-NEXT: addi sp, sp, 48
1105 ; RV64IM-LABEL: dont_fold_urem_i16_smax:
1107 ; RV64IM-NEXT: lh a2, 8(a1)
1108 ; RV64IM-NEXT: lh a3, 24(a1)
1109 ; RV64IM-NEXT: lh a1, 16(a1)
1110 ; RV64IM-NEXT: lui a4, 1043590
1111 ; RV64IM-NEXT: addiw a4, a4, -1781
1112 ; RV64IM-NEXT: slli a4, a4, 13
1113 ; RV64IM-NEXT: addi a4, a4, 1069
1114 ; RV64IM-NEXT: slli a4, a4, 12
1115 ; RV64IM-NEXT: addi a4, a4, -1959
1116 ; RV64IM-NEXT: slli a4, a4, 13
1117 ; RV64IM-NEXT: addi a4, a4, 357
1118 ; RV64IM-NEXT: mulh a4, a1, a4
1119 ; RV64IM-NEXT: add a4, a4, a1
1120 ; RV64IM-NEXT: srli a5, a4, 63
1121 ; RV64IM-NEXT: srli a4, a4, 4
1122 ; RV64IM-NEXT: addw a4, a4, a5
1123 ; RV64IM-NEXT: addi a5, zero, 23
1124 ; RV64IM-NEXT: mulw a4, a4, a5
1125 ; RV64IM-NEXT: subw a1, a1, a4
1126 ; RV64IM-NEXT: lui a4, 12375
1127 ; RV64IM-NEXT: addiw a4, a4, -575
1128 ; RV64IM-NEXT: slli a4, a4, 12
1129 ; RV64IM-NEXT: addi a4, a4, 883
1130 ; RV64IM-NEXT: slli a4, a4, 13
1131 ; RV64IM-NEXT: addi a4, a4, -431
1132 ; RV64IM-NEXT: slli a4, a4, 12
1133 ; RV64IM-NEXT: addi a4, a4, 1959
1134 ; RV64IM-NEXT: mulh a4, a3, a4
1135 ; RV64IM-NEXT: srli a5, a4, 63
1136 ; RV64IM-NEXT: srli a4, a4, 11
1137 ; RV64IM-NEXT: addw a4, a4, a5
1138 ; RV64IM-NEXT: lui a5, 1
1139 ; RV64IM-NEXT: addiw a5, a5, 1327
1140 ; RV64IM-NEXT: mulw a4, a4, a5
1141 ; RV64IM-NEXT: subw a3, a3, a4
1142 ; RV64IM-NEXT: srli a4, a2, 49
1143 ; RV64IM-NEXT: add a4, a2, a4
1144 ; RV64IM-NEXT: lui a5, 8
1145 ; RV64IM-NEXT: and a4, a4, a5
1146 ; RV64IM-NEXT: subw a2, a2, a4
1147 ; RV64IM-NEXT: sh zero, 0(a0)
1148 ; RV64IM-NEXT: sh a2, 2(a0)
1149 ; RV64IM-NEXT: sh a3, 6(a0)
1150 ; RV64IM-NEXT: sh a1, 4(a0)
1152 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
1156 ; Don't fold i64 srem.
1157 define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
1158 ; RV32I-LABEL: dont_fold_srem_i64:
1160 ; RV32I-NEXT: addi sp, sp, -48
1161 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
1162 ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
1163 ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
1164 ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
1165 ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
1166 ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
1167 ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
1168 ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
1169 ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
1170 ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
1171 ; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill
1172 ; RV32I-NEXT: lw s2, 24(a1)
1173 ; RV32I-NEXT: lw s3, 28(a1)
1174 ; RV32I-NEXT: lw s4, 16(a1)
1175 ; RV32I-NEXT: lw s5, 20(a1)
1176 ; RV32I-NEXT: lw s6, 8(a1)
1177 ; RV32I-NEXT: lw s1, 12(a1)
1178 ; RV32I-NEXT: lw a3, 0(a1)
1179 ; RV32I-NEXT: lw a1, 4(a1)
1180 ; RV32I-NEXT: mv s0, a0
1181 ; RV32I-NEXT: addi a2, zero, 1
1182 ; RV32I-NEXT: mv a0, a3
1183 ; RV32I-NEXT: mv a3, zero
1184 ; RV32I-NEXT: call __moddi3@plt
1185 ; RV32I-NEXT: mv s7, a0
1186 ; RV32I-NEXT: mv s8, a1
1187 ; RV32I-NEXT: addi a2, zero, 654
1188 ; RV32I-NEXT: mv a0, s6
1189 ; RV32I-NEXT: mv a1, s1
1190 ; RV32I-NEXT: mv a3, zero
1191 ; RV32I-NEXT: call __moddi3@plt
1192 ; RV32I-NEXT: mv s6, a0
1193 ; RV32I-NEXT: mv s9, a1
1194 ; RV32I-NEXT: addi a2, zero, 23
1195 ; RV32I-NEXT: mv a0, s4
1196 ; RV32I-NEXT: mv a1, s5
1197 ; RV32I-NEXT: mv a3, zero
1198 ; RV32I-NEXT: call __moddi3@plt
1199 ; RV32I-NEXT: mv s4, a0
1200 ; RV32I-NEXT: mv s1, a1
1201 ; RV32I-NEXT: lui a0, 1
1202 ; RV32I-NEXT: addi a2, a0, 1327
1203 ; RV32I-NEXT: mv a0, s2
1204 ; RV32I-NEXT: mv a1, s3
1205 ; RV32I-NEXT: mv a3, zero
1206 ; RV32I-NEXT: call __moddi3@plt
1207 ; RV32I-NEXT: sw a1, 28(s0)
1208 ; RV32I-NEXT: sw a0, 24(s0)
1209 ; RV32I-NEXT: sw s1, 20(s0)
1210 ; RV32I-NEXT: sw s4, 16(s0)
1211 ; RV32I-NEXT: sw s9, 12(s0)
1212 ; RV32I-NEXT: sw s6, 8(s0)
1213 ; RV32I-NEXT: sw s8, 4(s0)
1214 ; RV32I-NEXT: sw s7, 0(s0)
1215 ; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
1216 ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
1217 ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
1218 ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
1219 ; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
1220 ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
1221 ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
1222 ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
1223 ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
1224 ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
1225 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
1226 ; RV32I-NEXT: addi sp, sp, 48
1229 ; RV32IM-LABEL: dont_fold_srem_i64:
1231 ; RV32IM-NEXT: addi sp, sp, -48
1232 ; RV32IM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
1233 ; RV32IM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
1234 ; RV32IM-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
1235 ; RV32IM-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
1236 ; RV32IM-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
1237 ; RV32IM-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
1238 ; RV32IM-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
1239 ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
1240 ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
1241 ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
1242 ; RV32IM-NEXT: sw s9, 4(sp) # 4-byte Folded Spill
1243 ; RV32IM-NEXT: lw s2, 24(a1)
1244 ; RV32IM-NEXT: lw s3, 28(a1)
1245 ; RV32IM-NEXT: lw s4, 16(a1)
1246 ; RV32IM-NEXT: lw s5, 20(a1)
1247 ; RV32IM-NEXT: lw s6, 8(a1)
1248 ; RV32IM-NEXT: lw s1, 12(a1)
1249 ; RV32IM-NEXT: lw a3, 0(a1)
1250 ; RV32IM-NEXT: lw a1, 4(a1)
1251 ; RV32IM-NEXT: mv s0, a0
1252 ; RV32IM-NEXT: addi a2, zero, 1
1253 ; RV32IM-NEXT: mv a0, a3
1254 ; RV32IM-NEXT: mv a3, zero
1255 ; RV32IM-NEXT: call __moddi3@plt
1256 ; RV32IM-NEXT: mv s7, a0
1257 ; RV32IM-NEXT: mv s8, a1
1258 ; RV32IM-NEXT: addi a2, zero, 654
1259 ; RV32IM-NEXT: mv a0, s6
1260 ; RV32IM-NEXT: mv a1, s1
1261 ; RV32IM-NEXT: mv a3, zero
1262 ; RV32IM-NEXT: call __moddi3@plt
1263 ; RV32IM-NEXT: mv s6, a0
1264 ; RV32IM-NEXT: mv s9, a1
1265 ; RV32IM-NEXT: addi a2, zero, 23
1266 ; RV32IM-NEXT: mv a0, s4
1267 ; RV32IM-NEXT: mv a1, s5
1268 ; RV32IM-NEXT: mv a3, zero
1269 ; RV32IM-NEXT: call __moddi3@plt
1270 ; RV32IM-NEXT: mv s4, a0
1271 ; RV32IM-NEXT: mv s1, a1
1272 ; RV32IM-NEXT: lui a0, 1
1273 ; RV32IM-NEXT: addi a2, a0, 1327
1274 ; RV32IM-NEXT: mv a0, s2
1275 ; RV32IM-NEXT: mv a1, s3
1276 ; RV32IM-NEXT: mv a3, zero
1277 ; RV32IM-NEXT: call __moddi3@plt
1278 ; RV32IM-NEXT: sw a1, 28(s0)
1279 ; RV32IM-NEXT: sw a0, 24(s0)
1280 ; RV32IM-NEXT: sw s1, 20(s0)
1281 ; RV32IM-NEXT: sw s4, 16(s0)
1282 ; RV32IM-NEXT: sw s9, 12(s0)
1283 ; RV32IM-NEXT: sw s6, 8(s0)
1284 ; RV32IM-NEXT: sw s8, 4(s0)
1285 ; RV32IM-NEXT: sw s7, 0(s0)
1286 ; RV32IM-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
1287 ; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
1288 ; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
1289 ; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
1290 ; RV32IM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
1291 ; RV32IM-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
1292 ; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
1293 ; RV32IM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
1294 ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
1295 ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
1296 ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
1297 ; RV32IM-NEXT: addi sp, sp, 48
1300 ; RV64I-LABEL: dont_fold_srem_i64:
1302 ; RV64I-NEXT: addi sp, sp, -48
1303 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1304 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1305 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1306 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1307 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1308 ; RV64I-NEXT: ld s2, 24(a1)
1309 ; RV64I-NEXT: ld s1, 16(a1)
1310 ; RV64I-NEXT: ld a2, 8(a1)
1311 ; RV64I-NEXT: mv s0, a0
1312 ; RV64I-NEXT: addi a1, zero, 654
1313 ; RV64I-NEXT: mv a0, a2
1314 ; RV64I-NEXT: call __moddi3@plt
1315 ; RV64I-NEXT: mv s3, a0
1316 ; RV64I-NEXT: addi a1, zero, 23
1317 ; RV64I-NEXT: mv a0, s1
1318 ; RV64I-NEXT: call __moddi3@plt
1319 ; RV64I-NEXT: mv s1, a0
1320 ; RV64I-NEXT: lui a0, 1
1321 ; RV64I-NEXT: addiw a1, a0, 1327
1322 ; RV64I-NEXT: mv a0, s2
1323 ; RV64I-NEXT: call __moddi3@plt
1324 ; RV64I-NEXT: sd zero, 0(s0)
1325 ; RV64I-NEXT: sd a0, 24(s0)
1326 ; RV64I-NEXT: sd s1, 16(s0)
1327 ; RV64I-NEXT: sd s3, 8(s0)
1328 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1329 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1330 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1331 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1332 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1333 ; RV64I-NEXT: addi sp, sp, 48
1336 ; RV64IM-LABEL: dont_fold_srem_i64:
1338 ; RV64IM-NEXT: ld a2, 24(a1)
1339 ; RV64IM-NEXT: ld a3, 8(a1)
1340 ; RV64IM-NEXT: ld a1, 16(a1)
1341 ; RV64IM-NEXT: lui a4, 1043590
1342 ; RV64IM-NEXT: addiw a4, a4, -1781
1343 ; RV64IM-NEXT: slli a4, a4, 13
1344 ; RV64IM-NEXT: addi a4, a4, 1069
1345 ; RV64IM-NEXT: slli a4, a4, 12
1346 ; RV64IM-NEXT: addi a4, a4, -1959
1347 ; RV64IM-NEXT: slli a4, a4, 13
1348 ; RV64IM-NEXT: addi a4, a4, 357
1349 ; RV64IM-NEXT: mulh a4, a1, a4
1350 ; RV64IM-NEXT: add a4, a4, a1
1351 ; RV64IM-NEXT: srli a5, a4, 63
1352 ; RV64IM-NEXT: srai a4, a4, 4
1353 ; RV64IM-NEXT: add a4, a4, a5
1354 ; RV64IM-NEXT: addi a5, zero, 23
1355 ; RV64IM-NEXT: mul a4, a4, a5
1356 ; RV64IM-NEXT: sub a1, a1, a4
1357 ; RV64IM-NEXT: lui a4, 6413
1358 ; RV64IM-NEXT: addiw a4, a4, 1265
1359 ; RV64IM-NEXT: slli a4, a4, 13
1360 ; RV64IM-NEXT: addi a4, a4, 1027
1361 ; RV64IM-NEXT: slli a4, a4, 13
1362 ; RV64IM-NEXT: addi a4, a4, 1077
1363 ; RV64IM-NEXT: slli a4, a4, 12
1364 ; RV64IM-NEXT: addi a4, a4, 965
1365 ; RV64IM-NEXT: mulh a4, a3, a4
1366 ; RV64IM-NEXT: srli a5, a4, 63
1367 ; RV64IM-NEXT: srai a4, a4, 8
1368 ; RV64IM-NEXT: add a4, a4, a5
1369 ; RV64IM-NEXT: addi a5, zero, 654
1370 ; RV64IM-NEXT: mul a4, a4, a5
1371 ; RV64IM-NEXT: sub a3, a3, a4
1372 ; RV64IM-NEXT: lui a4, 12375
1373 ; RV64IM-NEXT: addiw a4, a4, -575
1374 ; RV64IM-NEXT: slli a4, a4, 12
1375 ; RV64IM-NEXT: addi a4, a4, 883
1376 ; RV64IM-NEXT: slli a4, a4, 13
1377 ; RV64IM-NEXT: addi a4, a4, -431
1378 ; RV64IM-NEXT: slli a4, a4, 12
1379 ; RV64IM-NEXT: addi a4, a4, 1959
1380 ; RV64IM-NEXT: mulh a4, a2, a4
1381 ; RV64IM-NEXT: srli a5, a4, 63
1382 ; RV64IM-NEXT: srai a4, a4, 11
1383 ; RV64IM-NEXT: add a4, a4, a5
1384 ; RV64IM-NEXT: lui a5, 1
1385 ; RV64IM-NEXT: addiw a5, a5, 1327
1386 ; RV64IM-NEXT: mul a4, a4, a5
1387 ; RV64IM-NEXT: sub a2, a2, a4
1388 ; RV64IM-NEXT: sd zero, 0(a0)
1389 ; RV64IM-NEXT: sd a2, 24(a0)
1390 ; RV64IM-NEXT: sd a3, 8(a0)
1391 ; RV64IM-NEXT: sd a1, 16(a0)
1393 %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>