; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M
; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV

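; The functions below check that (X urem C) eq/ne 0 is lowered for integer
; types of illegal (non-power-of-2) width without emitting a urem.
; Explanatory note, not part of the generated checks: for an odd divisor d
; and an n-bit unsigned X, the expected fold (Hacker's Delight) is
;   X urem d == 0  <=>  (X * d^-1) mod 2^n <= (2^n - 1) / d
; For i13 urem 5 below: 5^-1 mod 2^13 = 3277, built as lui 1 / addi -819
; (4096 - 819); the slli/srli pair by 19 (51 on RV64) truncates the product
; to 13 bits; and (2^13 - 1) / 5 = 1638 gives the sltiu bound of 1639.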
define i1 @test_urem_odd(i13 %X) nounwind {
; RV32-LABEL: test_urem_odd:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    lui a1, 1
; RV32-NEXT:    addi a1, a1, -819
; RV32-NEXT:    call __mulsi3@plt
; RV32-NEXT:    slli a0, a0, 19
; RV32-NEXT:    srli a0, a0, 19
; RV32-NEXT:    sltiu a0, a0, 1639
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: test_urem_odd:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    lui a1, 1
; RV64-NEXT:    addiw a1, a1, -819
; RV64-NEXT:    call __muldi3@plt
; RV64-NEXT:    slli a0, a0, 51
; RV64-NEXT:    srli a0, a0, 51
; RV64-NEXT:    sltiu a0, a0, 1639
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_urem_odd:
; RV32M:       # %bb.0:
; RV32M-NEXT:    lui a1, 1
; RV32M-NEXT:    addi a1, a1, -819
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    slli a0, a0, 19
; RV32M-NEXT:    srli a0, a0, 19
; RV32M-NEXT:    sltiu a0, a0, 1639
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_urem_odd:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lui a1, 1
; RV64M-NEXT:    addiw a1, a1, -819
; RV64M-NEXT:    mulw a0, a0, a1
; RV64M-NEXT:    slli a0, a0, 51
; RV64M-NEXT:    srli a0, a0, 51
; RV64M-NEXT:    sltiu a0, a0, 1639
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_urem_odd:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    lui a1, 1
; RV32MV-NEXT:    addi a1, a1, -819
; RV32MV-NEXT:    mul a0, a0, a1
; RV32MV-NEXT:    slli a0, a0, 19
; RV32MV-NEXT:    srli a0, a0, 19
; RV32MV-NEXT:    sltiu a0, a0, 1639
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_urem_odd:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    lui a1, 1
; RV64MV-NEXT:    addiw a1, a1, -819
; RV64MV-NEXT:    mulw a0, a0, a1
; RV64MV-NEXT:    slli a0, a0, 51
; RV64MV-NEXT:    srli a0, a0, 51
; RV64MV-NEXT:    sltiu a0, a0, 1639
; RV64MV-NEXT:    ret
  %urem = urem i13 %X, 5
  %cmp = icmp eq i13 %urem, 0
  ret i1 %cmp
}

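; Explanatory note, not part of the generated checks: for an even divisor
; d = 2 * d' with d' odd, the fold multiplies by d'^-1 mod 2^n and rotates
; the low n bits right by one, so
;   X urem d == 0  <=>  rotr(X * d'^-1 mod 2^n, 1) <= (2^n - 1) / d
; For i27 urem 14 below: 7^-1 mod 2^27 = 115043767, built as lui 28087 /
; addi -585; the slli/srli/or sequence is the 27-bit rotate right by one;
; and (2^27 - 1) / 14 = 9586980 gives the sltu bound of 9586981
; (lui 2341 / addi -1755).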
define i1 @test_urem_even(i27 %X) nounwind {
; RV32-LABEL: test_urem_even:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    lui a1, 28087
; RV32-NEXT:    addi a1, a1, -585
; RV32-NEXT:    call __mulsi3@plt
; RV32-NEXT:    slli a1, a0, 26
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    srli a0, a0, 6
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    srli a0, a0, 5
; RV32-NEXT:    lui a1, 2341
; RV32-NEXT:    addi a1, a1, -1755
; RV32-NEXT:    sltu a0, a0, a1
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: test_urem_even:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    lui a1, 28087
; RV64-NEXT:    addiw a1, a1, -585
; RV64-NEXT:    call __muldi3@plt
; RV64-NEXT:    slli a1, a0, 26
; RV64-NEXT:    slli a0, a0, 37
; RV64-NEXT:    srli a0, a0, 38
; RV64-NEXT:    or a0, a0, a1
; RV64-NEXT:    slli a0, a0, 37
; RV64-NEXT:    srli a0, a0, 37
; RV64-NEXT:    lui a1, 2341
; RV64-NEXT:    addiw a1, a1, -1755
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_urem_even:
; RV32M:       # %bb.0:
; RV32M-NEXT:    lui a1, 28087
; RV32M-NEXT:    addi a1, a1, -585
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    slli a1, a0, 26
; RV32M-NEXT:    slli a0, a0, 5
; RV32M-NEXT:    srli a0, a0, 6
; RV32M-NEXT:    or a0, a0, a1
; RV32M-NEXT:    slli a0, a0, 5
; RV32M-NEXT:    srli a0, a0, 5
; RV32M-NEXT:    lui a1, 2341
; RV32M-NEXT:    addi a1, a1, -1755
; RV32M-NEXT:    sltu a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_urem_even:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lui a1, 28087
; RV64M-NEXT:    addiw a1, a1, -585
; RV64M-NEXT:    mul a0, a0, a1
; RV64M-NEXT:    slli a1, a0, 26
; RV64M-NEXT:    slli a0, a0, 37
; RV64M-NEXT:    srli a0, a0, 38
; RV64M-NEXT:    or a0, a0, a1
; RV64M-NEXT:    slli a0, a0, 37
; RV64M-NEXT:    srli a0, a0, 37
; RV64M-NEXT:    lui a1, 2341
; RV64M-NEXT:    addiw a1, a1, -1755
; RV64M-NEXT:    sltu a0, a0, a1
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_urem_even:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    lui a1, 28087
; RV32MV-NEXT:    addi a1, a1, -585
; RV32MV-NEXT:    mul a0, a0, a1
; RV32MV-NEXT:    slli a1, a0, 26
; RV32MV-NEXT:    slli a0, a0, 5
; RV32MV-NEXT:    srli a0, a0, 6
; RV32MV-NEXT:    or a0, a0, a1
; RV32MV-NEXT:    slli a0, a0, 5
; RV32MV-NEXT:    srli a0, a0, 5
; RV32MV-NEXT:    lui a1, 2341
; RV32MV-NEXT:    addi a1, a1, -1755
; RV32MV-NEXT:    sltu a0, a0, a1
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_urem_even:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    lui a1, 28087
; RV64MV-NEXT:    addiw a1, a1, -585
; RV64MV-NEXT:    mul a0, a0, a1
; RV64MV-NEXT:    slli a1, a0, 26
; RV64MV-NEXT:    slli a0, a0, 37
; RV64MV-NEXT:    srli a0, a0, 38
; RV64MV-NEXT:    or a0, a0, a1
; RV64MV-NEXT:    slli a0, a0, 37
; RV64MV-NEXT:    srli a0, a0, 37
; RV64MV-NEXT:    lui a1, 2341
; RV64MV-NEXT:    addiw a1, a1, -1755
; RV64MV-NEXT:    sltu a0, a0, a1
; RV64MV-NEXT:    ret
  %urem = urem i27 %X, 14
  %cmp = icmp eq i27 %urem, 0
  ret i1 %cmp
}

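; Explanatory note, not part of the generated checks: the i4 case uses the
; same odd-divisor fold with the multiply strength-reduced:
; 5^-1 mod 2^4 = 13 = -3 (mod 16), so X * -3 is computed as neg(2X + X);
; andi 15 truncates to 4 bits, (2^4 - 1) / 5 = 3 gives the sltiu bound of 4,
; and xori 1 inverts the result for setne.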
define i1 @test_urem_odd_setne(i4 %X) nounwind {
; RV32-LABEL: test_urem_odd_setne:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a1, a0, 1
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    neg a0, a0
; RV32-NEXT:    andi a0, a0, 15
; RV32-NEXT:    sltiu a0, a0, 4
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: test_urem_odd_setne:
; RV64:       # %bb.0:
; RV64-NEXT:    slliw a1, a0, 1
; RV64-NEXT:    addw a0, a1, a0
; RV64-NEXT:    negw a0, a0
; RV64-NEXT:    andi a0, a0, 15
; RV64-NEXT:    sltiu a0, a0, 4
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_urem_odd_setne:
; RV32M:       # %bb.0:
; RV32M-NEXT:    slli a1, a0, 1
; RV32M-NEXT:    add a0, a1, a0
; RV32M-NEXT:    neg a0, a0
; RV32M-NEXT:    andi a0, a0, 15
; RV32M-NEXT:    sltiu a0, a0, 4
; RV32M-NEXT:    xori a0, a0, 1
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_urem_odd_setne:
; RV64M:       # %bb.0:
; RV64M-NEXT:    slliw a1, a0, 1
; RV64M-NEXT:    addw a0, a1, a0
; RV64M-NEXT:    negw a0, a0
; RV64M-NEXT:    andi a0, a0, 15
; RV64M-NEXT:    sltiu a0, a0, 4
; RV64M-NEXT:    xori a0, a0, 1
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_urem_odd_setne:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    slli a1, a0, 1
; RV32MV-NEXT:    add a0, a1, a0
; RV32MV-NEXT:    neg a0, a0
; RV32MV-NEXT:    andi a0, a0, 15
; RV32MV-NEXT:    sltiu a0, a0, 4
; RV32MV-NEXT:    xori a0, a0, 1
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_urem_odd_setne:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    slliw a1, a0, 1
; RV64MV-NEXT:    addw a0, a1, a0
; RV64MV-NEXT:    negw a0, a0
; RV64MV-NEXT:    andi a0, a0, 15
; RV64MV-NEXT:    sltiu a0, a0, 4
; RV64MV-NEXT:    xori a0, a0, 1
; RV64MV-NEXT:    ret
  %urem = urem i4 %X, 5
  %cmp = icmp ne i4 %urem, 0
  ret i1 %cmp
}

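; Explanatory note, not part of the generated checks: -5 as an unsigned i9
; is 507, which is odd, so the usual fold applies: 507^-1 mod 2^9 = 307
; (307 * 507 = 304 * 512 + 1), andi 511 truncates to 9 bits,
; (2^9 - 1) / 507 = 1 gives the sltiu bound of 2, and xori 1 inverts the
; result for setne.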
define i1 @test_urem_negative_odd(i9 %X) nounwind {
; RV32-LABEL: test_urem_negative_odd:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    li a1, 307
; RV32-NEXT:    call __mulsi3@plt
; RV32-NEXT:    andi a0, a0, 511
; RV32-NEXT:    sltiu a0, a0, 2
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: test_urem_negative_odd:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    li a1, 307
; RV64-NEXT:    call __muldi3@plt
; RV64-NEXT:    andi a0, a0, 511
; RV64-NEXT:    sltiu a0, a0, 2
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_urem_negative_odd:
; RV32M:       # %bb.0:
; RV32M-NEXT:    li a1, 307
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    andi a0, a0, 511
; RV32M-NEXT:    sltiu a0, a0, 2
; RV32M-NEXT:    xori a0, a0, 1
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_urem_negative_odd:
; RV64M:       # %bb.0:
; RV64M-NEXT:    li a1, 307
; RV64M-NEXT:    mulw a0, a0, a1
; RV64M-NEXT:    andi a0, a0, 511
; RV64M-NEXT:    sltiu a0, a0, 2
; RV64M-NEXT:    xori a0, a0, 1
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_urem_negative_odd:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    li a1, 307
; RV32MV-NEXT:    mul a0, a0, a1
; RV32MV-NEXT:    andi a0, a0, 511
; RV32MV-NEXT:    sltiu a0, a0, 2
; RV32MV-NEXT:    xori a0, a0, 1
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_urem_negative_odd:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    li a1, 307
; RV64MV-NEXT:    mulw a0, a0, a1
; RV64MV-NEXT:    andi a0, a0, 511
; RV64MV-NEXT:    sltiu a0, a0, 2
; RV64MV-NEXT:    xori a0, a0, 1
; RV64MV-NEXT:    ret
  %urem = urem i9 %X, -5
  %cmp = icmp ne i9 %urem, 0
  ret i1 %cmp
}

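; Explanatory note, not part of the generated checks: without +v the
; <3 x i11> case is scalarized; with +v it stays a vector (the multipliers
; and bounds come from the .LCPI4_0/.LCPI4_1 constant pools). Per element,
; (X urem d) ne c becomes a bound check on (X - c) * d^-1 mod 2^11, which
; explains the addi -1463 (= -1 * 1463) and addi -1638 (= -2 * 819) terms in
; the scalar code. The i11 inverses are 3^-1 = 683 (d = 6, with a rotate
; right by one for the factor of two), 7^-1 = 1463, and
; (-5 mod 2048)^-1 = 2043^-1 = 819; the bounds follow from
; (2047 / 6) = 341 (sltiu 342), (2047 / 7) = 292 (sltiu 293), and
; (2047 / 2043) = 1 (sltu against 1).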
define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV32-LABEL: test_urem_vec:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    lb a0, 4(a0)
; RV32-NEXT:    lw a1, 0(s0)
; RV32-NEXT:    slli a0, a0, 10
; RV32-NEXT:    srli a2, a1, 22
; RV32-NEXT:    or s1, a2, a0
; RV32-NEXT:    srli s2, a1, 11
; RV32-NEXT:    andi a0, a1, 2047
; RV32-NEXT:    li a1, 683
; RV32-NEXT:    call __mulsi3@plt
; RV32-NEXT:    slli a1, a0, 10
; RV32-NEXT:    slli a0, a0, 21
; RV32-NEXT:    srli a0, a0, 22
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    andi a0, a0, 2047
; RV32-NEXT:    sltiu a0, a0, 342
; RV32-NEXT:    xori s3, a0, 1
; RV32-NEXT:    li a1, 819
; RV32-NEXT:    mv a0, s1
; RV32-NEXT:    call __mulsi3@plt
; RV32-NEXT:    addi a0, a0, -1638
; RV32-NEXT:    andi a0, a0, 2047
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    sltu s1, a1, a0
; RV32-NEXT:    li a1, 1463
; RV32-NEXT:    mv a0, s2
; RV32-NEXT:    call __mulsi3@plt
; RV32-NEXT:    addi a0, a0, -1463
; RV32-NEXT:    andi a0, a0, 2047
; RV32-NEXT:    sltiu a0, a0, 293
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    neg a1, s3
; RV32-NEXT:    neg a0, a0
; RV32-NEXT:    neg a2, s1
; RV32-NEXT:    slli a2, a2, 21
; RV32-NEXT:    srli a2, a2, 31
; RV32-NEXT:    sb a2, 4(s0)
; RV32-NEXT:    andi a1, a1, 2047
; RV32-NEXT:    andi a0, a0, 2047
; RV32-NEXT:    slli a0, a0, 11
; RV32-NEXT:    or a0, a1, a0
; RV32-NEXT:    slli a1, s1, 22
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    sw a0, 0(s0)
; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: test_urem_vec:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -48
; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    mv s0, a0
; RV64-NEXT:    lbu a0, 4(a0)
; RV64-NEXT:    lwu a1, 0(s0)
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    or a0, a1, a0
; RV64-NEXT:    srli s1, a0, 11
; RV64-NEXT:    srli s2, a0, 22
; RV64-NEXT:    andi a0, a0, 2047
; RV64-NEXT:    li a1, 683
; RV64-NEXT:    call __muldi3@plt
; RV64-NEXT:    slli a1, a0, 10
; RV64-NEXT:    slli a0, a0, 53
; RV64-NEXT:    srli a0, a0, 54
; RV64-NEXT:    or a0, a0, a1
; RV64-NEXT:    andi a0, a0, 2047
; RV64-NEXT:    sltiu a0, a0, 342
; RV64-NEXT:    xori s3, a0, 1
; RV64-NEXT:    li a1, 819
; RV64-NEXT:    mv a0, s2
; RV64-NEXT:    call __muldi3@plt
; RV64-NEXT:    addiw a0, a0, -1638
; RV64-NEXT:    andi a0, a0, 2047
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    sltu s2, a1, a0
; RV64-NEXT:    li a1, 1463
; RV64-NEXT:    mv a0, s1
; RV64-NEXT:    call __muldi3@plt
; RV64-NEXT:    addiw a0, a0, -1463
; RV64-NEXT:    andi a0, a0, 2047
; RV64-NEXT:    sltiu a0, a0, 293
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    negw a1, s3
; RV64-NEXT:    negw a0, a0
; RV64-NEXT:    andi a1, a1, 2047
; RV64-NEXT:    andi a0, a0, 2047
; RV64-NEXT:    slli a0, a0, 11
; RV64-NEXT:    or a0, a1, a0
; RV64-NEXT:    slli a1, s2, 22
; RV64-NEXT:    sub a0, a0, a1
; RV64-NEXT:    sw a0, 0(s0)
; RV64-NEXT:    slli a0, a0, 31
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    sb a0, 4(s0)
; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 48
; RV64-NEXT:    ret
;
; RV32M-LABEL: test_urem_vec:
; RV32M:       # %bb.0:
; RV32M-NEXT:    lb a1, 4(a0)
; RV32M-NEXT:    lw a2, 0(a0)
; RV32M-NEXT:    slli a1, a1, 10
; RV32M-NEXT:    srli a3, a2, 22
; RV32M-NEXT:    or a1, a3, a1
; RV32M-NEXT:    srli a3, a2, 11
; RV32M-NEXT:    andi a2, a2, 2047
; RV32M-NEXT:    li a4, 683
; RV32M-NEXT:    mul a2, a2, a4
; RV32M-NEXT:    slli a4, a2, 10
; RV32M-NEXT:    slli a2, a2, 21
; RV32M-NEXT:    srli a2, a2, 22
; RV32M-NEXT:    or a2, a2, a4
; RV32M-NEXT:    andi a2, a2, 2047
; RV32M-NEXT:    sltiu a2, a2, 342
; RV32M-NEXT:    xori a2, a2, 1
; RV32M-NEXT:    li a4, 819
; RV32M-NEXT:    mul a1, a1, a4
; RV32M-NEXT:    addi a1, a1, -1638
; RV32M-NEXT:    andi a1, a1, 2047
; RV32M-NEXT:    li a4, 1
; RV32M-NEXT:    sltu a1, a4, a1
; RV32M-NEXT:    li a4, 1463
; RV32M-NEXT:    mul a3, a3, a4
; RV32M-NEXT:    addi a3, a3, -1463
; RV32M-NEXT:    andi a3, a3, 2047
; RV32M-NEXT:    sltiu a3, a3, 293
; RV32M-NEXT:    xori a3, a3, 1
; RV32M-NEXT:    neg a2, a2
; RV32M-NEXT:    neg a3, a3
; RV32M-NEXT:    neg a4, a1
; RV32M-NEXT:    slli a4, a4, 21
; RV32M-NEXT:    srli a4, a4, 31
; RV32M-NEXT:    sb a4, 4(a0)
; RV32M-NEXT:    andi a2, a2, 2047
; RV32M-NEXT:    andi a3, a3, 2047
; RV32M-NEXT:    slli a3, a3, 11
; RV32M-NEXT:    or a2, a2, a3
; RV32M-NEXT:    slli a1, a1, 22
; RV32M-NEXT:    sub a1, a2, a1
; RV32M-NEXT:    sw a1, 0(a0)
; RV32M-NEXT:    ret
;
; RV64M-LABEL: test_urem_vec:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lbu a1, 4(a0)
; RV64M-NEXT:    lwu a2, 0(a0)
; RV64M-NEXT:    slli a1, a1, 32
; RV64M-NEXT:    or a1, a2, a1
; RV64M-NEXT:    srli a2, a1, 11
; RV64M-NEXT:    srli a3, a1, 22
; RV64M-NEXT:    andi a1, a1, 2047
; RV64M-NEXT:    li a4, 683
; RV64M-NEXT:    mul a1, a1, a4
; RV64M-NEXT:    slli a4, a1, 10
; RV64M-NEXT:    slli a1, a1, 53
; RV64M-NEXT:    srli a1, a1, 54
; RV64M-NEXT:    or a1, a1, a4
; RV64M-NEXT:    andi a1, a1, 2047
; RV64M-NEXT:    sltiu a1, a1, 342
; RV64M-NEXT:    xori a1, a1, 1
; RV64M-NEXT:    li a4, 819
; RV64M-NEXT:    mulw a3, a3, a4
; RV64M-NEXT:    addiw a3, a3, -1638
; RV64M-NEXT:    andi a3, a3, 2047
; RV64M-NEXT:    li a4, 1
; RV64M-NEXT:    sltu a3, a4, a3
; RV64M-NEXT:    li a4, 1463
; RV64M-NEXT:    mulw a2, a2, a4
; RV64M-NEXT:    addiw a2, a2, -1463
; RV64M-NEXT:    andi a2, a2, 2047
; RV64M-NEXT:    sltiu a2, a2, 293
; RV64M-NEXT:    xori a2, a2, 1
; RV64M-NEXT:    negw a1, a1
; RV64M-NEXT:    negw a2, a2
; RV64M-NEXT:    andi a1, a1, 2047
; RV64M-NEXT:    andi a2, a2, 2047
; RV64M-NEXT:    slli a2, a2, 11
; RV64M-NEXT:    or a1, a1, a2
; RV64M-NEXT:    slli a2, a3, 22
; RV64M-NEXT:    sub a1, a1, a2
; RV64M-NEXT:    sw a1, 0(a0)
; RV64M-NEXT:    slli a1, a1, 31
; RV64M-NEXT:    srli a1, a1, 63
; RV64M-NEXT:    sb a1, 4(a0)
; RV64M-NEXT:    ret
;
; RV32MV-LABEL: test_urem_vec:
; RV32MV:       # %bb.0:
; RV32MV-NEXT:    addi sp, sp, -16
; RV32MV-NEXT:    lw a1, 0(a0)
; RV32MV-NEXT:    andi a2, a1, 2047
; RV32MV-NEXT:    sh a2, 8(sp)
; RV32MV-NEXT:    slli a2, a1, 10
; RV32MV-NEXT:    srli a2, a2, 21
; RV32MV-NEXT:    sh a2, 10(sp)
; RV32MV-NEXT:    lb a2, 4(a0)
; RV32MV-NEXT:    slli a2, a2, 10
; RV32MV-NEXT:    srli a1, a1, 22
; RV32MV-NEXT:    or a1, a1, a2
; RV32MV-NEXT:    andi a1, a1, 2047
; RV32MV-NEXT:    sh a1, 12(sp)
; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32MV-NEXT:    addi a1, sp, 8
; RV32MV-NEXT:    vle16.v v8, (a1)
; RV32MV-NEXT:    vmv.v.i v9, 10
; RV32MV-NEXT:    li a1, 9
; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
; RV32MV-NEXT:    vmv.s.x v9, a1
; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32MV-NEXT:    lui a1, %hi(.LCPI4_0)
; RV32MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
; RV32MV-NEXT:    vle16.v v10, (a1)
; RV32MV-NEXT:    vid.v v11
; RV32MV-NEXT:    vsub.vv v8, v8, v11
; RV32MV-NEXT:    vmul.vv v8, v8, v10
; RV32MV-NEXT:    vadd.vv v10, v8, v8
; RV32MV-NEXT:    vsll.vv v9, v10, v9
; RV32MV-NEXT:    vmv.v.i v10, 0
; RV32MV-NEXT:    li a1, 1
; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
; RV32MV-NEXT:    vmv1r.v v11, v10
; RV32MV-NEXT:    vmv.s.x v11, a1
; RV32MV-NEXT:    li a1, 2047
; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32MV-NEXT:    vand.vx v8, v8, a1
; RV32MV-NEXT:    lui a2, %hi(.LCPI4_1)
; RV32MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
; RV32MV-NEXT:    vle16.v v12, (a2)
; RV32MV-NEXT:    vsrl.vv v8, v8, v11
; RV32MV-NEXT:    vor.vv v8, v8, v9
; RV32MV-NEXT:    vand.vx v8, v8, a1
; RV32MV-NEXT:    vmsltu.vv v0, v12, v8
; RV32MV-NEXT:    vmerge.vim v8, v10, -1, v0
; RV32MV-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
; RV32MV-NEXT:    vslidedown.vi v9, v8, 2
; RV32MV-NEXT:    vmv.x.s a1, v9
; RV32MV-NEXT:    slli a2, a1, 21
; RV32MV-NEXT:    srli a2, a2, 31
; RV32MV-NEXT:    sb a2, 4(a0)
; RV32MV-NEXT:    vmv.x.s a2, v8
; RV32MV-NEXT:    andi a2, a2, 2047
; RV32MV-NEXT:    vslidedown.vi v8, v8, 1
; RV32MV-NEXT:    vmv.x.s a3, v8
; RV32MV-NEXT:    andi a3, a3, 2047
; RV32MV-NEXT:    slli a3, a3, 11
; RV32MV-NEXT:    or a2, a2, a3
; RV32MV-NEXT:    slli a1, a1, 22
; RV32MV-NEXT:    or a1, a2, a1
; RV32MV-NEXT:    sw a1, 0(a0)
; RV32MV-NEXT:    addi sp, sp, 16
; RV32MV-NEXT:    ret
;
; RV64MV-LABEL: test_urem_vec:
; RV64MV:       # %bb.0:
; RV64MV-NEXT:    addi sp, sp, -16
; RV64MV-NEXT:    lbu a1, 4(a0)
; RV64MV-NEXT:    lwu a2, 0(a0)
; RV64MV-NEXT:    slli a1, a1, 32
; RV64MV-NEXT:    or a1, a2, a1
; RV64MV-NEXT:    srli a2, a1, 22
; RV64MV-NEXT:    sh a2, 12(sp)
; RV64MV-NEXT:    andi a2, a1, 2047
; RV64MV-NEXT:    sh a2, 8(sp)
; RV64MV-NEXT:    slli a1, a1, 42
; RV64MV-NEXT:    srli a1, a1, 53
; RV64MV-NEXT:    sh a1, 10(sp)
; RV64MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV64MV-NEXT:    addi a1, sp, 8
; RV64MV-NEXT:    vle16.v v8, (a1)
; RV64MV-NEXT:    vmv.v.i v9, 10
; RV64MV-NEXT:    li a1, 9
; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
; RV64MV-NEXT:    vmv.s.x v9, a1
; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64MV-NEXT:    lui a1, %hi(.LCPI4_0)
; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
; RV64MV-NEXT:    vle16.v v10, (a1)
; RV64MV-NEXT:    vid.v v11
; RV64MV-NEXT:    vsub.vv v8, v8, v11
; RV64MV-NEXT:    vmul.vv v8, v8, v10
; RV64MV-NEXT:    vadd.vv v10, v8, v8
; RV64MV-NEXT:    vsll.vv v9, v10, v9
; RV64MV-NEXT:    vmv.v.i v10, 0
; RV64MV-NEXT:    li a1, 1
; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
; RV64MV-NEXT:    vmv1r.v v11, v10
; RV64MV-NEXT:    vmv.s.x v11, a1
; RV64MV-NEXT:    li a1, 2047
; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64MV-NEXT:    vand.vx v8, v8, a1
; RV64MV-NEXT:    lui a2, %hi(.LCPI4_1)
; RV64MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
; RV64MV-NEXT:    vle16.v v12, (a2)
; RV64MV-NEXT:    vsrl.vv v8, v8, v11
; RV64MV-NEXT:    vor.vv v8, v8, v9
; RV64MV-NEXT:    vand.vx v8, v8, a1
; RV64MV-NEXT:    vmsltu.vv v0, v12, v8
; RV64MV-NEXT:    vmerge.vim v8, v10, -1, v0
; RV64MV-NEXT:    vmv.x.s a1, v8
; RV64MV-NEXT:    andi a1, a1, 2047
; RV64MV-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
; RV64MV-NEXT:    vslidedown.vi v9, v8, 1
; RV64MV-NEXT:    vmv.x.s a2, v9
; RV64MV-NEXT:    andi a2, a2, 2047
; RV64MV-NEXT:    slli a2, a2, 11
; RV64MV-NEXT:    or a1, a1, a2
; RV64MV-NEXT:    vslidedown.vi v8, v8, 2
; RV64MV-NEXT:    vmv.x.s a2, v8
; RV64MV-NEXT:    slli a2, a2, 22
; RV64MV-NEXT:    or a1, a1, a2
; RV64MV-NEXT:    sw a1, 0(a0)
; RV64MV-NEXT:    slli a1, a1, 31
; RV64MV-NEXT:    srli a1, a1, 63
; RV64MV-NEXT:    sb a1, 4(a0)
; RV64MV-NEXT:    addi sp, sp, 16
; RV64MV-NEXT:    ret
  %ld = load <3 x i11>, <3 x i11>* %X
  %urem = urem <3 x i11> %ld, <i11 6, i11 7, i11 -5>
  %cmp = icmp ne <3 x i11> %urem, <i11 0, i11 1, i11 2>
  %ext = sext <3 x i1> %cmp to <3 x i11>
  store <3 x i11> %ext, <3 x i11>* %X
  ret void
}