1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s -check-prefixes=CHECK,RV32I
4 ; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s -check-prefixes=CHECK,RV32ZBB
7 declare i32 @llvm.ctlz.i32(i32, i1)
; Test: @llvm.ctlz.i32 on %a, called with its i1 argument set to false.
; Without Zbb the expected code tests for zero first (beqz) and yields 32 on
; that path; otherwise it smears the highest set bit downwards with srli/or
; pairs (shifts 1, 2, 4, 8, 16), inverts the value, and counts the set bits
; with a shift-and-mask sequence using the masks 0x55555555, 0x33333333 and
; 0x0f0f0f0f (each built with lui/addi). With +zbb a single clz is expected.
9 define i32 @ctlz_i32(i32 %a) nounwind {
10 ; RV32I-LABEL: ctlz_i32:
12 ; RV32I-NEXT: beqz a0, .LBB0_2
13 ; RV32I-NEXT: # %bb.1: # %cond.false
14 ; RV32I-NEXT: srli a1, a0, 1
15 ; RV32I-NEXT: or a0, a0, a1
16 ; RV32I-NEXT: srli a1, a0, 2
17 ; RV32I-NEXT: or a0, a0, a1
18 ; RV32I-NEXT: srli a1, a0, 4
19 ; RV32I-NEXT: or a0, a0, a1
20 ; RV32I-NEXT: srli a1, a0, 8
21 ; RV32I-NEXT: or a0, a0, a1
22 ; RV32I-NEXT: srli a1, a0, 16
23 ; RV32I-NEXT: or a0, a0, a1
24 ; RV32I-NEXT: not a0, a0
25 ; RV32I-NEXT: srli a1, a0, 1
26 ; RV32I-NEXT: lui a2, 349525
27 ; RV32I-NEXT: addi a2, a2, 1365
28 ; RV32I-NEXT: and a1, a1, a2
29 ; RV32I-NEXT: sub a0, a0, a1
30 ; RV32I-NEXT: lui a1, 209715
31 ; RV32I-NEXT: addi a1, a1, 819
32 ; RV32I-NEXT: and a2, a0, a1
33 ; RV32I-NEXT: srli a0, a0, 2
34 ; RV32I-NEXT: and a0, a0, a1
35 ; RV32I-NEXT: add a0, a2, a0
36 ; RV32I-NEXT: srli a1, a0, 4
37 ; RV32I-NEXT: add a0, a0, a1
38 ; RV32I-NEXT: lui a1, 61681
39 ; RV32I-NEXT: addi a1, a1, -241
40 ; RV32I-NEXT: and a0, a0, a1
41 ; RV32I-NEXT: slli a1, a0, 8
42 ; RV32I-NEXT: add a0, a0, a1
43 ; RV32I-NEXT: slli a1, a0, 16
44 ; RV32I-NEXT: add a0, a0, a1
45 ; RV32I-NEXT: srli a0, a0, 24
47 ; RV32I-NEXT: .LBB0_2:
48 ; RV32I-NEXT: li a0, 32
51 ; RV32ZBB-LABEL: ctlz_i32:
53 ; RV32ZBB-NEXT: clz a0, a0
55 %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
59 declare i64 @llvm.ctlz.i64(i64, i1)
; Test: @llvm.ctlz.i64 on %a (i1 argument false); the i64 is handled as the
; register pair a0 (low word) / a1 (high word).
; Without Zbb the three bit-count masks are materialised once up front
; (a4 = 0x55555555, a3 = 0x33333333, a2 = 0x0f0f0f0f) and the code branches on
; the high word: when a1 is zero it counts leading zeros of a0 and adds 32,
; otherwise it counts leading zeros of a1. Both paths set the high result
; word a1 to 0. With +zbb the same branch structure is expected, with clz
; replacing each smear-and-count sequence.
61 define i64 @ctlz_i64(i64 %a) nounwind {
62 ; RV32I-LABEL: ctlz_i64:
64 ; RV32I-NEXT: lui a2, 349525
65 ; RV32I-NEXT: addi a4, a2, 1365
66 ; RV32I-NEXT: lui a2, 209715
67 ; RV32I-NEXT: addi a3, a2, 819
68 ; RV32I-NEXT: lui a2, 61681
69 ; RV32I-NEXT: addi a2, a2, -241
70 ; RV32I-NEXT: bnez a1, .LBB1_2
71 ; RV32I-NEXT: # %bb.1:
72 ; RV32I-NEXT: srli a1, a0, 1
73 ; RV32I-NEXT: or a0, a0, a1
74 ; RV32I-NEXT: srli a1, a0, 2
75 ; RV32I-NEXT: or a0, a0, a1
76 ; RV32I-NEXT: srli a1, a0, 4
77 ; RV32I-NEXT: or a0, a0, a1
78 ; RV32I-NEXT: srli a1, a0, 8
79 ; RV32I-NEXT: or a0, a0, a1
80 ; RV32I-NEXT: srli a1, a0, 16
81 ; RV32I-NEXT: or a0, a0, a1
82 ; RV32I-NEXT: not a0, a0
83 ; RV32I-NEXT: srli a1, a0, 1
84 ; RV32I-NEXT: and a1, a1, a4
85 ; RV32I-NEXT: sub a0, a0, a1
86 ; RV32I-NEXT: and a1, a0, a3
87 ; RV32I-NEXT: srli a0, a0, 2
88 ; RV32I-NEXT: and a0, a0, a3
89 ; RV32I-NEXT: add a0, a1, a0
90 ; RV32I-NEXT: srli a1, a0, 4
91 ; RV32I-NEXT: add a0, a0, a1
92 ; RV32I-NEXT: and a0, a0, a2
93 ; RV32I-NEXT: slli a1, a0, 8
94 ; RV32I-NEXT: add a0, a0, a1
95 ; RV32I-NEXT: slli a1, a0, 16
96 ; RV32I-NEXT: add a0, a0, a1
97 ; RV32I-NEXT: srli a0, a0, 24
98 ; RV32I-NEXT: addi a0, a0, 32
99 ; RV32I-NEXT: li a1, 0
101 ; RV32I-NEXT: .LBB1_2:
102 ; RV32I-NEXT: srli a0, a1, 1
103 ; RV32I-NEXT: or a0, a1, a0
104 ; RV32I-NEXT: srli a1, a0, 2
105 ; RV32I-NEXT: or a0, a0, a1
106 ; RV32I-NEXT: srli a1, a0, 4
107 ; RV32I-NEXT: or a0, a0, a1
108 ; RV32I-NEXT: srli a1, a0, 8
109 ; RV32I-NEXT: or a0, a0, a1
110 ; RV32I-NEXT: srli a1, a0, 16
111 ; RV32I-NEXT: or a0, a0, a1
112 ; RV32I-NEXT: not a0, a0
113 ; RV32I-NEXT: srli a1, a0, 1
114 ; RV32I-NEXT: and a1, a1, a4
115 ; RV32I-NEXT: sub a0, a0, a1
116 ; RV32I-NEXT: and a1, a0, a3
117 ; RV32I-NEXT: srli a0, a0, 2
118 ; RV32I-NEXT: and a0, a0, a3
119 ; RV32I-NEXT: add a0, a1, a0
120 ; RV32I-NEXT: srli a1, a0, 4
121 ; RV32I-NEXT: add a0, a0, a1
122 ; RV32I-NEXT: and a0, a0, a2
123 ; RV32I-NEXT: slli a1, a0, 8
124 ; RV32I-NEXT: add a0, a0, a1
125 ; RV32I-NEXT: slli a1, a0, 16
126 ; RV32I-NEXT: add a0, a0, a1
127 ; RV32I-NEXT: srli a0, a0, 24
128 ; RV32I-NEXT: li a1, 0
131 ; RV32ZBB-LABEL: ctlz_i64:
133 ; RV32ZBB-NEXT: bnez a1, .LBB1_2
134 ; RV32ZBB-NEXT: # %bb.1:
135 ; RV32ZBB-NEXT: clz a0, a0
136 ; RV32ZBB-NEXT: addi a0, a0, 32
137 ; RV32ZBB-NEXT: li a1, 0
139 ; RV32ZBB-NEXT: .LBB1_2:
140 ; RV32ZBB-NEXT: clz a0, a1
141 ; RV32ZBB-NEXT: li a1, 0
143 %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
147 declare i32 @llvm.cttz.i32(i32, i1)
; Test: @llvm.cttz.i32 on %a, called with its i1 argument set to false.
; Without Zbb the expected code yields 32 for a zero input; otherwise it
; isolates the lowest set bit (a & -a), multiplies it by 0x077cb531 through a
; call to __mulsi3 (hence the ra spill/reload around the call), and uses the
; top five bits of the product (srli 27) as a byte index into the constant
; table .LCPI2_0. With +zbb a single ctz is expected.
149 define i32 @cttz_i32(i32 %a) nounwind {
150 ; RV32I-LABEL: cttz_i32:
152 ; RV32I-NEXT: beqz a0, .LBB2_2
153 ; RV32I-NEXT: # %bb.1: # %cond.false
154 ; RV32I-NEXT: addi sp, sp, -16
155 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
156 ; RV32I-NEXT: neg a1, a0
157 ; RV32I-NEXT: and a0, a0, a1
158 ; RV32I-NEXT: lui a1, 30667
159 ; RV32I-NEXT: addi a1, a1, 1329
160 ; RV32I-NEXT: call __mulsi3
161 ; RV32I-NEXT: srli a0, a0, 27
162 ; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
163 ; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
164 ; RV32I-NEXT: add a0, a1, a0
165 ; RV32I-NEXT: lbu a0, 0(a0)
166 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
167 ; RV32I-NEXT: addi sp, sp, 16
169 ; RV32I-NEXT: .LBB2_2:
170 ; RV32I-NEXT: li a0, 32
173 ; RV32ZBB-LABEL: cttz_i32:
175 ; RV32ZBB-NEXT: ctz a0, a0
177 %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
181 declare i64 @llvm.cttz.i64(i64, i1)
; Test: @llvm.cttz.i64 on %a (i1 argument false); a0 holds the low word and
; a1 the high word on entry.
; Without Zbb the expected code keeps both words and the multiplier
; 0x077cb531 in callee-saved registers (s0-s4, spilled to a 32-byte frame),
; calls __mulsi3 once per word on the isolated lowest set bit, and indexes
; the byte table .LCPI3_0 with the top five bits of each product. The low
; word's table entry is used when s0 (low word) is nonzero; otherwise the
; result is the high word's entry (or 32 when the high word is zero) plus 32.
; The high result word a1 is set to 0.
; With +zbb the code branches on the low word: ctz of a1 plus 32 when a0 is
; zero, ctz of a0 otherwise.
183 define i64 @cttz_i64(i64 %a) nounwind {
184 ; RV32I-LABEL: cttz_i64:
186 ; RV32I-NEXT: addi sp, sp, -32
187 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
188 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
189 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
190 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
191 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
192 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
193 ; RV32I-NEXT: mv s2, a1
194 ; RV32I-NEXT: mv s0, a0
195 ; RV32I-NEXT: neg a0, a0
196 ; RV32I-NEXT: and a0, s0, a0
197 ; RV32I-NEXT: lui a1, 30667
198 ; RV32I-NEXT: addi s3, a1, 1329
199 ; RV32I-NEXT: mv a1, s3
200 ; RV32I-NEXT: call __mulsi3
201 ; RV32I-NEXT: mv s1, a0
202 ; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
203 ; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
204 ; RV32I-NEXT: neg a0, s2
205 ; RV32I-NEXT: and a0, s2, a0
206 ; RV32I-NEXT: mv a1, s3
207 ; RV32I-NEXT: call __mulsi3
208 ; RV32I-NEXT: bnez s2, .LBB3_3
209 ; RV32I-NEXT: # %bb.1:
210 ; RV32I-NEXT: li a0, 32
211 ; RV32I-NEXT: beqz s0, .LBB3_4
212 ; RV32I-NEXT: .LBB3_2:
213 ; RV32I-NEXT: srli s1, s1, 27
214 ; RV32I-NEXT: add s1, s4, s1
215 ; RV32I-NEXT: lbu a0, 0(s1)
216 ; RV32I-NEXT: j .LBB3_5
217 ; RV32I-NEXT: .LBB3_3:
218 ; RV32I-NEXT: srli a0, a0, 27
219 ; RV32I-NEXT: add a0, s4, a0
220 ; RV32I-NEXT: lbu a0, 0(a0)
221 ; RV32I-NEXT: bnez s0, .LBB3_2
222 ; RV32I-NEXT: .LBB3_4:
223 ; RV32I-NEXT: addi a0, a0, 32
224 ; RV32I-NEXT: .LBB3_5:
225 ; RV32I-NEXT: li a1, 0
226 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
227 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
228 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
229 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
230 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
231 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
232 ; RV32I-NEXT: addi sp, sp, 32
235 ; RV32ZBB-LABEL: cttz_i64:
237 ; RV32ZBB-NEXT: bnez a0, .LBB3_2
238 ; RV32ZBB-NEXT: # %bb.1:
239 ; RV32ZBB-NEXT: ctz a0, a1
240 ; RV32ZBB-NEXT: addi a0, a0, 32
241 ; RV32ZBB-NEXT: li a1, 0
243 ; RV32ZBB-NEXT: .LBB3_2:
244 ; RV32ZBB-NEXT: ctz a0, a0
245 ; RV32ZBB-NEXT: li a1, 0
247 %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
251 declare i32 @llvm.ctpop.i32(i32)
; Test: @llvm.ctpop.i32 on %a.
; Without Zbb the expected code is the branch-free shift-and-mask bit count:
; pairwise sums (mask 0x55555555), nibble sums (0x33333333), byte sums
; (0x0f0f0f0f), then the bytes are accumulated with two shift/add steps and
; the total is taken from the top byte (srli 24). With +zbb a single cpop is
; expected.
253 define i32 @ctpop_i32(i32 %a) nounwind {
254 ; RV32I-LABEL: ctpop_i32:
256 ; RV32I-NEXT: srli a1, a0, 1
257 ; RV32I-NEXT: lui a2, 349525
258 ; RV32I-NEXT: addi a2, a2, 1365
259 ; RV32I-NEXT: and a1, a1, a2
260 ; RV32I-NEXT: sub a0, a0, a1
261 ; RV32I-NEXT: lui a1, 209715
262 ; RV32I-NEXT: addi a1, a1, 819
263 ; RV32I-NEXT: and a2, a0, a1
264 ; RV32I-NEXT: srli a0, a0, 2
265 ; RV32I-NEXT: and a0, a0, a1
266 ; RV32I-NEXT: add a0, a2, a0
267 ; RV32I-NEXT: srli a1, a0, 4
268 ; RV32I-NEXT: add a0, a0, a1
269 ; RV32I-NEXT: lui a1, 61681
270 ; RV32I-NEXT: addi a1, a1, -241
271 ; RV32I-NEXT: and a0, a0, a1
272 ; RV32I-NEXT: slli a1, a0, 8
273 ; RV32I-NEXT: add a0, a0, a1
274 ; RV32I-NEXT: slli a1, a0, 16
275 ; RV32I-NEXT: add a0, a0, a1
276 ; RV32I-NEXT: srli a0, a0, 24
279 ; RV32ZBB-LABEL: ctpop_i32:
281 ; RV32ZBB-NEXT: cpop a0, a0
283 %1 = call i32 @llvm.ctpop.i32(i32 %a)
; Test: ctpop(%a) <u 2, i.e. at most one bit set.
; Without Zbb no population count is expected at all: the checks want the
; (a & (a - 1)) == 0 form (addi -1, and, seqz). With +zbb the checks want
; cpop followed by sltiu against 2.
287 define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind {
288 ; RV32I-LABEL: ctpop_i32_ult_two:
290 ; RV32I-NEXT: addi a1, a0, -1
291 ; RV32I-NEXT: and a0, a0, a1
292 ; RV32I-NEXT: seqz a0, a0
295 ; RV32ZBB-LABEL: ctpop_i32_ult_two:
297 ; RV32ZBB-NEXT: cpop a0, a0
298 ; RV32ZBB-NEXT: sltiu a0, a0, 2
300 %1 = call i32 @llvm.ctpop.i32(i32 %a)
301 %2 = icmp ult i32 %1, 2
; Test: ctpop(%a) >u 1, i.e. two or more bits set.
; Without Zbb the checks want (a & (a - 1)) != 0 (addi -1, and, snez).
; With +zbb the checks want cpop, sltiu against 2, and an xori 1 to invert
; the result.
305 define i1 @ctpop_i32_ugt_one(i32 signext %a) nounwind {
306 ; RV32I-LABEL: ctpop_i32_ugt_one:
308 ; RV32I-NEXT: addi a1, a0, -1
309 ; RV32I-NEXT: and a0, a0, a1
310 ; RV32I-NEXT: snez a0, a0
313 ; RV32ZBB-LABEL: ctpop_i32_ugt_one:
315 ; RV32ZBB-NEXT: cpop a0, a0
316 ; RV32ZBB-NEXT: sltiu a0, a0, 2
317 ; RV32ZBB-NEXT: xori a0, a0, 1
319 %1 = call i32 @llvm.ctpop.i32(i32 %a)
320 %2 = icmp ugt i32 %1, 1
; Test: ctpop(%a) == 1, i.e. exactly one bit set.
; Without Zbb the checks want the unsigned comparison
; (a - 1) <u (a ^ (a - 1)) (addi -1, xor, sltu). With +zbb the checks want
; cpop, a subtraction of 1, and seqz.
324 define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind {
325 ; RV32I-LABEL: ctpop_i32_eq_one:
327 ; RV32I-NEXT: addi a1, a0, -1
328 ; RV32I-NEXT: xor a0, a0, a1
329 ; RV32I-NEXT: sltu a0, a1, a0
332 ; RV32ZBB-LABEL: ctpop_i32_eq_one:
334 ; RV32ZBB-NEXT: cpop a0, a0
335 ; RV32ZBB-NEXT: addi a0, a0, -1
336 ; RV32ZBB-NEXT: seqz a0, a0
338 %1 = call i32 @llvm.ctpop.i32(i32 %a)
339 %2 = icmp eq i32 %1, 1
; Test: ctpop(%a) != 1.
; Without Zbb the checks want (a - 1) <u (a ^ (a - 1)) followed by an xori 1
; that inverts it. With +zbb the checks want cpop, a subtraction of 1, and
; snez.
343 define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
344 ; RV32I-LABEL: ctpop_i32_ne_one:
346 ; RV32I-NEXT: addi a1, a0, -1
347 ; RV32I-NEXT: xor a0, a0, a1
348 ; RV32I-NEXT: sltu a0, a1, a0
349 ; RV32I-NEXT: xori a0, a0, 1
352 ; RV32ZBB-LABEL: ctpop_i32_ne_one:
354 ; RV32ZBB-NEXT: cpop a0, a0
355 ; RV32ZBB-NEXT: addi a0, a0, -1
356 ; RV32ZBB-NEXT: snez a0, a0
358 %1 = call i32 @llvm.ctpop.i32(i32 %a)
359 %2 = icmp ne i32 %1, 1
363 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
; Test: @llvm.ctpop.v2i32; the expected code handles the two elements as
; scalars in a0 and a1.
; Without Zbb each element gets the shift-and-mask bit count, with the three
; masks (0x55555555 in a3, 0x33333333 in a2, 0x0f0f0f0f in a4) built once
; while processing a0 and reused for a1. With +zbb one cpop per element is
; expected.
365 define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
366 ; RV32I-LABEL: ctpop_v2i32:
368 ; RV32I-NEXT: srli a2, a0, 1
369 ; RV32I-NEXT: lui a3, 349525
370 ; RV32I-NEXT: addi a3, a3, 1365
371 ; RV32I-NEXT: and a2, a2, a3
372 ; RV32I-NEXT: sub a0, a0, a2
373 ; RV32I-NEXT: lui a2, 209715
374 ; RV32I-NEXT: addi a2, a2, 819
375 ; RV32I-NEXT: and a4, a0, a2
376 ; RV32I-NEXT: srli a0, a0, 2
377 ; RV32I-NEXT: and a0, a0, a2
378 ; RV32I-NEXT: add a0, a4, a0
379 ; RV32I-NEXT: srli a4, a0, 4
380 ; RV32I-NEXT: add a0, a0, a4
381 ; RV32I-NEXT: lui a4, 61681
382 ; RV32I-NEXT: addi a4, a4, -241
383 ; RV32I-NEXT: and a0, a0, a4
384 ; RV32I-NEXT: slli a5, a0, 8
385 ; RV32I-NEXT: add a0, a0, a5
386 ; RV32I-NEXT: slli a5, a0, 16
387 ; RV32I-NEXT: add a0, a0, a5
388 ; RV32I-NEXT: srli a0, a0, 24
389 ; RV32I-NEXT: srli a5, a1, 1
390 ; RV32I-NEXT: and a3, a5, a3
391 ; RV32I-NEXT: sub a1, a1, a3
392 ; RV32I-NEXT: and a3, a1, a2
393 ; RV32I-NEXT: srli a1, a1, 2
394 ; RV32I-NEXT: and a1, a1, a2
395 ; RV32I-NEXT: add a1, a3, a1
396 ; RV32I-NEXT: srli a2, a1, 4
397 ; RV32I-NEXT: add a1, a1, a2
398 ; RV32I-NEXT: and a1, a1, a4
399 ; RV32I-NEXT: slli a2, a1, 8
400 ; RV32I-NEXT: add a1, a1, a2
401 ; RV32I-NEXT: slli a2, a1, 16
402 ; RV32I-NEXT: add a1, a1, a2
403 ; RV32I-NEXT: srli a1, a1, 24
406 ; RV32ZBB-LABEL: ctpop_v2i32:
408 ; RV32ZBB-NEXT: cpop a0, a0
409 ; RV32ZBB-NEXT: cpop a1, a1
411 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
; Test: element-wise ctpop(%a) <u 2 on <2 x i32>; elements are in a0 and a1.
; Without Zbb each element is expected to use (x & (x - 1)) == 0.
; With +zbb each element is expected to use cpop followed by sltiu against 2.
415 define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
416 ; RV32I-LABEL: ctpop_v2i32_ult_two:
418 ; RV32I-NEXT: addi a2, a0, -1
419 ; RV32I-NEXT: and a0, a0, a2
420 ; RV32I-NEXT: seqz a0, a0
421 ; RV32I-NEXT: addi a2, a1, -1
422 ; RV32I-NEXT: and a1, a1, a2
423 ; RV32I-NEXT: seqz a1, a1
426 ; RV32ZBB-LABEL: ctpop_v2i32_ult_two:
428 ; RV32ZBB-NEXT: cpop a1, a1
429 ; RV32ZBB-NEXT: cpop a0, a0
430 ; RV32ZBB-NEXT: sltiu a0, a0, 2
431 ; RV32ZBB-NEXT: sltiu a1, a1, 2
433 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
434 %2 = icmp ult <2 x i32> %1, <i32 2, i32 2>
; Test: element-wise ctpop(%a) >u 1 on <2 x i32>; elements are in a0 and a1.
; Without Zbb each element is expected to use (x & (x - 1)) != 0.
; With +zbb each element is expected to use cpop, sltiu against 2, and an
; xori 1 to invert the result.
438 define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
439 ; RV32I-LABEL: ctpop_v2i32_ugt_one:
441 ; RV32I-NEXT: addi a2, a0, -1
442 ; RV32I-NEXT: and a0, a0, a2
443 ; RV32I-NEXT: snez a0, a0
444 ; RV32I-NEXT: addi a2, a1, -1
445 ; RV32I-NEXT: and a1, a1, a2
446 ; RV32I-NEXT: snez a1, a1
449 ; RV32ZBB-LABEL: ctpop_v2i32_ugt_one:
451 ; RV32ZBB-NEXT: cpop a1, a1
452 ; RV32ZBB-NEXT: cpop a0, a0
453 ; RV32ZBB-NEXT: sltiu a0, a0, 2
454 ; RV32ZBB-NEXT: xori a0, a0, 1
455 ; RV32ZBB-NEXT: sltiu a1, a1, 2
456 ; RV32ZBB-NEXT: xori a1, a1, 1
458 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
459 %2 = icmp ugt <2 x i32> %1, <i32 1, i32 1>
; Test: element-wise ctpop(%a) == 1 on <2 x i32>; elements are in a0 and a1.
; Without Zbb each element is expected to use (x - 1) <u (x ^ (x - 1)).
; With +zbb each element is expected to use cpop, a subtraction of 1, and
; seqz.
463 define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
464 ; RV32I-LABEL: ctpop_v2i32_eq_one:
466 ; RV32I-NEXT: addi a2, a0, -1
467 ; RV32I-NEXT: xor a0, a0, a2
468 ; RV32I-NEXT: sltu a0, a2, a0
469 ; RV32I-NEXT: addi a2, a1, -1
470 ; RV32I-NEXT: xor a1, a1, a2
471 ; RV32I-NEXT: sltu a1, a2, a1
474 ; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
476 ; RV32ZBB-NEXT: cpop a1, a1
477 ; RV32ZBB-NEXT: cpop a0, a0
478 ; RV32ZBB-NEXT: addi a0, a0, -1
479 ; RV32ZBB-NEXT: seqz a0, a0
480 ; RV32ZBB-NEXT: addi a1, a1, -1
481 ; RV32ZBB-NEXT: seqz a1, a1
483 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
484 %2 = icmp eq <2 x i32> %1, <i32 1, i32 1>
; Test: element-wise ctpop(%a) != 1 on <2 x i32>; elements are in a0 and a1.
; Without Zbb each element is expected to use (x - 1) <u (x ^ (x - 1))
; followed by an xori 1 that inverts it. With +zbb each element is expected
; to use cpop, a subtraction of 1, and snez.
488 define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
489 ; RV32I-LABEL: ctpop_v2i32_ne_one:
491 ; RV32I-NEXT: addi a2, a0, -1
492 ; RV32I-NEXT: xor a0, a0, a2
493 ; RV32I-NEXT: sltu a0, a2, a0
494 ; RV32I-NEXT: xori a0, a0, 1
495 ; RV32I-NEXT: addi a2, a1, -1
496 ; RV32I-NEXT: xor a1, a1, a2
497 ; RV32I-NEXT: sltu a1, a2, a1
498 ; RV32I-NEXT: xori a1, a1, 1
501 ; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
503 ; RV32ZBB-NEXT: cpop a1, a1
504 ; RV32ZBB-NEXT: cpop a0, a0
505 ; RV32ZBB-NEXT: addi a0, a0, -1
506 ; RV32ZBB-NEXT: snez a0, a0
507 ; RV32ZBB-NEXT: addi a1, a1, -1
508 ; RV32ZBB-NEXT: snez a1, a1
510 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
511 %2 = icmp ne <2 x i32> %1, <i32 1, i32 1>
515 declare i64 @llvm.ctpop.i64(i64)
; Test: @llvm.ctpop.i64 on %a, held in a0 (low word) and a1 (high word).
; Without Zbb the shift-and-mask bit count runs on the high word first and
; then on the low word, sharing the masks in a3/a2/a4; the two counts are
; added into a0 and the high result word a1 is set to 0. With +zbb the checks
; want one cpop per word, an add, and a1 set to 0.
517 define i64 @ctpop_i64(i64 %a) nounwind {
518 ; RV32I-LABEL: ctpop_i64:
520 ; RV32I-NEXT: srli a2, a1, 1
521 ; RV32I-NEXT: lui a3, 349525
522 ; RV32I-NEXT: addi a3, a3, 1365
523 ; RV32I-NEXT: and a2, a2, a3
524 ; RV32I-NEXT: sub a1, a1, a2
525 ; RV32I-NEXT: lui a2, 209715
526 ; RV32I-NEXT: addi a2, a2, 819
527 ; RV32I-NEXT: and a4, a1, a2
528 ; RV32I-NEXT: srli a1, a1, 2
529 ; RV32I-NEXT: and a1, a1, a2
530 ; RV32I-NEXT: add a1, a4, a1
531 ; RV32I-NEXT: srli a4, a1, 4
532 ; RV32I-NEXT: add a1, a1, a4
533 ; RV32I-NEXT: lui a4, 61681
534 ; RV32I-NEXT: addi a4, a4, -241
535 ; RV32I-NEXT: and a1, a1, a4
536 ; RV32I-NEXT: slli a5, a1, 8
537 ; RV32I-NEXT: add a1, a1, a5
538 ; RV32I-NEXT: slli a5, a1, 16
539 ; RV32I-NEXT: add a1, a1, a5
540 ; RV32I-NEXT: srli a1, a1, 24
541 ; RV32I-NEXT: srli a5, a0, 1
542 ; RV32I-NEXT: and a3, a5, a3
543 ; RV32I-NEXT: sub a0, a0, a3
544 ; RV32I-NEXT: and a3, a0, a2
545 ; RV32I-NEXT: srli a0, a0, 2
546 ; RV32I-NEXT: and a0, a0, a2
547 ; RV32I-NEXT: add a0, a3, a0
548 ; RV32I-NEXT: srli a2, a0, 4
549 ; RV32I-NEXT: add a0, a0, a2
550 ; RV32I-NEXT: and a0, a0, a4
551 ; RV32I-NEXT: slli a2, a0, 8
552 ; RV32I-NEXT: add a0, a0, a2
553 ; RV32I-NEXT: slli a2, a0, 16
554 ; RV32I-NEXT: add a0, a0, a2
555 ; RV32I-NEXT: srli a0, a0, 24
556 ; RV32I-NEXT: add a0, a0, a1
557 ; RV32I-NEXT: li a1, 0
560 ; RV32ZBB-LABEL: ctpop_i64:
562 ; RV32ZBB-NEXT: cpop a1, a1
563 ; RV32ZBB-NEXT: cpop a0, a0
564 ; RV32ZBB-NEXT: add a0, a0, a1
565 ; RV32ZBB-NEXT: li a1, 0
567 %1 = call i64 @llvm.ctpop.i64(i64 %a)
; Test: ctpop(%a) <u 2 on i64 (a0 = low word, a1 = high word).
; NOTE(review): the function is named "ugt_two", but the IR below uses
; `icmp ult i64 %1, 2`, the same predicate as ctpop_i32_ult_two and
; ctpop_v2i64_ult_two. The name looks like a typo for "ult_two"; confirm
; before renaming, since the LABEL checks must change with it.
; Without Zbb the checks want a 64-bit (x & (x - 1)) == 0: the low word is
; lo & (lo - 1), the high word is hi & (hi - (lo == 0)), and the two are
; or-ed and tested with seqz. With +zbb the checks want cpop on both words,
; an add, and sltiu against 2.
571 define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
572 ; RV32I-LABEL: ctpop_i64_ugt_two:
574 ; RV32I-NEXT: addi a2, a0, -1
575 ; RV32I-NEXT: and a2, a0, a2
576 ; RV32I-NEXT: seqz a0, a0
577 ; RV32I-NEXT: sub a0, a1, a0
578 ; RV32I-NEXT: and a0, a1, a0
579 ; RV32I-NEXT: or a0, a2, a0
580 ; RV32I-NEXT: seqz a0, a0
583 ; RV32ZBB-LABEL: ctpop_i64_ugt_two:
585 ; RV32ZBB-NEXT: cpop a1, a1
586 ; RV32ZBB-NEXT: cpop a0, a0
587 ; RV32ZBB-NEXT: add a0, a0, a1
588 ; RV32ZBB-NEXT: sltiu a0, a0, 2
590 %1 = call i64 @llvm.ctpop.i64(i64 %a)
591 %2 = icmp ult i64 %1, 2
; Test: ctpop(%a) >u 1 on i64 (a0 = low word, a1 = high word).
; Without Zbb the checks want a 64-bit (x & (x - 1)) != 0: lo & (lo - 1)
; or-ed with hi & (hi - (lo == 0)), tested with snez. With +zbb the checks
; want cpop on both words, an add, sltiu against 2, and an xori 1 to invert
; the result.
595 define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
596 ; RV32I-LABEL: ctpop_i64_ugt_one:
598 ; RV32I-NEXT: addi a2, a0, -1
599 ; RV32I-NEXT: and a2, a0, a2
600 ; RV32I-NEXT: seqz a0, a0
601 ; RV32I-NEXT: sub a0, a1, a0
602 ; RV32I-NEXT: and a0, a1, a0
603 ; RV32I-NEXT: or a0, a2, a0
604 ; RV32I-NEXT: snez a0, a0
607 ; RV32ZBB-LABEL: ctpop_i64_ugt_one:
609 ; RV32ZBB-NEXT: cpop a1, a1
610 ; RV32ZBB-NEXT: cpop a0, a0
611 ; RV32ZBB-NEXT: add a0, a0, a1
612 ; RV32ZBB-NEXT: sltiu a0, a0, 2
613 ; RV32ZBB-NEXT: xori a0, a0, 1
615 %1 = call i64 @llvm.ctpop.i64(i64 %a)
616 %2 = icmp ugt i64 %1, 1
; Test: ctpop(%a) == 1 on i64 (a0 = low word, a1 = high word).
; Without Zbb the expected code branches on the high word. When a1 is zero
; it applies (lo - 1) <u (lo ^ (lo - 1)) to the low word. Otherwise it forms
; t = hi - (lo == 0) and computes t <u (hi ^ t).
; With +zbb the checks want cpop on both words, an add, a subtraction of 1,
; and seqz.
620 define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
621 ; RV32I-LABEL: ctpop_i64_eq_one:
623 ; RV32I-NEXT: beqz a1, .LBB17_2
624 ; RV32I-NEXT: # %bb.1:
625 ; RV32I-NEXT: seqz a0, a0
626 ; RV32I-NEXT: sub a0, a1, a0
627 ; RV32I-NEXT: xor a1, a1, a0
628 ; RV32I-NEXT: sltu a0, a0, a1
630 ; RV32I-NEXT: .LBB17_2:
631 ; RV32I-NEXT: addi a1, a0, -1
632 ; RV32I-NEXT: xor a0, a0, a1
633 ; RV32I-NEXT: sltu a0, a1, a0
636 ; RV32ZBB-LABEL: ctpop_i64_eq_one:
638 ; RV32ZBB-NEXT: cpop a1, a1
639 ; RV32ZBB-NEXT: cpop a0, a0
640 ; RV32ZBB-NEXT: add a0, a0, a1
641 ; RV32ZBB-NEXT: addi a0, a0, -1
642 ; RV32ZBB-NEXT: seqz a0, a0
644 %1 = call i64 @llvm.ctpop.i64(i64 %a)
645 %2 = icmp eq i64 %1, 1
; Test: ctpop(%a) != 1 on i64 (a0 = low word, a1 = high word).
; Without Zbb the expected code branches on the high word. When a1 is zero
; it applies (lo - 1) <u (lo ^ (lo - 1)) to the low word. Otherwise it forms
; t = hi - (lo == 0) and computes t <u (hi ^ t). Each path ends with an
; xori 1 that inverts the comparison.
; With +zbb the checks want cpop on both words, an add, a subtraction of 1,
; and snez.
649 define i1 @ctpop_i64_ne_one(i64 %a) nounwind {
650 ; RV32I-LABEL: ctpop_i64_ne_one:
652 ; RV32I-NEXT: beqz a1, .LBB18_2
653 ; RV32I-NEXT: # %bb.1:
654 ; RV32I-NEXT: seqz a0, a0
655 ; RV32I-NEXT: sub a0, a1, a0
656 ; RV32I-NEXT: xor a1, a1, a0
657 ; RV32I-NEXT: sltu a0, a0, a1
658 ; RV32I-NEXT: xori a0, a0, 1
660 ; RV32I-NEXT: .LBB18_2:
661 ; RV32I-NEXT: addi a1, a0, -1
662 ; RV32I-NEXT: xor a0, a0, a1
663 ; RV32I-NEXT: sltu a0, a1, a0
664 ; RV32I-NEXT: xori a0, a0, 1
667 ; RV32ZBB-LABEL: ctpop_i64_ne_one:
669 ; RV32ZBB-NEXT: cpop a1, a1
670 ; RV32ZBB-NEXT: cpop a0, a0
671 ; RV32ZBB-NEXT: add a0, a0, a1
672 ; RV32ZBB-NEXT: addi a0, a0, -1
673 ; RV32ZBB-NEXT: snez a0, a0
675 %1 = call i64 @llvm.ctpop.i64(i64 %a)
676 %2 = icmp ne i64 %1, 1
680 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
; Test: @llvm.ctpop.v2i64. In the expected code the operand's four 32-bit
; words are loaded from the address in a1 (offsets 0..12) and the result is
; stored through the address in a0.
; Without Zbb every word gets the shift-and-mask bit count, sharing the masks
; in a6 (0x55555555), a5 (0x33333333) and a7 (0x0f0f0f0f). For each element
; the counts of the two words are added and stored at offsets 0 and 8, and
; zero is stored at offsets 4 and 12. With +zbb the same load/add/store shape
; is expected, with one cpop per word.
682 define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
683 ; RV32I-LABEL: ctpop_v2i64:
685 ; RV32I-NEXT: lw a3, 4(a1)
686 ; RV32I-NEXT: lw a2, 8(a1)
687 ; RV32I-NEXT: lw a4, 12(a1)
688 ; RV32I-NEXT: lw a1, 0(a1)
689 ; RV32I-NEXT: srli a5, a3, 1
690 ; RV32I-NEXT: lui a6, 349525
691 ; RV32I-NEXT: addi a6, a6, 1365
692 ; RV32I-NEXT: and a5, a5, a6
693 ; RV32I-NEXT: sub a3, a3, a5
694 ; RV32I-NEXT: lui a5, 209715
695 ; RV32I-NEXT: addi a5, a5, 819
696 ; RV32I-NEXT: and a7, a3, a5
697 ; RV32I-NEXT: srli a3, a3, 2
698 ; RV32I-NEXT: and a3, a3, a5
699 ; RV32I-NEXT: add a3, a7, a3
700 ; RV32I-NEXT: srli a7, a3, 4
701 ; RV32I-NEXT: add a3, a3, a7
702 ; RV32I-NEXT: lui a7, 61681
703 ; RV32I-NEXT: addi a7, a7, -241
704 ; RV32I-NEXT: and a3, a3, a7
705 ; RV32I-NEXT: slli t0, a3, 8
706 ; RV32I-NEXT: add a3, a3, t0
707 ; RV32I-NEXT: slli t0, a3, 16
708 ; RV32I-NEXT: add a3, a3, t0
709 ; RV32I-NEXT: srli a3, a3, 24
710 ; RV32I-NEXT: srli t0, a1, 1
711 ; RV32I-NEXT: and t0, t0, a6
712 ; RV32I-NEXT: sub a1, a1, t0
713 ; RV32I-NEXT: and t0, a1, a5
714 ; RV32I-NEXT: srli a1, a1, 2
715 ; RV32I-NEXT: and a1, a1, a5
716 ; RV32I-NEXT: add a1, t0, a1
717 ; RV32I-NEXT: srli t0, a1, 4
718 ; RV32I-NEXT: add a1, a1, t0
719 ; RV32I-NEXT: and a1, a1, a7
720 ; RV32I-NEXT: slli t0, a1, 8
721 ; RV32I-NEXT: add a1, a1, t0
722 ; RV32I-NEXT: slli t0, a1, 16
723 ; RV32I-NEXT: add a1, a1, t0
724 ; RV32I-NEXT: srli a1, a1, 24
725 ; RV32I-NEXT: add a1, a1, a3
726 ; RV32I-NEXT: srli a3, a4, 1
727 ; RV32I-NEXT: and a3, a3, a6
728 ; RV32I-NEXT: sub a4, a4, a3
729 ; RV32I-NEXT: and a3, a4, a5
730 ; RV32I-NEXT: srli a4, a4, 2
731 ; RV32I-NEXT: and a4, a4, a5
732 ; RV32I-NEXT: add a3, a3, a4
733 ; RV32I-NEXT: srli a4, a3, 4
734 ; RV32I-NEXT: add a3, a3, a4
735 ; RV32I-NEXT: and a3, a3, a7
736 ; RV32I-NEXT: slli a4, a3, 8
737 ; RV32I-NEXT: add a3, a3, a4
738 ; RV32I-NEXT: slli a4, a3, 16
739 ; RV32I-NEXT: add a3, a3, a4
740 ; RV32I-NEXT: srli a3, a3, 24
741 ; RV32I-NEXT: srli a4, a2, 1
742 ; RV32I-NEXT: and a4, a4, a6
743 ; RV32I-NEXT: sub a2, a2, a4
744 ; RV32I-NEXT: and a4, a2, a5
745 ; RV32I-NEXT: srli a2, a2, 2
746 ; RV32I-NEXT: and a2, a2, a5
747 ; RV32I-NEXT: add a2, a4, a2
748 ; RV32I-NEXT: srli a4, a2, 4
749 ; RV32I-NEXT: add a2, a2, a4
750 ; RV32I-NEXT: and a2, a2, a7
751 ; RV32I-NEXT: slli a4, a2, 8
752 ; RV32I-NEXT: add a2, a2, a4
753 ; RV32I-NEXT: slli a4, a2, 16
754 ; RV32I-NEXT: add a2, a2, a4
755 ; RV32I-NEXT: srli a2, a2, 24
756 ; RV32I-NEXT: add a2, a2, a3
757 ; RV32I-NEXT: sw zero, 12(a0)
758 ; RV32I-NEXT: sw zero, 4(a0)
759 ; RV32I-NEXT: sw a2, 8(a0)
760 ; RV32I-NEXT: sw a1, 0(a0)
763 ; RV32ZBB-LABEL: ctpop_v2i64:
765 ; RV32ZBB-NEXT: lw a2, 4(a1)
766 ; RV32ZBB-NEXT: lw a3, 0(a1)
767 ; RV32ZBB-NEXT: lw a4, 8(a1)
768 ; RV32ZBB-NEXT: lw a1, 12(a1)
769 ; RV32ZBB-NEXT: cpop a2, a2
770 ; RV32ZBB-NEXT: cpop a3, a3
771 ; RV32ZBB-NEXT: add a2, a3, a2
772 ; RV32ZBB-NEXT: cpop a1, a1
773 ; RV32ZBB-NEXT: cpop a3, a4
774 ; RV32ZBB-NEXT: add a1, a3, a1
775 ; RV32ZBB-NEXT: sw zero, 12(a0)
776 ; RV32ZBB-NEXT: sw zero, 4(a0)
777 ; RV32ZBB-NEXT: sw a1, 8(a0)
778 ; RV32ZBB-NEXT: sw a2, 0(a0)
780 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
; Test: element-wise ctpop(%a) <u 2 on <2 x i64>. In the expected code the
; operand's four words are loaded from the address in a0 and the two i1
; results end up in a0 and a1.
; Without Zbb each element uses the 64-bit (x & (x - 1)) == 0 form:
; lo & (lo - 1) or-ed with hi & (hi - (lo == 0)), tested with seqz.
; With +zbb each element uses cpop on both words, an add, and sltiu
; against 2.
784 define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
785 ; RV32I-LABEL: ctpop_v2i64_ult_two:
787 ; RV32I-NEXT: lw a1, 0(a0)
788 ; RV32I-NEXT: lw a2, 12(a0)
789 ; RV32I-NEXT: lw a3, 8(a0)
790 ; RV32I-NEXT: lw a0, 4(a0)
791 ; RV32I-NEXT: addi a4, a1, -1
792 ; RV32I-NEXT: and a4, a1, a4
793 ; RV32I-NEXT: seqz a1, a1
794 ; RV32I-NEXT: sub a1, a0, a1
795 ; RV32I-NEXT: and a0, a0, a1
796 ; RV32I-NEXT: or a0, a4, a0
797 ; RV32I-NEXT: seqz a0, a0
798 ; RV32I-NEXT: addi a1, a3, -1
799 ; RV32I-NEXT: and a1, a3, a1
800 ; RV32I-NEXT: seqz a3, a3
801 ; RV32I-NEXT: sub a3, a2, a3
802 ; RV32I-NEXT: and a2, a2, a3
803 ; RV32I-NEXT: or a1, a1, a2
804 ; RV32I-NEXT: seqz a1, a1
807 ; RV32ZBB-LABEL: ctpop_v2i64_ult_two:
809 ; RV32ZBB-NEXT: lw a1, 12(a0)
810 ; RV32ZBB-NEXT: lw a2, 8(a0)
811 ; RV32ZBB-NEXT: lw a3, 0(a0)
812 ; RV32ZBB-NEXT: lw a0, 4(a0)
813 ; RV32ZBB-NEXT: cpop a1, a1
814 ; RV32ZBB-NEXT: cpop a2, a2
815 ; RV32ZBB-NEXT: add a1, a2, a1
816 ; RV32ZBB-NEXT: cpop a0, a0
817 ; RV32ZBB-NEXT: cpop a2, a3
818 ; RV32ZBB-NEXT: add a0, a2, a0
819 ; RV32ZBB-NEXT: sltiu a0, a0, 2
820 ; RV32ZBB-NEXT: sltiu a1, a1, 2
822 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
823 %2 = icmp ult <2 x i64> %1, <i64 2, i64 2>
; Test: element-wise ctpop(%a) >u 1 on <2 x i64>. In the expected code the
; operand's four words are loaded from the address in a0 and the two i1
; results end up in a0 and a1.
; Without Zbb each element uses the 64-bit (x & (x - 1)) != 0 form:
; lo & (lo - 1) or-ed with hi & (hi - (lo == 0)), tested with snez.
; With +zbb each element uses cpop on both words, an add, sltiu against 2,
; and an xori 1 to invert the result.
827 define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
828 ; RV32I-LABEL: ctpop_v2i64_ugt_one:
830 ; RV32I-NEXT: lw a1, 0(a0)
831 ; RV32I-NEXT: lw a2, 12(a0)
832 ; RV32I-NEXT: lw a3, 8(a0)
833 ; RV32I-NEXT: lw a0, 4(a0)
834 ; RV32I-NEXT: addi a4, a1, -1
835 ; RV32I-NEXT: and a4, a1, a4
836 ; RV32I-NEXT: seqz a1, a1
837 ; RV32I-NEXT: sub a1, a0, a1
838 ; RV32I-NEXT: and a0, a0, a1
839 ; RV32I-NEXT: or a0, a4, a0
840 ; RV32I-NEXT: snez a0, a0
841 ; RV32I-NEXT: addi a1, a3, -1
842 ; RV32I-NEXT: and a1, a3, a1
843 ; RV32I-NEXT: seqz a3, a3
844 ; RV32I-NEXT: sub a3, a2, a3
845 ; RV32I-NEXT: and a2, a2, a3
846 ; RV32I-NEXT: or a1, a1, a2
847 ; RV32I-NEXT: snez a1, a1
850 ; RV32ZBB-LABEL: ctpop_v2i64_ugt_one:
852 ; RV32ZBB-NEXT: lw a1, 12(a0)
853 ; RV32ZBB-NEXT: lw a2, 8(a0)
854 ; RV32ZBB-NEXT: lw a3, 0(a0)
855 ; RV32ZBB-NEXT: lw a0, 4(a0)
856 ; RV32ZBB-NEXT: cpop a1, a1
857 ; RV32ZBB-NEXT: cpop a2, a2
858 ; RV32ZBB-NEXT: add a1, a2, a1
859 ; RV32ZBB-NEXT: cpop a0, a0
860 ; RV32ZBB-NEXT: cpop a2, a3
861 ; RV32ZBB-NEXT: add a0, a2, a0
862 ; RV32ZBB-NEXT: sltiu a0, a0, 2
863 ; RV32ZBB-NEXT: xori a0, a0, 1
864 ; RV32ZBB-NEXT: sltiu a1, a1, 2
865 ; RV32ZBB-NEXT: xori a1, a1, 1
867 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
868 %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
; Test: element-wise ctpop(%a) == 1 on <2 x i64>. In the expected code the
; operand's words are loaded from the address passed in a0 (copied to a1
; first in the non-Zbb output) and the two i1 results end up in a0 and a1.
; Without Zbb each element branches on its high word. When the high word is
; zero it applies (lo - 1) <u (lo ^ (lo - 1)). Otherwise it forms
; t = hi - (lo == 0) and computes t <u (hi ^ t).
; With +zbb each element uses cpop on both words, an add, a subtraction of
; 1, and seqz.
872 define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
873 ; RV32I-LABEL: ctpop_v2i64_eq_one:
875 ; RV32I-NEXT: mv a1, a0
876 ; RV32I-NEXT: lw a2, 12(a0)
877 ; RV32I-NEXT: lw a0, 4(a0)
878 ; RV32I-NEXT: lw a3, 0(a1)
879 ; RV32I-NEXT: beqz a0, .LBB22_3
880 ; RV32I-NEXT: # %bb.1:
881 ; RV32I-NEXT: seqz a3, a3
882 ; RV32I-NEXT: sub a3, a0, a3
883 ; RV32I-NEXT: xor a0, a0, a3
884 ; RV32I-NEXT: sltu a0, a3, a0
885 ; RV32I-NEXT: lw a1, 8(a1)
886 ; RV32I-NEXT: bnez a2, .LBB22_4
887 ; RV32I-NEXT: .LBB22_2:
888 ; RV32I-NEXT: addi a2, a1, -1
889 ; RV32I-NEXT: xor a1, a1, a2
890 ; RV32I-NEXT: sltu a1, a2, a1
892 ; RV32I-NEXT: .LBB22_3:
893 ; RV32I-NEXT: addi a0, a3, -1
894 ; RV32I-NEXT: xor a3, a3, a0
895 ; RV32I-NEXT: sltu a0, a0, a3
896 ; RV32I-NEXT: lw a1, 8(a1)
897 ; RV32I-NEXT: beqz a2, .LBB22_2
898 ; RV32I-NEXT: .LBB22_4:
899 ; RV32I-NEXT: seqz a1, a1
900 ; RV32I-NEXT: sub a1, a2, a1
901 ; RV32I-NEXT: xor a2, a2, a1
902 ; RV32I-NEXT: sltu a1, a1, a2
905 ; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
907 ; RV32ZBB-NEXT: lw a1, 12(a0)
908 ; RV32ZBB-NEXT: lw a2, 8(a0)
909 ; RV32ZBB-NEXT: lw a3, 0(a0)
910 ; RV32ZBB-NEXT: lw a0, 4(a0)
911 ; RV32ZBB-NEXT: cpop a1, a1
912 ; RV32ZBB-NEXT: cpop a2, a2
913 ; RV32ZBB-NEXT: add a1, a2, a1
914 ; RV32ZBB-NEXT: cpop a0, a0
915 ; RV32ZBB-NEXT: cpop a2, a3
916 ; RV32ZBB-NEXT: add a0, a2, a0
917 ; RV32ZBB-NEXT: addi a0, a0, -1
918 ; RV32ZBB-NEXT: seqz a0, a0
919 ; RV32ZBB-NEXT: addi a1, a1, -1
920 ; RV32ZBB-NEXT: seqz a1, a1
922 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
923 %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
; Test: element-wise ctpop(%a) != 1 on <2 x i64>. In the expected code the
; operand's words are loaded from the address in a0 and the two i1 results
; end up in a0 and a1.
; Without Zbb each element branches on its high word. When the high word is
; zero it applies (lo - 1) <u (lo ^ (lo - 1)). Otherwise it forms
; t = hi - (lo == 0) and computes t <u (hi ^ t). An xori 1 inverts each
; comparison.
; With +zbb each element uses cpop on both words, an add, a subtraction of
; 1, and snez.
927 define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
928 ; RV32I-LABEL: ctpop_v2i64_ne_one:
930 ; RV32I-NEXT: lw a1, 12(a0)
931 ; RV32I-NEXT: lw a2, 4(a0)
932 ; RV32I-NEXT: lw a3, 0(a0)
933 ; RV32I-NEXT: beqz a2, .LBB23_2
934 ; RV32I-NEXT: # %bb.1:
935 ; RV32I-NEXT: seqz a3, a3
936 ; RV32I-NEXT: sub a3, a2, a3
937 ; RV32I-NEXT: xor a2, a2, a3
938 ; RV32I-NEXT: sltu a2, a3, a2
939 ; RV32I-NEXT: j .LBB23_3
940 ; RV32I-NEXT: .LBB23_2:
941 ; RV32I-NEXT: addi a2, a3, -1
942 ; RV32I-NEXT: xor a3, a3, a2
943 ; RV32I-NEXT: sltu a2, a2, a3
944 ; RV32I-NEXT: .LBB23_3:
945 ; RV32I-NEXT: lw a3, 8(a0)
946 ; RV32I-NEXT: xori a0, a2, 1
947 ; RV32I-NEXT: beqz a1, .LBB23_5
948 ; RV32I-NEXT: # %bb.4:
949 ; RV32I-NEXT: seqz a2, a3
950 ; RV32I-NEXT: sub a2, a1, a2
951 ; RV32I-NEXT: xor a1, a1, a2
952 ; RV32I-NEXT: sltu a1, a2, a1
953 ; RV32I-NEXT: xori a1, a1, 1
955 ; RV32I-NEXT: .LBB23_5:
956 ; RV32I-NEXT: addi a1, a3, -1
957 ; RV32I-NEXT: xor a3, a3, a1
958 ; RV32I-NEXT: sltu a1, a1, a3
959 ; RV32I-NEXT: xori a1, a1, 1
962 ; RV32ZBB-LABEL: ctpop_v2i64_ne_one:
964 ; RV32ZBB-NEXT: lw a1, 12(a0)
965 ; RV32ZBB-NEXT: lw a2, 8(a0)
966 ; RV32ZBB-NEXT: lw a3, 0(a0)
967 ; RV32ZBB-NEXT: lw a0, 4(a0)
968 ; RV32ZBB-NEXT: cpop a1, a1
969 ; RV32ZBB-NEXT: cpop a2, a2
970 ; RV32ZBB-NEXT: add a1, a2, a1
971 ; RV32ZBB-NEXT: cpop a0, a0
972 ; RV32ZBB-NEXT: cpop a2, a3
973 ; RV32ZBB-NEXT: add a0, a2, a0
974 ; RV32ZBB-NEXT: addi a0, a0, -1
975 ; RV32ZBB-NEXT: snez a0, a0
976 ; RV32ZBB-NEXT: addi a1, a1, -1
977 ; RV32ZBB-NEXT: snez a1, a1
979 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
980 %2 = icmp ne <2 x i64> %1, <i64 1, i64 1>
; Test: sign extension of the low byte of an i32, written as shl by 24
; followed by ashr exact by 24. Without Zbb the checks want the slli/srai
; pair; with +zbb the checks want a single sext.b.
984 define i32 @sextb_i32(i32 %a) nounwind {
985 ; RV32I-LABEL: sextb_i32:
987 ; RV32I-NEXT: slli a0, a0, 24
988 ; RV32I-NEXT: srai a0, a0, 24
991 ; RV32ZBB-LABEL: sextb_i32:
993 ; RV32ZBB-NEXT: sext.b a0, a0
995 %shl = shl i32 %a, 24
996 %shr = ashr exact i32 %shl, 24
; Test: sign extension of the low byte of an i64, written as shl by 56
; followed by ashr exact by 56. Only the low input word a0 is read.
; Without Zbb the checks want slli by 24, then srai by 24 for the low result
; word and srai by 31 for the high result word. With +zbb the checks want
; sext.b for the low word and srai by 31 of that result for the high word.
1000 define i64 @sextb_i64(i64 %a) nounwind {
1001 ; RV32I-LABEL: sextb_i64:
1003 ; RV32I-NEXT: slli a1, a0, 24
1004 ; RV32I-NEXT: srai a0, a1, 24
1005 ; RV32I-NEXT: srai a1, a1, 31
1008 ; RV32ZBB-LABEL: sextb_i64:
1010 ; RV32ZBB-NEXT: sext.b a0, a0
1011 ; RV32ZBB-NEXT: srai a1, a0, 31
1013 %shl = shl i64 %a, 56
1014 %shr = ashr exact i64 %shl, 56
; Test: sign extension of the low halfword of an i32, written as shl by 16
; followed by ashr exact by 16. Without Zbb the checks want the slli/srai
; pair; with +zbb the checks want a single sext.h.
1018 define i32 @sexth_i32(i32 %a) nounwind {
1019 ; RV32I-LABEL: sexth_i32:
1021 ; RV32I-NEXT: slli a0, a0, 16
1022 ; RV32I-NEXT: srai a0, a0, 16
1025 ; RV32ZBB-LABEL: sexth_i32:
1027 ; RV32ZBB-NEXT: sext.h a0, a0
1029 %shl = shl i32 %a, 16
1030 %shr = ashr exact i32 %shl, 16
; Test: sign extension of the low halfword of an i64, written as shl by 48
; followed by ashr exact by 48. Only the low input word a0 is read.
; Without Zbb the checks want slli by 16, then srai by 16 for the low result
; word and srai by 31 for the high result word. With +zbb the checks want
; sext.h for the low word and srai by 31 of that result for the high word.
1034 define i64 @sexth_i64(i64 %a) nounwind {
1035 ; RV32I-LABEL: sexth_i64:
1037 ; RV32I-NEXT: slli a1, a0, 16
1038 ; RV32I-NEXT: srai a0, a1, 16
1039 ; RV32I-NEXT: srai a1, a1, 31
1042 ; RV32ZBB-LABEL: sexth_i64:
1044 ; RV32ZBB-NEXT: sext.h a0, a0
1045 ; RV32ZBB-NEXT: srai a1, a0, 31
1047 %shl = shl i64 %a, 48
1048 %shr = ashr exact i64 %shl, 48
; Test: signed i32 minimum written as icmp slt + select.
; Without Zbb the checks want a blt that skips over `mv a0, a1` when a0 is
; already the smaller value. With +zbb the checks want a single min.
1052 define i32 @min_i32(i32 %a, i32 %b) nounwind {
1053 ; RV32I-LABEL: min_i32:
1055 ; RV32I-NEXT: blt a0, a1, .LBB28_2
1056 ; RV32I-NEXT: # %bb.1:
1057 ; RV32I-NEXT: mv a0, a1
1058 ; RV32I-NEXT: .LBB28_2:
1061 ; RV32ZBB-LABEL: min_i32:
1063 ; RV32ZBB-NEXT: min a0, a0, a1
1065 %cmp = icmp slt i32 %a, %b
1066 %cond = select i1 %cmp, i32 %a, i32 %b
1070 ; Because i64 code patterns are not matched directly on RV32, some i64 patterns
1071 ; do not yet have any matching bit manipulation instructions on RV32.
1072 ; This test is kept here in case future expansions of the Bitmanip
1073 ; extensions introduce instructions suitable for this pattern.
; Test: signed i64 minimum written as icmp slt + select; %a is in a0/a1 and
; %b in a2/a3 (low/high words).
; The assertions use the shared CHECK prefix, so the same output is expected
; with and without Zbb: when the high words differ they are compared with
; slt, otherwise the low words are compared with sltu. %b is copied into
; a0/a1 when %a is not the smaller value.
1075 define i64 @min_i64(i64 %a, i64 %b) nounwind {
1076 ; CHECK-LABEL: min_i64:
1078 ; CHECK-NEXT: beq a1, a3, .LBB29_2
1079 ; CHECK-NEXT: # %bb.1:
1080 ; CHECK-NEXT: slt a4, a1, a3
1081 ; CHECK-NEXT: beqz a4, .LBB29_3
1082 ; CHECK-NEXT: j .LBB29_4
1083 ; CHECK-NEXT: .LBB29_2:
1084 ; CHECK-NEXT: sltu a4, a0, a2
1085 ; CHECK-NEXT: bnez a4, .LBB29_4
1086 ; CHECK-NEXT: .LBB29_3:
1087 ; CHECK-NEXT: mv a0, a2
1088 ; CHECK-NEXT: mv a1, a3
1089 ; CHECK-NEXT: .LBB29_4:
1091 %cmp = icmp slt i64 %a, %b
1092 %cond = select i1 %cmp, i64 %a, i64 %b
; Test: signed i32 maximum written as icmp sgt + select.
; Without Zbb the checks want a blt (operands swapped: a1 < a0) that skips
; over `mv a0, a1` when a0 is already the larger value. With +zbb the checks
; want a single max.
1096 define i32 @max_i32(i32 %a, i32 %b) nounwind {
1097 ; RV32I-LABEL: max_i32:
1099 ; RV32I-NEXT: blt a1, a0, .LBB30_2
1100 ; RV32I-NEXT: # %bb.1:
1101 ; RV32I-NEXT: mv a0, a1
1102 ; RV32I-NEXT: .LBB30_2:
1105 ; RV32ZBB-LABEL: max_i32:
1107 ; RV32ZBB-NEXT: max a0, a0, a1
1109 %cmp = icmp sgt i32 %a, %b
1110 %cond = select i1 %cmp, i32 %a, i32 %b
1114 ; Because i64 code patterns are not matched directly on RV32, some i64 patterns
1115 ; do not yet have any matching bit manipulation instructions on RV32.
1116 ; This test is kept here in case future expansions of the Bitmanip
1117 ; extensions introduce instructions suitable for this pattern.
; Test: signed i64 maximum written as icmp sgt + select; %a is in a0/a1 and
; %b in a2/a3 (low/high words).
; The assertions use the shared CHECK prefix, so the same output is expected
; with and without Zbb: when the high words differ they are compared with
; slt (a3 < a1), otherwise the low words are compared with sltu (a2 < a0).
; %b is copied into a0/a1 when %a is not the larger value.
1119 define i64 @max_i64(i64 %a, i64 %b) nounwind {
1120 ; CHECK-LABEL: max_i64:
1122 ; CHECK-NEXT: beq a1, a3, .LBB31_2
1123 ; CHECK-NEXT: # %bb.1:
1124 ; CHECK-NEXT: slt a4, a3, a1
1125 ; CHECK-NEXT: beqz a4, .LBB31_3
1126 ; CHECK-NEXT: j .LBB31_4
1127 ; CHECK-NEXT: .LBB31_2:
1128 ; CHECK-NEXT: sltu a4, a2, a0
1129 ; CHECK-NEXT: bnez a4, .LBB31_4
1130 ; CHECK-NEXT: .LBB31_3:
1131 ; CHECK-NEXT: mv a0, a2
1132 ; CHECK-NEXT: mv a1, a3
1133 ; CHECK-NEXT: .LBB31_4:
1135 %cmp = icmp sgt i64 %a, %b
1136 %cond = select i1 %cmp, i64 %a, i64 %b
; Test: unsigned i32 minimum written as icmp ult + select.
; Without Zbb the checks want a bltu that skips over `mv a0, a1` when a0 is
; already the smaller value. With +zbb the checks want a single minu.
1140 define i32 @minu_i32(i32 %a, i32 %b) nounwind {
1141 ; RV32I-LABEL: minu_i32:
1143 ; RV32I-NEXT: bltu a0, a1, .LBB32_2
1144 ; RV32I-NEXT: # %bb.1:
1145 ; RV32I-NEXT: mv a0, a1
1146 ; RV32I-NEXT: .LBB32_2:
1149 ; RV32ZBB-LABEL: minu_i32:
1151 ; RV32ZBB-NEXT: minu a0, a0, a1
1153 %cmp = icmp ult i32 %a, %b
1154 %cond = select i1 %cmp, i32 %a, i32 %b
1158 ; Because i64 code patterns are not matched directly on RV32, some i64 patterns
1159 ; do not yet have any matching bit manipulation instructions on RV32.
1160 ; This test is kept here in case future expansions of the Bitmanip
1161 ; extensions introduce instructions suitable for this pattern.
; Test: unsigned i64 minimum written as icmp ult + select; %a is in a0/a1
; and %b in a2/a3 (low/high words).
; The assertions use the shared CHECK prefix, so the same output is expected
; with and without Zbb: when the high words differ they are compared with
; sltu, otherwise the low words are compared with sltu. %b is copied into
; a0/a1 when %a is not the smaller value.
1163 define i64 @minu_i64(i64 %a, i64 %b) nounwind {
1164 ; CHECK-LABEL: minu_i64:
1166 ; CHECK-NEXT: beq a1, a3, .LBB33_2
1167 ; CHECK-NEXT: # %bb.1:
1168 ; CHECK-NEXT: sltu a4, a1, a3
1169 ; CHECK-NEXT: beqz a4, .LBB33_3
1170 ; CHECK-NEXT: j .LBB33_4
1171 ; CHECK-NEXT: .LBB33_2:
1172 ; CHECK-NEXT: sltu a4, a0, a2
1173 ; CHECK-NEXT: bnez a4, .LBB33_4
1174 ; CHECK-NEXT: .LBB33_3:
1175 ; CHECK-NEXT: mv a0, a2
1176 ; CHECK-NEXT: mv a1, a3
1177 ; CHECK-NEXT: .LBB33_4:
1179 %cmp = icmp ult i64 %a, %b
1180 %cond = select i1 %cmp, i64 %a, i64 %b
; Test: unsigned i32 maximum written as icmp ugt + select.
; Without Zbb the checks want a bltu (operands swapped: a1 < a0) that skips
; over `mv a0, a1` when a0 is already the larger value. With +zbb the checks
; want a single maxu.
1184 define i32 @maxu_i32(i32 %a, i32 %b) nounwind {
1185 ; RV32I-LABEL: maxu_i32:
1187 ; RV32I-NEXT: bltu a1, a0, .LBB34_2
1188 ; RV32I-NEXT: # %bb.1:
1189 ; RV32I-NEXT: mv a0, a1
1190 ; RV32I-NEXT: .LBB34_2:
1193 ; RV32ZBB-LABEL: maxu_i32:
1195 ; RV32ZBB-NEXT: maxu a0, a0, a1
1197 %cmp = icmp ugt i32 %a, %b
1198 %cond = select i1 %cmp, i32 %a, i32 %b
1202 ; Because i64 code patterns are not matched directly on RV32, some i64 patterns
1203 ; do not yet have any matching bit manipulation instructions on RV32.
1204 ; This test is presented here in case future expansions of the Bitmanip
1205 ; extensions introduce instructions suitable for this pattern.
; Unsigned 64-bit maximum written as `icmp ugt` + `select`. The assertions use
; the shared CHECK prefix, so both RUN lines expect the same branch-based
; sequence: a1 and a3 are compared first, and a0/a2 are compared only when a1
; equals a3. Both comparisons use sltu with the %b register as first source.
1207 define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
1208 ; CHECK-LABEL: maxu_i64:
1210 ; CHECK-NEXT: beq a1, a3, .LBB35_2
1211 ; CHECK-NEXT: # %bb.1:
1212 ; CHECK-NEXT: sltu a4, a3, a1
1213 ; CHECK-NEXT: beqz a4, .LBB35_3
1214 ; CHECK-NEXT: j .LBB35_4
1215 ; CHECK-NEXT: .LBB35_2:
1216 ; CHECK-NEXT: sltu a4, a2, a0
1217 ; CHECK-NEXT: bnez a4, .LBB35_4
1218 ; CHECK-NEXT: .LBB35_3:
1219 ; CHECK-NEXT: mv a0, a2
1220 ; CHECK-NEXT: mv a1, a3
1221 ; CHECK-NEXT: .LBB35_4:
; IR under test: pick %a when %a > %b (unsigned), otherwise %b.
1223 %cmp = icmp ugt i64 %a, %b
1224 %cond = select i1 %cmp, i64 %a, i64 %b
1228 declare i32 @llvm.abs.i32(i32, i1 immarg)
; 32-bit absolute value through the llvm.abs.i32 intrinsic, called with its
; i1 immarg operand set to true. The RV32I lines expect the srai/xor/sub
; sequence; the RV32ZBB lines expect neg followed by max.
1230 define i32 @abs_i32(i32 %x) {
1231 ; RV32I-LABEL: abs_i32:
1233 ; RV32I-NEXT: srai a1, a0, 31
1234 ; RV32I-NEXT: xor a0, a0, a1
1235 ; RV32I-NEXT: sub a0, a0, a1
1238 ; RV32ZBB-LABEL: abs_i32:
1240 ; RV32ZBB-NEXT: neg a1, a0
1241 ; RV32ZBB-NEXT: max a0, a0, a1
1243 %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
1247 declare i64 @llvm.abs.i64(i64, i1 immarg)
; 64-bit absolute value through the llvm.abs.i64 intrinsic, called with its
; i1 immarg operand set to true. The assertions use the shared CHECK prefix,
; so both RUN lines expect the same sequence: when a1 is non-negative the
; negation is skipped; otherwise the a1:a0 pair is negated, with snez a2, a0
; supplying the borrow that is subtracted from the negated a1.
1249 define i64 @abs_i64(i64 %x) {
1250 ; CHECK-LABEL: abs_i64:
1252 ; CHECK-NEXT: bgez a1, .LBB37_2
1253 ; CHECK-NEXT: # %bb.1:
1254 ; CHECK-NEXT: snez a2, a0
1255 ; CHECK-NEXT: neg a0, a0
1256 ; CHECK-NEXT: neg a1, a1
1257 ; CHECK-NEXT: sub a1, a1, a2
1258 ; CHECK-NEXT: .LBB37_2:
1260 %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
; Zero-extension of the low 16 bits of an i32, written as `and` with 65535.
; The RV32I lines expect a shift-left/shift-right-logical pair by 16; the
; RV32ZBB lines expect a single zext.h.
1264 define i32 @zexth_i32(i32 %a) nounwind {
1265 ; RV32I-LABEL: zexth_i32:
1267 ; RV32I-NEXT: slli a0, a0, 16
1268 ; RV32I-NEXT: srli a0, a0, 16
1271 ; RV32ZBB-LABEL: zexth_i32:
1273 ; RV32ZBB-NEXT: zext.h a0, a0
1275 %and = and i32 %a, 65535
; Zero-extension of the low 16 bits of an i64, written as `and` with 65535.
; Both configurations handle a0 exactly as in the i32 variant (slli/srli pair
; on RV32I, zext.h with Zbb) and additionally load 0 into a1.
1279 define i64 @zexth_i64(i64 %a) nounwind {
1280 ; RV32I-LABEL: zexth_i64:
1282 ; RV32I-NEXT: slli a0, a0, 16
1283 ; RV32I-NEXT: srli a0, a0, 16
1284 ; RV32I-NEXT: li a1, 0
1287 ; RV32ZBB-LABEL: zexth_i64:
1289 ; RV32ZBB-NEXT: zext.h a0, a0
1290 ; RV32ZBB-NEXT: li a1, 0
1292 %and = and i64 %a, 65535
1296 declare i32 @llvm.bswap.i32(i32)
; 32-bit byte swap through the llvm.bswap.i32 intrinsic. The RV32I lines
; expect a shift/mask/or expansion that uses the constant built by
; lui 16 / addi -256 (0xFF00) as the byte mask; the RV32ZBB lines expect a
; single rev8.
1298 define i32 @bswap_i32(i32 %a) nounwind {
1299 ; RV32I-LABEL: bswap_i32:
1301 ; RV32I-NEXT: srli a1, a0, 8
1302 ; RV32I-NEXT: lui a2, 16
1303 ; RV32I-NEXT: addi a2, a2, -256
1304 ; RV32I-NEXT: and a1, a1, a2
1305 ; RV32I-NEXT: srli a3, a0, 24
1306 ; RV32I-NEXT: or a1, a1, a3
1307 ; RV32I-NEXT: and a2, a0, a2
1308 ; RV32I-NEXT: slli a2, a2, 8
1309 ; RV32I-NEXT: slli a0, a0, 24
1310 ; RV32I-NEXT: or a0, a0, a2
1311 ; RV32I-NEXT: or a0, a0, a1
1314 ; RV32ZBB-LABEL: bswap_i32:
1316 ; RV32ZBB-NEXT: rev8 a0, a0
1318 %1 = tail call i32 @llvm.bswap.i32(i32 %a)
1322 declare i64 @llvm.bswap.i64(i64)
; 64-bit byte swap through the llvm.bswap.i64 intrinsic. In both
; configurations each 32-bit register is byte-swapped on its own and the two
; results trade places: the swapped a1 ends up in a0 and the swapped a0 ends
; up in a1. RV32I uses the shift/mask/or expansion (mask 0xFF00 from
; lui 16 / addi -256, shared by both halves); RV32ZBB uses two rev8
; instructions plus a move.
1324 define i64 @bswap_i64(i64 %a) {
1325 ; RV32I-LABEL: bswap_i64:
1327 ; RV32I-NEXT: srli a2, a1, 8
1328 ; RV32I-NEXT: lui a3, 16
1329 ; RV32I-NEXT: addi a3, a3, -256
1330 ; RV32I-NEXT: and a2, a2, a3
1331 ; RV32I-NEXT: srli a4, a1, 24
1332 ; RV32I-NEXT: or a2, a2, a4
1333 ; RV32I-NEXT: and a4, a1, a3
1334 ; RV32I-NEXT: slli a4, a4, 8
1335 ; RV32I-NEXT: slli a1, a1, 24
1336 ; RV32I-NEXT: or a1, a1, a4
1337 ; RV32I-NEXT: or a2, a1, a2
1338 ; RV32I-NEXT: srli a1, a0, 8
1339 ; RV32I-NEXT: and a1, a1, a3
1340 ; RV32I-NEXT: srli a4, a0, 24
1341 ; RV32I-NEXT: or a1, a1, a4
1342 ; RV32I-NEXT: and a3, a0, a3
1343 ; RV32I-NEXT: slli a3, a3, 8
1344 ; RV32I-NEXT: slli a0, a0, 24
1345 ; RV32I-NEXT: or a0, a0, a3
1346 ; RV32I-NEXT: or a1, a0, a1
1347 ; RV32I-NEXT: mv a0, a2
1350 ; RV32ZBB-LABEL: bswap_i64:
1352 ; RV32ZBB-NEXT: rev8 a2, a1
1353 ; RV32ZBB-NEXT: rev8 a1, a0
1354 ; RV32ZBB-NEXT: mv a0, a2
1356 %1 = call i64 @llvm.bswap.i64(i64 %a)
; i16 pattern: mask with 257 (0x0101), then `mul nuw` by 255. The RV32I lines
; expect the multiply as shift-left-by-8 minus the masked value; the RV32ZBB
; lines expect the mask followed by a single orc.b.
1360 define i16 @orc_b_i16(i16 %a) {
1361 ; RV32I-LABEL: orc_b_i16:
1363 ; RV32I-NEXT: andi a0, a0, 257
1364 ; RV32I-NEXT: slli a1, a0, 8
1365 ; RV32I-NEXT: sub a0, a1, a0
1368 ; RV32ZBB-LABEL: orc_b_i16:
1370 ; RV32ZBB-NEXT: andi a0, a0, 257
1371 ; RV32ZBB-NEXT: orc.b a0, a0
1373 %1 = and i16 %a, 257
1374 %2 = mul nuw i16 %1, 255
; i32 pattern: mask with 16843009 (0x01010101, materialised by
; lui 4112 / addi 257), then `mul nuw` by 255. The RV32I lines expect the
; multiply as shift-left-by-8 minus the masked value; the RV32ZBB lines expect
; the mask followed by a single orc.b.
1378 define i32 @orc_b_i32(i32 %a) {
1379 ; RV32I-LABEL: orc_b_i32:
1381 ; RV32I-NEXT: lui a1, 4112
1382 ; RV32I-NEXT: addi a1, a1, 257
1383 ; RV32I-NEXT: and a0, a0, a1
1384 ; RV32I-NEXT: slli a1, a0, 8
1385 ; RV32I-NEXT: sub a0, a1, a0
1388 ; RV32ZBB-LABEL: orc_b_i32:
1390 ; RV32ZBB-NEXT: lui a1, 4112
1391 ; RV32ZBB-NEXT: addi a1, a1, 257
1392 ; RV32ZBB-NEXT: and a0, a0, a1
1393 ; RV32ZBB-NEXT: orc.b a0, a0
1395 %1 = and i32 %a, 16843009
1396 %2 = mul nuw i32 %1, 255
; i64 pattern: mask with 72340172838076673 (0x0101010101010101), then
; `mul nuw` by 255. The assertions use the shared CHECK prefix and the expected
; sequence shown contains no orc.b: both registers are masked with the constant
; from lui 4112 / addi 257, and the multiply is expanded into a two-register
; shift-left-by-8 and subtract, with sltu a3, a2, a0 providing the borrow.
1400 define i64 @orc_b_i64(i64 %a) {
1401 ; CHECK-LABEL: orc_b_i64:
1403 ; CHECK-NEXT: lui a2, 4112
1404 ; CHECK-NEXT: addi a2, a2, 257
1405 ; CHECK-NEXT: and a1, a1, a2
1406 ; CHECK-NEXT: and a0, a0, a2
1407 ; CHECK-NEXT: slli a2, a0, 8
1408 ; CHECK-NEXT: sltu a3, a2, a0
1409 ; CHECK-NEXT: srli a4, a0, 24
1410 ; CHECK-NEXT: slli a5, a1, 8
1411 ; CHECK-NEXT: or a4, a5, a4
1412 ; CHECK-NEXT: sub a1, a4, a1
1413 ; CHECK-NEXT: sub a1, a1, a3
1414 ; CHECK-NEXT: sub a0, a2, a0
1416 %1 = and i64 %a, 72340172838076673
1417 %2 = mul nuw i64 %1, 255