1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s -check-prefixes=CHECK,RV32I
4 ; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s -check-prefixes=CHECK,RV32ZBB
7 declare i32 @llvm.ctlz.i32(i32, i1)
9 define i32 @ctlz_i32(i32 %a) nounwind {
10 ; RV32I-LABEL: ctlz_i32:
12 ; RV32I-NEXT: beqz a0, .LBB0_2
13 ; RV32I-NEXT: # %bb.1: # %cond.false
14 ; RV32I-NEXT: addi sp, sp, -16
15 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
16 ; RV32I-NEXT: srli a1, a0, 1
17 ; RV32I-NEXT: or a0, a0, a1
18 ; RV32I-NEXT: srli a1, a0, 2
19 ; RV32I-NEXT: or a0, a0, a1
20 ; RV32I-NEXT: srli a1, a0, 4
21 ; RV32I-NEXT: or a0, a0, a1
22 ; RV32I-NEXT: srli a1, a0, 8
23 ; RV32I-NEXT: or a0, a0, a1
24 ; RV32I-NEXT: srli a1, a0, 16
25 ; RV32I-NEXT: or a0, a0, a1
26 ; RV32I-NEXT: not a0, a0
27 ; RV32I-NEXT: srli a1, a0, 1
28 ; RV32I-NEXT: lui a2, 349525
29 ; RV32I-NEXT: addi a2, a2, 1365
30 ; RV32I-NEXT: and a1, a1, a2
31 ; RV32I-NEXT: sub a0, a0, a1
32 ; RV32I-NEXT: lui a1, 209715
33 ; RV32I-NEXT: addi a1, a1, 819
34 ; RV32I-NEXT: and a2, a0, a1
35 ; RV32I-NEXT: srli a0, a0, 2
36 ; RV32I-NEXT: and a0, a0, a1
37 ; RV32I-NEXT: add a0, a2, a0
38 ; RV32I-NEXT: srli a1, a0, 4
39 ; RV32I-NEXT: add a0, a0, a1
40 ; RV32I-NEXT: lui a1, 61681
41 ; RV32I-NEXT: addi a1, a1, -241
42 ; RV32I-NEXT: and a0, a0, a1
43 ; RV32I-NEXT: lui a1, 4112
44 ; RV32I-NEXT: addi a1, a1, 257
45 ; RV32I-NEXT: call __mulsi3@plt
46 ; RV32I-NEXT: srli a0, a0, 24
47 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
48 ; RV32I-NEXT: addi sp, sp, 16
50 ; RV32I-NEXT: .LBB0_2:
51 ; RV32I-NEXT: li a0, 32
54 ; RV32ZBB-LABEL: ctlz_i32:
56 ; RV32ZBB-NEXT: clz a0, a0
58 %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
62 declare i64 @llvm.ctlz.i64(i64, i1)
64 define i64 @ctlz_i64(i64 %a) nounwind {
65 ; RV32I-LABEL: ctlz_i64:
67 ; RV32I-NEXT: addi sp, sp, -32
68 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
69 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
70 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
71 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
72 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
73 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
74 ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
75 ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
76 ; RV32I-NEXT: mv s0, a1
77 ; RV32I-NEXT: mv s2, a0
78 ; RV32I-NEXT: srli a0, a1, 1
79 ; RV32I-NEXT: or a0, a1, a0
80 ; RV32I-NEXT: srli a1, a0, 2
81 ; RV32I-NEXT: or a0, a0, a1
82 ; RV32I-NEXT: srli a1, a0, 4
83 ; RV32I-NEXT: or a0, a0, a1
84 ; RV32I-NEXT: srli a1, a0, 8
85 ; RV32I-NEXT: or a0, a0, a1
86 ; RV32I-NEXT: srli a1, a0, 16
87 ; RV32I-NEXT: or a0, a0, a1
88 ; RV32I-NEXT: not a0, a0
89 ; RV32I-NEXT: srli a1, a0, 1
90 ; RV32I-NEXT: lui a2, 349525
91 ; RV32I-NEXT: addi s4, a2, 1365
92 ; RV32I-NEXT: and a1, a1, s4
93 ; RV32I-NEXT: sub a0, a0, a1
94 ; RV32I-NEXT: lui a1, 209715
95 ; RV32I-NEXT: addi s5, a1, 819
96 ; RV32I-NEXT: and a1, a0, s5
97 ; RV32I-NEXT: srli a0, a0, 2
98 ; RV32I-NEXT: and a0, a0, s5
99 ; RV32I-NEXT: add a0, a1, a0
100 ; RV32I-NEXT: srli a1, a0, 4
101 ; RV32I-NEXT: add a0, a0, a1
102 ; RV32I-NEXT: lui a1, 61681
103 ; RV32I-NEXT: addi s6, a1, -241
104 ; RV32I-NEXT: and a0, a0, s6
105 ; RV32I-NEXT: lui a1, 4112
106 ; RV32I-NEXT: addi s3, a1, 257
107 ; RV32I-NEXT: mv a1, s3
108 ; RV32I-NEXT: call __mulsi3@plt
109 ; RV32I-NEXT: mv s1, a0
110 ; RV32I-NEXT: srli a0, s2, 1
111 ; RV32I-NEXT: or a0, s2, a0
112 ; RV32I-NEXT: srli a1, a0, 2
113 ; RV32I-NEXT: or a0, a0, a1
114 ; RV32I-NEXT: srli a1, a0, 4
115 ; RV32I-NEXT: or a0, a0, a1
116 ; RV32I-NEXT: srli a1, a0, 8
117 ; RV32I-NEXT: or a0, a0, a1
118 ; RV32I-NEXT: srli a1, a0, 16
119 ; RV32I-NEXT: or a0, a0, a1
120 ; RV32I-NEXT: not a0, a0
121 ; RV32I-NEXT: srli a1, a0, 1
122 ; RV32I-NEXT: and a1, a1, s4
123 ; RV32I-NEXT: sub a0, a0, a1
124 ; RV32I-NEXT: and a1, a0, s5
125 ; RV32I-NEXT: srli a0, a0, 2
126 ; RV32I-NEXT: and a0, a0, s5
127 ; RV32I-NEXT: add a0, a1, a0
128 ; RV32I-NEXT: srli a1, a0, 4
129 ; RV32I-NEXT: add a0, a0, a1
130 ; RV32I-NEXT: and a0, a0, s6
131 ; RV32I-NEXT: mv a1, s3
132 ; RV32I-NEXT: call __mulsi3@plt
133 ; RV32I-NEXT: bnez s0, .LBB1_2
134 ; RV32I-NEXT: # %bb.1:
135 ; RV32I-NEXT: srli a0, a0, 24
136 ; RV32I-NEXT: addi a0, a0, 32
137 ; RV32I-NEXT: j .LBB1_3
138 ; RV32I-NEXT: .LBB1_2:
139 ; RV32I-NEXT: srli a0, s1, 24
140 ; RV32I-NEXT: .LBB1_3:
141 ; RV32I-NEXT: li a1, 0
142 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
143 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
144 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
145 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
146 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
147 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
148 ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
149 ; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
150 ; RV32I-NEXT: addi sp, sp, 32
153 ; RV32ZBB-LABEL: ctlz_i64:
155 ; RV32ZBB-NEXT: bnez a1, .LBB1_2
156 ; RV32ZBB-NEXT: # %bb.1:
157 ; RV32ZBB-NEXT: clz a0, a0
158 ; RV32ZBB-NEXT: addi a0, a0, 32
159 ; RV32ZBB-NEXT: li a1, 0
161 ; RV32ZBB-NEXT: .LBB1_2:
162 ; RV32ZBB-NEXT: clz a0, a1
163 ; RV32ZBB-NEXT: li a1, 0
165 %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
169 declare i32 @llvm.cttz.i32(i32, i1)
171 define i32 @cttz_i32(i32 %a) nounwind {
172 ; RV32I-LABEL: cttz_i32:
174 ; RV32I-NEXT: beqz a0, .LBB2_2
175 ; RV32I-NEXT: # %bb.1: # %cond.false
176 ; RV32I-NEXT: addi sp, sp, -16
177 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
178 ; RV32I-NEXT: neg a1, a0
179 ; RV32I-NEXT: and a0, a0, a1
180 ; RV32I-NEXT: lui a1, 30667
181 ; RV32I-NEXT: addi a1, a1, 1329
182 ; RV32I-NEXT: call __mulsi3@plt
183 ; RV32I-NEXT: srli a0, a0, 27
184 ; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
185 ; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
186 ; RV32I-NEXT: add a0, a1, a0
187 ; RV32I-NEXT: lbu a0, 0(a0)
188 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
189 ; RV32I-NEXT: addi sp, sp, 16
191 ; RV32I-NEXT: .LBB2_2:
192 ; RV32I-NEXT: li a0, 32
195 ; RV32ZBB-LABEL: cttz_i32:
197 ; RV32ZBB-NEXT: ctz a0, a0
199 %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
203 declare i64 @llvm.cttz.i64(i64, i1)
205 define i64 @cttz_i64(i64 %a) nounwind {
206 ; RV32I-LABEL: cttz_i64:
208 ; RV32I-NEXT: addi sp, sp, -32
209 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
210 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
211 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
212 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
213 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
214 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
215 ; RV32I-NEXT: mv s2, a1
216 ; RV32I-NEXT: mv s0, a0
217 ; RV32I-NEXT: neg a0, a0
218 ; RV32I-NEXT: and a0, s0, a0
219 ; RV32I-NEXT: lui a1, 30667
220 ; RV32I-NEXT: addi s3, a1, 1329
221 ; RV32I-NEXT: mv a1, s3
222 ; RV32I-NEXT: call __mulsi3@plt
223 ; RV32I-NEXT: mv s1, a0
224 ; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
225 ; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
226 ; RV32I-NEXT: neg a0, s2
227 ; RV32I-NEXT: and a0, s2, a0
228 ; RV32I-NEXT: mv a1, s3
229 ; RV32I-NEXT: call __mulsi3@plt
230 ; RV32I-NEXT: bnez s2, .LBB3_3
231 ; RV32I-NEXT: # %bb.1:
232 ; RV32I-NEXT: li a0, 32
233 ; RV32I-NEXT: beqz s0, .LBB3_4
234 ; RV32I-NEXT: .LBB3_2:
235 ; RV32I-NEXT: srli s1, s1, 27
236 ; RV32I-NEXT: add s1, s4, s1
237 ; RV32I-NEXT: lbu a0, 0(s1)
238 ; RV32I-NEXT: j .LBB3_5
239 ; RV32I-NEXT: .LBB3_3:
240 ; RV32I-NEXT: srli a0, a0, 27
241 ; RV32I-NEXT: add a0, s4, a0
242 ; RV32I-NEXT: lbu a0, 0(a0)
243 ; RV32I-NEXT: bnez s0, .LBB3_2
244 ; RV32I-NEXT: .LBB3_4:
245 ; RV32I-NEXT: addi a0, a0, 32
246 ; RV32I-NEXT: .LBB3_5:
247 ; RV32I-NEXT: li a1, 0
248 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
249 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
250 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
251 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
252 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
253 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
254 ; RV32I-NEXT: addi sp, sp, 32
257 ; RV32ZBB-LABEL: cttz_i64:
259 ; RV32ZBB-NEXT: bnez a0, .LBB3_2
260 ; RV32ZBB-NEXT: # %bb.1:
261 ; RV32ZBB-NEXT: ctz a0, a1
262 ; RV32ZBB-NEXT: addi a0, a0, 32
263 ; RV32ZBB-NEXT: li a1, 0
265 ; RV32ZBB-NEXT: .LBB3_2:
266 ; RV32ZBB-NEXT: ctz a0, a0
267 ; RV32ZBB-NEXT: li a1, 0
269 %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
273 declare i32 @llvm.ctpop.i32(i32)
275 define i32 @ctpop_i32(i32 %a) nounwind {
276 ; RV32I-LABEL: ctpop_i32:
278 ; RV32I-NEXT: addi sp, sp, -16
279 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
280 ; RV32I-NEXT: srli a1, a0, 1
281 ; RV32I-NEXT: lui a2, 349525
282 ; RV32I-NEXT: addi a2, a2, 1365
283 ; RV32I-NEXT: and a1, a1, a2
284 ; RV32I-NEXT: sub a0, a0, a1
285 ; RV32I-NEXT: lui a1, 209715
286 ; RV32I-NEXT: addi a1, a1, 819
287 ; RV32I-NEXT: and a2, a0, a1
288 ; RV32I-NEXT: srli a0, a0, 2
289 ; RV32I-NEXT: and a0, a0, a1
290 ; RV32I-NEXT: add a0, a2, a0
291 ; RV32I-NEXT: srli a1, a0, 4
292 ; RV32I-NEXT: add a0, a0, a1
293 ; RV32I-NEXT: lui a1, 61681
294 ; RV32I-NEXT: addi a1, a1, -241
295 ; RV32I-NEXT: and a0, a0, a1
296 ; RV32I-NEXT: lui a1, 4112
297 ; RV32I-NEXT: addi a1, a1, 257
298 ; RV32I-NEXT: call __mulsi3@plt
299 ; RV32I-NEXT: srli a0, a0, 24
300 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
301 ; RV32I-NEXT: addi sp, sp, 16
304 ; RV32ZBB-LABEL: ctpop_i32:
306 ; RV32ZBB-NEXT: cpop a0, a0
308 %1 = call i32 @llvm.ctpop.i32(i32 %a)
312 define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind {
313 ; RV32I-LABEL: ctpop_i32_ult_two:
315 ; RV32I-NEXT: addi a1, a0, -1
316 ; RV32I-NEXT: and a0, a0, a1
317 ; RV32I-NEXT: seqz a0, a0
320 ; RV32ZBB-LABEL: ctpop_i32_ult_two:
322 ; RV32ZBB-NEXT: cpop a0, a0
323 ; RV32ZBB-NEXT: sltiu a0, a0, 2
325 %1 = call i32 @llvm.ctpop.i32(i32 %a)
326 %2 = icmp ult i32 %1, 2
330 define i1 @ctpop_i32_ugt_one(i32 signext %a) nounwind {
331 ; RV32I-LABEL: ctpop_i32_ugt_one:
333 ; RV32I-NEXT: addi a1, a0, -1
334 ; RV32I-NEXT: and a0, a0, a1
335 ; RV32I-NEXT: snez a0, a0
338 ; RV32ZBB-LABEL: ctpop_i32_ugt_one:
340 ; RV32ZBB-NEXT: cpop a0, a0
341 ; RV32ZBB-NEXT: sltiu a0, a0, 2
342 ; RV32ZBB-NEXT: xori a0, a0, 1
344 %1 = call i32 @llvm.ctpop.i32(i32 %a)
345 %2 = icmp ugt i32 %1, 1
349 define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind {
350 ; RV32I-LABEL: ctpop_i32_eq_one:
352 ; RV32I-NEXT: addi a1, a0, -1
353 ; RV32I-NEXT: and a1, a0, a1
354 ; RV32I-NEXT: seqz a1, a1
355 ; RV32I-NEXT: snez a0, a0
356 ; RV32I-NEXT: and a0, a0, a1
359 ; RV32ZBB-LABEL: ctpop_i32_eq_one:
361 ; RV32ZBB-NEXT: cpop a0, a0
362 ; RV32ZBB-NEXT: addi a0, a0, -1
363 ; RV32ZBB-NEXT: seqz a0, a0
365 %1 = call i32 @llvm.ctpop.i32(i32 %a)
366 %2 = icmp eq i32 %1, 1
370 define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
371 ; RV32I-LABEL: ctpop_i32_ne_one:
373 ; RV32I-NEXT: addi a1, a0, -1
374 ; RV32I-NEXT: and a1, a0, a1
375 ; RV32I-NEXT: snez a1, a1
376 ; RV32I-NEXT: seqz a0, a0
377 ; RV32I-NEXT: or a0, a0, a1
380 ; RV32ZBB-LABEL: ctpop_i32_ne_one:
382 ; RV32ZBB-NEXT: cpop a0, a0
383 ; RV32ZBB-NEXT: addi a0, a0, -1
384 ; RV32ZBB-NEXT: snez a0, a0
386 %1 = call i32 @llvm.ctpop.i32(i32 %a)
387 %2 = icmp ne i32 %1, 1
391 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
393 define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
394 ; RV32I-LABEL: ctpop_v2i32:
396 ; RV32I-NEXT: addi sp, sp, -32
397 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
398 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
399 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
400 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
401 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
402 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
403 ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
404 ; RV32I-NEXT: mv s0, a1
405 ; RV32I-NEXT: srli a1, a0, 1
406 ; RV32I-NEXT: lui a2, 349525
407 ; RV32I-NEXT: addi s3, a2, 1365
408 ; RV32I-NEXT: and a1, a1, s3
409 ; RV32I-NEXT: sub a0, a0, a1
410 ; RV32I-NEXT: lui a1, 209715
411 ; RV32I-NEXT: addi s4, a1, 819
412 ; RV32I-NEXT: and a1, a0, s4
413 ; RV32I-NEXT: srli a0, a0, 2
414 ; RV32I-NEXT: and a0, a0, s4
415 ; RV32I-NEXT: add a0, a1, a0
416 ; RV32I-NEXT: srli a1, a0, 4
417 ; RV32I-NEXT: add a0, a0, a1
418 ; RV32I-NEXT: lui a1, 61681
419 ; RV32I-NEXT: addi s5, a1, -241
420 ; RV32I-NEXT: and a0, a0, s5
421 ; RV32I-NEXT: lui a1, 4112
422 ; RV32I-NEXT: addi s1, a1, 257
423 ; RV32I-NEXT: mv a1, s1
424 ; RV32I-NEXT: call __mulsi3@plt
425 ; RV32I-NEXT: srli s2, a0, 24
426 ; RV32I-NEXT: srli a0, s0, 1
427 ; RV32I-NEXT: and a0, a0, s3
428 ; RV32I-NEXT: sub s0, s0, a0
429 ; RV32I-NEXT: and a0, s0, s4
430 ; RV32I-NEXT: srli s0, s0, 2
431 ; RV32I-NEXT: and a1, s0, s4
432 ; RV32I-NEXT: add a0, a0, a1
433 ; RV32I-NEXT: srli a1, a0, 4
434 ; RV32I-NEXT: add a0, a0, a1
435 ; RV32I-NEXT: and a0, a0, s5
436 ; RV32I-NEXT: mv a1, s1
437 ; RV32I-NEXT: call __mulsi3@plt
438 ; RV32I-NEXT: srli a1, a0, 24
439 ; RV32I-NEXT: mv a0, s2
440 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
441 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
442 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
443 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
444 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
445 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
446 ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
447 ; RV32I-NEXT: addi sp, sp, 32
450 ; RV32ZBB-LABEL: ctpop_v2i32:
452 ; RV32ZBB-NEXT: cpop a0, a0
453 ; RV32ZBB-NEXT: cpop a1, a1
455 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
459 define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
460 ; RV32I-LABEL: ctpop_v2i32_ult_two:
462 ; RV32I-NEXT: addi a2, a0, -1
463 ; RV32I-NEXT: and a0, a0, a2
464 ; RV32I-NEXT: seqz a0, a0
465 ; RV32I-NEXT: addi a2, a1, -1
466 ; RV32I-NEXT: and a1, a1, a2
467 ; RV32I-NEXT: seqz a1, a1
470 ; RV32ZBB-LABEL: ctpop_v2i32_ult_two:
472 ; RV32ZBB-NEXT: cpop a1, a1
473 ; RV32ZBB-NEXT: cpop a0, a0
474 ; RV32ZBB-NEXT: sltiu a0, a0, 2
475 ; RV32ZBB-NEXT: sltiu a1, a1, 2
477 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
478 %2 = icmp ult <2 x i32> %1, <i32 2, i32 2>
482 define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
483 ; RV32I-LABEL: ctpop_v2i32_ugt_one:
485 ; RV32I-NEXT: addi a2, a0, -1
486 ; RV32I-NEXT: and a0, a0, a2
487 ; RV32I-NEXT: snez a0, a0
488 ; RV32I-NEXT: addi a2, a1, -1
489 ; RV32I-NEXT: and a1, a1, a2
490 ; RV32I-NEXT: snez a1, a1
493 ; RV32ZBB-LABEL: ctpop_v2i32_ugt_one:
495 ; RV32ZBB-NEXT: cpop a1, a1
496 ; RV32ZBB-NEXT: cpop a0, a0
497 ; RV32ZBB-NEXT: sltiu a0, a0, 2
498 ; RV32ZBB-NEXT: xori a0, a0, 1
499 ; RV32ZBB-NEXT: sltiu a1, a1, 2
500 ; RV32ZBB-NEXT: xori a1, a1, 1
502 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
503 %2 = icmp ugt <2 x i32> %1, <i32 1, i32 1>
507 define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
508 ; RV32I-LABEL: ctpop_v2i32_eq_one:
510 ; RV32I-NEXT: addi a2, a0, -1
511 ; RV32I-NEXT: and a2, a0, a2
512 ; RV32I-NEXT: seqz a2, a2
513 ; RV32I-NEXT: snez a0, a0
514 ; RV32I-NEXT: and a0, a0, a2
515 ; RV32I-NEXT: addi a2, a1, -1
516 ; RV32I-NEXT: and a2, a1, a2
517 ; RV32I-NEXT: seqz a2, a2
518 ; RV32I-NEXT: snez a1, a1
519 ; RV32I-NEXT: and a1, a1, a2
522 ; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
524 ; RV32ZBB-NEXT: cpop a1, a1
525 ; RV32ZBB-NEXT: cpop a0, a0
526 ; RV32ZBB-NEXT: addi a0, a0, -1
527 ; RV32ZBB-NEXT: seqz a0, a0
528 ; RV32ZBB-NEXT: addi a1, a1, -1
529 ; RV32ZBB-NEXT: seqz a1, a1
531 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
532 %2 = icmp eq <2 x i32> %1, <i32 1, i32 1>
536 define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
537 ; RV32I-LABEL: ctpop_v2i32_ne_one:
539 ; RV32I-NEXT: addi a2, a0, -1
540 ; RV32I-NEXT: and a2, a0, a2
541 ; RV32I-NEXT: snez a2, a2
542 ; RV32I-NEXT: seqz a0, a0
543 ; RV32I-NEXT: or a0, a0, a2
544 ; RV32I-NEXT: addi a2, a1, -1
545 ; RV32I-NEXT: and a2, a1, a2
546 ; RV32I-NEXT: snez a2, a2
547 ; RV32I-NEXT: seqz a1, a1
548 ; RV32I-NEXT: or a1, a1, a2
551 ; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
553 ; RV32ZBB-NEXT: cpop a1, a1
554 ; RV32ZBB-NEXT: cpop a0, a0
555 ; RV32ZBB-NEXT: addi a0, a0, -1
556 ; RV32ZBB-NEXT: snez a0, a0
557 ; RV32ZBB-NEXT: addi a1, a1, -1
558 ; RV32ZBB-NEXT: snez a1, a1
560 %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
561 %2 = icmp ne <2 x i32> %1, <i32 1, i32 1>
565 declare i64 @llvm.ctpop.i64(i64)
567 define i64 @ctpop_i64(i64 %a) nounwind {
568 ; RV32I-LABEL: ctpop_i64:
570 ; RV32I-NEXT: addi sp, sp, -32
571 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
572 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
573 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
574 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
575 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
576 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
577 ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
578 ; RV32I-NEXT: mv s0, a0
579 ; RV32I-NEXT: srli a0, a1, 1
580 ; RV32I-NEXT: lui a2, 349525
581 ; RV32I-NEXT: addi s2, a2, 1365
582 ; RV32I-NEXT: and a0, a0, s2
583 ; RV32I-NEXT: sub a1, a1, a0
584 ; RV32I-NEXT: lui a0, 209715
585 ; RV32I-NEXT: addi s3, a0, 819
586 ; RV32I-NEXT: and a0, a1, s3
587 ; RV32I-NEXT: srli a1, a1, 2
588 ; RV32I-NEXT: and a1, a1, s3
589 ; RV32I-NEXT: add a0, a0, a1
590 ; RV32I-NEXT: srli a1, a0, 4
591 ; RV32I-NEXT: add a0, a0, a1
592 ; RV32I-NEXT: lui a1, 61681
593 ; RV32I-NEXT: addi s4, a1, -241
594 ; RV32I-NEXT: and a0, a0, s4
595 ; RV32I-NEXT: lui a1, 4112
596 ; RV32I-NEXT: addi s1, a1, 257
597 ; RV32I-NEXT: mv a1, s1
598 ; RV32I-NEXT: call __mulsi3@plt
599 ; RV32I-NEXT: srli s5, a0, 24
600 ; RV32I-NEXT: srli a0, s0, 1
601 ; RV32I-NEXT: and a0, a0, s2
602 ; RV32I-NEXT: sub s0, s0, a0
603 ; RV32I-NEXT: and a0, s0, s3
604 ; RV32I-NEXT: srli s0, s0, 2
605 ; RV32I-NEXT: and a1, s0, s3
606 ; RV32I-NEXT: add a0, a0, a1
607 ; RV32I-NEXT: srli a1, a0, 4
608 ; RV32I-NEXT: add a0, a0, a1
609 ; RV32I-NEXT: and a0, a0, s4
610 ; RV32I-NEXT: mv a1, s1
611 ; RV32I-NEXT: call __mulsi3@plt
612 ; RV32I-NEXT: srli a0, a0, 24
613 ; RV32I-NEXT: add a0, a0, s5
614 ; RV32I-NEXT: li a1, 0
615 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
616 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
617 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
618 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
619 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
620 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
621 ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
622 ; RV32I-NEXT: addi sp, sp, 32
625 ; RV32ZBB-LABEL: ctpop_i64:
627 ; RV32ZBB-NEXT: cpop a1, a1
628 ; RV32ZBB-NEXT: cpop a0, a0
629 ; RV32ZBB-NEXT: add a0, a0, a1
630 ; RV32ZBB-NEXT: li a1, 0
632 %1 = call i64 @llvm.ctpop.i64(i64 %a)
636 define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
637 ; RV32I-LABEL: ctpop_i64_ugt_two:
639 ; RV32I-NEXT: addi a2, a0, -1
640 ; RV32I-NEXT: and a2, a0, a2
641 ; RV32I-NEXT: seqz a0, a0
642 ; RV32I-NEXT: sub a0, a1, a0
643 ; RV32I-NEXT: and a0, a1, a0
644 ; RV32I-NEXT: or a0, a2, a0
645 ; RV32I-NEXT: seqz a0, a0
648 ; RV32ZBB-LABEL: ctpop_i64_ugt_two:
650 ; RV32ZBB-NEXT: cpop a1, a1
651 ; RV32ZBB-NEXT: cpop a0, a0
652 ; RV32ZBB-NEXT: add a0, a0, a1
653 ; RV32ZBB-NEXT: sltiu a0, a0, 2
655 %1 = call i64 @llvm.ctpop.i64(i64 %a)
656 %2 = icmp ult i64 %1, 2
660 define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
661 ; RV32I-LABEL: ctpop_i64_ugt_one:
663 ; RV32I-NEXT: addi a2, a0, -1
664 ; RV32I-NEXT: and a2, a0, a2
665 ; RV32I-NEXT: seqz a0, a0
666 ; RV32I-NEXT: sub a0, a1, a0
667 ; RV32I-NEXT: and a0, a1, a0
668 ; RV32I-NEXT: or a0, a2, a0
669 ; RV32I-NEXT: snez a0, a0
672 ; RV32ZBB-LABEL: ctpop_i64_ugt_one:
674 ; RV32ZBB-NEXT: cpop a1, a1
675 ; RV32ZBB-NEXT: cpop a0, a0
676 ; RV32ZBB-NEXT: add a0, a0, a1
677 ; RV32ZBB-NEXT: sltiu a0, a0, 2
678 ; RV32ZBB-NEXT: xori a0, a0, 1
680 %1 = call i64 @llvm.ctpop.i64(i64 %a)
681 %2 = icmp ugt i64 %1, 1
685 define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
686 ; RV32I-LABEL: ctpop_i64_eq_one:
688 ; RV32I-NEXT: addi a2, a0, -1
689 ; RV32I-NEXT: and a2, a0, a2
690 ; RV32I-NEXT: seqz a3, a0
691 ; RV32I-NEXT: sub a3, a1, a3
692 ; RV32I-NEXT: and a3, a1, a3
693 ; RV32I-NEXT: or a2, a2, a3
694 ; RV32I-NEXT: seqz a2, a2
695 ; RV32I-NEXT: or a0, a0, a1
696 ; RV32I-NEXT: snez a0, a0
697 ; RV32I-NEXT: and a0, a0, a2
700 ; RV32ZBB-LABEL: ctpop_i64_eq_one:
702 ; RV32ZBB-NEXT: cpop a1, a1
703 ; RV32ZBB-NEXT: cpop a0, a0
704 ; RV32ZBB-NEXT: add a0, a0, a1
705 ; RV32ZBB-NEXT: addi a0, a0, -1
706 ; RV32ZBB-NEXT: seqz a0, a0
708 %1 = call i64 @llvm.ctpop.i64(i64 %a)
709 %2 = icmp eq i64 %1, 1
713 define i1 @ctpop_i64_ne_one(i64 %a) nounwind {
714 ; RV32I-LABEL: ctpop_i64_ne_one:
716 ; RV32I-NEXT: addi a2, a0, -1
717 ; RV32I-NEXT: and a2, a0, a2
718 ; RV32I-NEXT: seqz a3, a0
719 ; RV32I-NEXT: sub a3, a1, a3
720 ; RV32I-NEXT: and a3, a1, a3
721 ; RV32I-NEXT: or a2, a2, a3
722 ; RV32I-NEXT: snez a2, a2
723 ; RV32I-NEXT: or a0, a0, a1
724 ; RV32I-NEXT: seqz a0, a0
725 ; RV32I-NEXT: or a0, a0, a2
728 ; RV32ZBB-LABEL: ctpop_i64_ne_one:
730 ; RV32ZBB-NEXT: cpop a1, a1
731 ; RV32ZBB-NEXT: cpop a0, a0
732 ; RV32ZBB-NEXT: add a0, a0, a1
733 ; RV32ZBB-NEXT: addi a0, a0, -1
734 ; RV32ZBB-NEXT: snez a0, a0
736 %1 = call i64 @llvm.ctpop.i64(i64 %a)
737 %2 = icmp ne i64 %1, 1
741 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
743 define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
744 ; RV32I-LABEL: ctpop_v2i64:
746 ; RV32I-NEXT: addi sp, sp, -48
747 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
748 ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
749 ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
750 ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
751 ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
752 ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
753 ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
754 ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
755 ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
756 ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
757 ; RV32I-NEXT: mv s0, a0
758 ; RV32I-NEXT: lw a0, 4(a1)
759 ; RV32I-NEXT: lw s2, 8(a1)
760 ; RV32I-NEXT: lw s5, 12(a1)
761 ; RV32I-NEXT: lw s6, 0(a1)
762 ; RV32I-NEXT: srli a1, a0, 1
763 ; RV32I-NEXT: lui a2, 349525
764 ; RV32I-NEXT: addi s3, a2, 1365
765 ; RV32I-NEXT: and a1, a1, s3
766 ; RV32I-NEXT: sub a0, a0, a1
767 ; RV32I-NEXT: lui a1, 209715
768 ; RV32I-NEXT: addi s4, a1, 819
769 ; RV32I-NEXT: and a1, a0, s4
770 ; RV32I-NEXT: srli a0, a0, 2
771 ; RV32I-NEXT: and a0, a0, s4
772 ; RV32I-NEXT: add a0, a1, a0
773 ; RV32I-NEXT: srli a1, a0, 4
774 ; RV32I-NEXT: add a0, a0, a1
775 ; RV32I-NEXT: lui a1, 61681
776 ; RV32I-NEXT: addi s7, a1, -241
777 ; RV32I-NEXT: and a0, a0, s7
778 ; RV32I-NEXT: lui a1, 4112
779 ; RV32I-NEXT: addi s1, a1, 257
780 ; RV32I-NEXT: mv a1, s1
781 ; RV32I-NEXT: call __mulsi3@plt
782 ; RV32I-NEXT: srli s8, a0, 24
783 ; RV32I-NEXT: srli a0, s6, 1
784 ; RV32I-NEXT: and a0, a0, s3
785 ; RV32I-NEXT: sub a0, s6, a0
786 ; RV32I-NEXT: and a1, a0, s4
787 ; RV32I-NEXT: srli a0, a0, 2
788 ; RV32I-NEXT: and a0, a0, s4
789 ; RV32I-NEXT: add a0, a1, a0
790 ; RV32I-NEXT: srli a1, a0, 4
791 ; RV32I-NEXT: add a0, a0, a1
792 ; RV32I-NEXT: and a0, a0, s7
793 ; RV32I-NEXT: mv a1, s1
794 ; RV32I-NEXT: call __mulsi3@plt
795 ; RV32I-NEXT: srli a0, a0, 24
796 ; RV32I-NEXT: add s8, a0, s8
797 ; RV32I-NEXT: srli a0, s5, 1
798 ; RV32I-NEXT: and a0, a0, s3
799 ; RV32I-NEXT: sub a0, s5, a0
800 ; RV32I-NEXT: and a1, a0, s4
801 ; RV32I-NEXT: srli a0, a0, 2
802 ; RV32I-NEXT: and a0, a0, s4
803 ; RV32I-NEXT: add a0, a1, a0
804 ; RV32I-NEXT: srli a1, a0, 4
805 ; RV32I-NEXT: add a0, a0, a1
806 ; RV32I-NEXT: and a0, a0, s7
807 ; RV32I-NEXT: mv a1, s1
808 ; RV32I-NEXT: call __mulsi3@plt
809 ; RV32I-NEXT: srli s5, a0, 24
810 ; RV32I-NEXT: srli a0, s2, 1
811 ; RV32I-NEXT: and a0, a0, s3
812 ; RV32I-NEXT: sub a0, s2, a0
813 ; RV32I-NEXT: and a1, a0, s4
814 ; RV32I-NEXT: srli a0, a0, 2
815 ; RV32I-NEXT: and a0, a0, s4
816 ; RV32I-NEXT: add a0, a1, a0
817 ; RV32I-NEXT: srli a1, a0, 4
818 ; RV32I-NEXT: add a0, a0, a1
819 ; RV32I-NEXT: and a0, a0, s7
820 ; RV32I-NEXT: mv a1, s1
821 ; RV32I-NEXT: call __mulsi3@plt
822 ; RV32I-NEXT: srli a0, a0, 24
823 ; RV32I-NEXT: add a0, a0, s5
824 ; RV32I-NEXT: sw zero, 12(s0)
825 ; RV32I-NEXT: sw zero, 4(s0)
826 ; RV32I-NEXT: sw a0, 8(s0)
827 ; RV32I-NEXT: sw s8, 0(s0)
828 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
829 ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
830 ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
831 ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
832 ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
833 ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
834 ; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
835 ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
836 ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
837 ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
838 ; RV32I-NEXT: addi sp, sp, 48
841 ; RV32ZBB-LABEL: ctpop_v2i64:
843 ; RV32ZBB-NEXT: lw a2, 4(a1)
844 ; RV32ZBB-NEXT: lw a3, 0(a1)
845 ; RV32ZBB-NEXT: lw a4, 8(a1)
846 ; RV32ZBB-NEXT: lw a1, 12(a1)
847 ; RV32ZBB-NEXT: cpop a2, a2
848 ; RV32ZBB-NEXT: cpop a3, a3
849 ; RV32ZBB-NEXT: add a2, a3, a2
850 ; RV32ZBB-NEXT: cpop a1, a1
851 ; RV32ZBB-NEXT: cpop a3, a4
852 ; RV32ZBB-NEXT: add a1, a3, a1
853 ; RV32ZBB-NEXT: sw zero, 12(a0)
854 ; RV32ZBB-NEXT: sw zero, 4(a0)
855 ; RV32ZBB-NEXT: sw a1, 8(a0)
856 ; RV32ZBB-NEXT: sw a2, 0(a0)
858 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
862 define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
863 ; RV32I-LABEL: ctpop_v2i64_ult_two:
865 ; RV32I-NEXT: lw a1, 0(a0)
866 ; RV32I-NEXT: lw a2, 12(a0)
867 ; RV32I-NEXT: lw a3, 8(a0)
868 ; RV32I-NEXT: lw a0, 4(a0)
869 ; RV32I-NEXT: addi a4, a1, -1
870 ; RV32I-NEXT: and a4, a1, a4
871 ; RV32I-NEXT: seqz a1, a1
872 ; RV32I-NEXT: sub a1, a0, a1
873 ; RV32I-NEXT: and a0, a0, a1
874 ; RV32I-NEXT: or a0, a4, a0
875 ; RV32I-NEXT: seqz a0, a0
876 ; RV32I-NEXT: addi a1, a3, -1
877 ; RV32I-NEXT: and a1, a3, a1
878 ; RV32I-NEXT: seqz a3, a3
879 ; RV32I-NEXT: sub a3, a2, a3
880 ; RV32I-NEXT: and a2, a2, a3
881 ; RV32I-NEXT: or a1, a1, a2
882 ; RV32I-NEXT: seqz a1, a1
885 ; RV32ZBB-LABEL: ctpop_v2i64_ult_two:
887 ; RV32ZBB-NEXT: lw a1, 12(a0)
888 ; RV32ZBB-NEXT: lw a2, 8(a0)
889 ; RV32ZBB-NEXT: lw a3, 0(a0)
890 ; RV32ZBB-NEXT: lw a0, 4(a0)
891 ; RV32ZBB-NEXT: cpop a1, a1
892 ; RV32ZBB-NEXT: cpop a2, a2
893 ; RV32ZBB-NEXT: add a1, a2, a1
894 ; RV32ZBB-NEXT: cpop a0, a0
895 ; RV32ZBB-NEXT: cpop a2, a3
896 ; RV32ZBB-NEXT: add a0, a2, a0
897 ; RV32ZBB-NEXT: sltiu a0, a0, 2
898 ; RV32ZBB-NEXT: sltiu a1, a1, 2
900 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
901 %2 = icmp ult <2 x i64> %1, <i64 2, i64 2>
905 define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
906 ; RV32I-LABEL: ctpop_v2i64_ugt_one:
908 ; RV32I-NEXT: lw a1, 0(a0)
909 ; RV32I-NEXT: lw a2, 12(a0)
910 ; RV32I-NEXT: lw a3, 8(a0)
911 ; RV32I-NEXT: lw a0, 4(a0)
912 ; RV32I-NEXT: addi a4, a1, -1
913 ; RV32I-NEXT: and a4, a1, a4
914 ; RV32I-NEXT: seqz a1, a1
915 ; RV32I-NEXT: sub a1, a0, a1
916 ; RV32I-NEXT: and a0, a0, a1
917 ; RV32I-NEXT: or a0, a4, a0
918 ; RV32I-NEXT: snez a0, a0
919 ; RV32I-NEXT: addi a1, a3, -1
920 ; RV32I-NEXT: and a1, a3, a1
921 ; RV32I-NEXT: seqz a3, a3
922 ; RV32I-NEXT: sub a3, a2, a3
923 ; RV32I-NEXT: and a2, a2, a3
924 ; RV32I-NEXT: or a1, a1, a2
925 ; RV32I-NEXT: snez a1, a1
928 ; RV32ZBB-LABEL: ctpop_v2i64_ugt_one:
930 ; RV32ZBB-NEXT: lw a1, 12(a0)
931 ; RV32ZBB-NEXT: lw a2, 8(a0)
932 ; RV32ZBB-NEXT: lw a3, 0(a0)
933 ; RV32ZBB-NEXT: lw a0, 4(a0)
934 ; RV32ZBB-NEXT: cpop a1, a1
935 ; RV32ZBB-NEXT: cpop a2, a2
936 ; RV32ZBB-NEXT: add a1, a2, a1
937 ; RV32ZBB-NEXT: cpop a0, a0
938 ; RV32ZBB-NEXT: cpop a2, a3
939 ; RV32ZBB-NEXT: add a0, a2, a0
940 ; RV32ZBB-NEXT: sltiu a0, a0, 2
941 ; RV32ZBB-NEXT: xori a0, a0, 1
942 ; RV32ZBB-NEXT: sltiu a1, a1, 2
943 ; RV32ZBB-NEXT: xori a1, a1, 1
945 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
946 %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
950 define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
951 ; RV32I-LABEL: ctpop_v2i64_eq_one:
953 ; RV32I-NEXT: lw a1, 0(a0)
954 ; RV32I-NEXT: lw a2, 12(a0)
955 ; RV32I-NEXT: lw a3, 8(a0)
956 ; RV32I-NEXT: lw a0, 4(a0)
957 ; RV32I-NEXT: addi a4, a1, -1
958 ; RV32I-NEXT: and a4, a1, a4
959 ; RV32I-NEXT: seqz a5, a1
960 ; RV32I-NEXT: sub a5, a0, a5
961 ; RV32I-NEXT: and a5, a0, a5
962 ; RV32I-NEXT: or a4, a4, a5
963 ; RV32I-NEXT: seqz a4, a4
964 ; RV32I-NEXT: or a0, a1, a0
965 ; RV32I-NEXT: snez a0, a0
966 ; RV32I-NEXT: and a0, a0, a4
967 ; RV32I-NEXT: addi a1, a3, -1
968 ; RV32I-NEXT: and a1, a3, a1
969 ; RV32I-NEXT: seqz a4, a3
970 ; RV32I-NEXT: sub a4, a2, a4
971 ; RV32I-NEXT: and a4, a2, a4
972 ; RV32I-NEXT: or a1, a1, a4
973 ; RV32I-NEXT: seqz a1, a1
974 ; RV32I-NEXT: or a2, a3, a2
975 ; RV32I-NEXT: snez a2, a2
976 ; RV32I-NEXT: and a1, a2, a1
979 ; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
981 ; RV32ZBB-NEXT: lw a1, 12(a0)
982 ; RV32ZBB-NEXT: lw a2, 8(a0)
983 ; RV32ZBB-NEXT: lw a3, 0(a0)
984 ; RV32ZBB-NEXT: lw a0, 4(a0)
985 ; RV32ZBB-NEXT: cpop a1, a1
986 ; RV32ZBB-NEXT: cpop a2, a2
987 ; RV32ZBB-NEXT: add a1, a2, a1
988 ; RV32ZBB-NEXT: cpop a0, a0
989 ; RV32ZBB-NEXT: cpop a2, a3
990 ; RV32ZBB-NEXT: add a0, a2, a0
991 ; RV32ZBB-NEXT: addi a0, a0, -1
992 ; RV32ZBB-NEXT: seqz a0, a0
993 ; RV32ZBB-NEXT: addi a1, a1, -1
994 ; RV32ZBB-NEXT: seqz a1, a1
996 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
997 %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
1001 define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
1002 ; RV32I-LABEL: ctpop_v2i64_ne_one:
1004 ; RV32I-NEXT: lw a1, 0(a0)
1005 ; RV32I-NEXT: lw a2, 12(a0)
1006 ; RV32I-NEXT: lw a3, 8(a0)
1007 ; RV32I-NEXT: lw a0, 4(a0)
1008 ; RV32I-NEXT: addi a4, a1, -1
1009 ; RV32I-NEXT: and a4, a1, a4
1010 ; RV32I-NEXT: seqz a5, a1
1011 ; RV32I-NEXT: sub a5, a0, a5
1012 ; RV32I-NEXT: and a5, a0, a5
1013 ; RV32I-NEXT: or a4, a4, a5
1014 ; RV32I-NEXT: snez a4, a4
1015 ; RV32I-NEXT: or a0, a1, a0
1016 ; RV32I-NEXT: seqz a0, a0
1017 ; RV32I-NEXT: or a0, a0, a4
1018 ; RV32I-NEXT: addi a1, a3, -1
1019 ; RV32I-NEXT: and a1, a3, a1
1020 ; RV32I-NEXT: seqz a4, a3
1021 ; RV32I-NEXT: sub a4, a2, a4
1022 ; RV32I-NEXT: and a4, a2, a4
1023 ; RV32I-NEXT: or a1, a1, a4
1024 ; RV32I-NEXT: snez a1, a1
1025 ; RV32I-NEXT: or a2, a3, a2
1026 ; RV32I-NEXT: seqz a2, a2
1027 ; RV32I-NEXT: or a1, a2, a1
1030 ; RV32ZBB-LABEL: ctpop_v2i64_ne_one:
1032 ; RV32ZBB-NEXT: lw a1, 12(a0)
1033 ; RV32ZBB-NEXT: lw a2, 8(a0)
1034 ; RV32ZBB-NEXT: lw a3, 0(a0)
1035 ; RV32ZBB-NEXT: lw a0, 4(a0)
1036 ; RV32ZBB-NEXT: cpop a1, a1
1037 ; RV32ZBB-NEXT: cpop a2, a2
1038 ; RV32ZBB-NEXT: add a1, a2, a1
1039 ; RV32ZBB-NEXT: cpop a0, a0
1040 ; RV32ZBB-NEXT: cpop a2, a3
1041 ; RV32ZBB-NEXT: add a0, a2, a0
1042 ; RV32ZBB-NEXT: addi a0, a0, -1
1043 ; RV32ZBB-NEXT: snez a0, a0
1044 ; RV32ZBB-NEXT: addi a1, a1, -1
1045 ; RV32ZBB-NEXT: snez a1, a1
1047 %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
1048 %2 = icmp ne <2 x i64> %1, <i64 1, i64 1>
1052 define i32 @sextb_i32(i32 %a) nounwind {
1053 ; RV32I-LABEL: sextb_i32:
1055 ; RV32I-NEXT: slli a0, a0, 24
1056 ; RV32I-NEXT: srai a0, a0, 24
1059 ; RV32ZBB-LABEL: sextb_i32:
1061 ; RV32ZBB-NEXT: sext.b a0, a0
1063 %shl = shl i32 %a, 24
1064 %shr = ashr exact i32 %shl, 24
1068 define i64 @sextb_i64(i64 %a) nounwind {
1069 ; RV32I-LABEL: sextb_i64:
1071 ; RV32I-NEXT: slli a1, a0, 24
1072 ; RV32I-NEXT: srai a0, a1, 24
1073 ; RV32I-NEXT: srai a1, a1, 31
1076 ; RV32ZBB-LABEL: sextb_i64:
1078 ; RV32ZBB-NEXT: sext.b a0, a0
1079 ; RV32ZBB-NEXT: srai a1, a0, 31
1081 %shl = shl i64 %a, 56
1082 %shr = ashr exact i64 %shl, 56
1086 define i32 @sexth_i32(i32 %a) nounwind {
1087 ; RV32I-LABEL: sexth_i32:
1089 ; RV32I-NEXT: slli a0, a0, 16
1090 ; RV32I-NEXT: srai a0, a0, 16
1093 ; RV32ZBB-LABEL: sexth_i32:
1095 ; RV32ZBB-NEXT: sext.h a0, a0
1097 %shl = shl i32 %a, 16
1098 %shr = ashr exact i32 %shl, 16
; sexth_i64: sign-extend the low halfword of an i64 (shl 48 + ashr 48).
; With Zbb, sext.h produces the low word and srai derives the high word
; from its sign bit.
1102 define i64 @sexth_i64(i64 %a) nounwind {
1103 ; RV32I-LABEL: sexth_i64:
1105 ; RV32I-NEXT: slli a1, a0, 16
1106 ; RV32I-NEXT: srai a0, a1, 16
1107 ; RV32I-NEXT: srai a1, a1, 31
1110 ; RV32ZBB-LABEL: sexth_i64:
1112 ; RV32ZBB-NEXT: sext.h a0, a0
1113 ; RV32ZBB-NEXT: srai a1, a0, 31
1115 %shl = shl i64 %a, 48
1116 %shr = ashr exact i64 %shl, 48
; min_i32: signed minimum as icmp slt + select. RV32I lowers to a
; compare-and-branch sequence; Zbb selects the min instruction.
1120 define i32 @min_i32(i32 %a, i32 %b) nounwind {
1121 ; RV32I-LABEL: min_i32:
1123 ; RV32I-NEXT: blt a0, a1, .LBB28_2
1124 ; RV32I-NEXT: # %bb.1:
1125 ; RV32I-NEXT: mv a0, a1
1126 ; RV32I-NEXT: .LBB28_2:
1129 ; RV32ZBB-LABEL: min_i32:
1131 ; RV32ZBB-NEXT: min a0, a0, a1
1133 %cmp = icmp slt i32 %a, %b
1134 %cond = select i1 %cmp, i32 %a, i32 %b
1138 ; As we are not directly matching i64 code patterns on RV32, some i64
1139 ; patterns do not yet have any matching bit manipulation instructions on RV32.
1140 ; This test is presented here in case future expansions of the Bitmanip
1141 ; extensions introduce instructions suitable for this pattern.
; min_i64: signed minimum of an i64 register pair. No Zbb instruction
; covers this on RV32, so both configurations share the same branchy
; lowering (shared CHECK prefix): compare high words with slt, falling
; back to an unsigned compare of the low words when the high words match.
1143 define i64 @min_i64(i64 %a, i64 %b) nounwind {
1144 ; CHECK-LABEL: min_i64:
1146 ; CHECK-NEXT: beq a1, a3, .LBB29_2
1147 ; CHECK-NEXT: # %bb.1:
1148 ; CHECK-NEXT: slt a4, a1, a3
1149 ; CHECK-NEXT: beqz a4, .LBB29_3
1150 ; CHECK-NEXT: j .LBB29_4
1151 ; CHECK-NEXT: .LBB29_2:
1152 ; CHECK-NEXT: sltu a4, a0, a2
1153 ; CHECK-NEXT: bnez a4, .LBB29_4
1154 ; CHECK-NEXT: .LBB29_3:
1155 ; CHECK-NEXT: mv a0, a2
1156 ; CHECK-NEXT: mv a1, a3
1157 ; CHECK-NEXT: .LBB29_4:
1159 %cmp = icmp slt i64 %a, %b
1160 %cond = select i1 %cmp, i64 %a, i64 %b
; max_i32: signed maximum as icmp sgt + select. RV32I lowers to a
; compare-and-branch sequence; Zbb selects the max instruction.
1164 define i32 @max_i32(i32 %a, i32 %b) nounwind {
1165 ; RV32I-LABEL: max_i32:
1167 ; RV32I-NEXT: blt a1, a0, .LBB30_2
1168 ; RV32I-NEXT: # %bb.1:
1169 ; RV32I-NEXT: mv a0, a1
1170 ; RV32I-NEXT: .LBB30_2:
1173 ; RV32ZBB-LABEL: max_i32:
1175 ; RV32ZBB-NEXT: max a0, a0, a1
1177 %cmp = icmp sgt i32 %a, %b
1178 %cond = select i1 %cmp, i32 %a, i32 %b
1182 ; As we are not directly matching i64 code patterns on RV32, some i64
1183 ; patterns do not yet have any matching bit manipulation instructions on RV32.
1184 ; This test is presented here in case future expansions of the Bitmanip
1185 ; extensions introduce instructions suitable for this pattern.
; max_i64: signed maximum of an i64 register pair. No Zbb instruction
; covers this on RV32, so both configurations share the same branchy
; lowering (shared CHECK prefix): signed compare of the high words,
; unsigned compare of the low words when the high words are equal.
1187 define i64 @max_i64(i64 %a, i64 %b) nounwind {
1188 ; CHECK-LABEL: max_i64:
1190 ; CHECK-NEXT: beq a1, a3, .LBB31_2
1191 ; CHECK-NEXT: # %bb.1:
1192 ; CHECK-NEXT: slt a4, a3, a1
1193 ; CHECK-NEXT: beqz a4, .LBB31_3
1194 ; CHECK-NEXT: j .LBB31_4
1195 ; CHECK-NEXT: .LBB31_2:
1196 ; CHECK-NEXT: sltu a4, a2, a0
1197 ; CHECK-NEXT: bnez a4, .LBB31_4
1198 ; CHECK-NEXT: .LBB31_3:
1199 ; CHECK-NEXT: mv a0, a2
1200 ; CHECK-NEXT: mv a1, a3
1201 ; CHECK-NEXT: .LBB31_4:
1203 %cmp = icmp sgt i64 %a, %b
1204 %cond = select i1 %cmp, i64 %a, i64 %b
; minu_i32: unsigned minimum as icmp ult + select. RV32I lowers to a
; compare-and-branch sequence; Zbb selects the minu instruction.
1208 define i32 @minu_i32(i32 %a, i32 %b) nounwind {
1209 ; RV32I-LABEL: minu_i32:
1211 ; RV32I-NEXT: bltu a0, a1, .LBB32_2
1212 ; RV32I-NEXT: # %bb.1:
1213 ; RV32I-NEXT: mv a0, a1
1214 ; RV32I-NEXT: .LBB32_2:
1217 ; RV32ZBB-LABEL: minu_i32:
1219 ; RV32ZBB-NEXT: minu a0, a0, a1
1221 %cmp = icmp ult i32 %a, %b
1222 %cond = select i1 %cmp, i32 %a, i32 %b
1226 ; As we are not directly matching i64 code patterns on RV32, some i64
1227 ; patterns do not yet have any matching bit manipulation instructions on RV32.
1228 ; This test is presented here in case future expansions of the Bitmanip
1229 ; extensions introduce instructions suitable for this pattern.
; minu_i64: unsigned minimum of an i64 register pair. No Zbb instruction
; covers this on RV32, so both configurations share the same branchy
; lowering (shared CHECK prefix): unsigned compare of the high words,
; then of the low words when the high words are equal.
1231 define i64 @minu_i64(i64 %a, i64 %b) nounwind {
1232 ; CHECK-LABEL: minu_i64:
1234 ; CHECK-NEXT: beq a1, a3, .LBB33_2
1235 ; CHECK-NEXT: # %bb.1:
1236 ; CHECK-NEXT: sltu a4, a1, a3
1237 ; CHECK-NEXT: beqz a4, .LBB33_3
1238 ; CHECK-NEXT: j .LBB33_4
1239 ; CHECK-NEXT: .LBB33_2:
1240 ; CHECK-NEXT: sltu a4, a0, a2
1241 ; CHECK-NEXT: bnez a4, .LBB33_4
1242 ; CHECK-NEXT: .LBB33_3:
1243 ; CHECK-NEXT: mv a0, a2
1244 ; CHECK-NEXT: mv a1, a3
1245 ; CHECK-NEXT: .LBB33_4:
1247 %cmp = icmp ult i64 %a, %b
1248 %cond = select i1 %cmp, i64 %a, i64 %b
; maxu_i32: unsigned maximum as icmp ugt + select. RV32I lowers to a
; compare-and-branch sequence; Zbb selects the maxu instruction.
1252 define i32 @maxu_i32(i32 %a, i32 %b) nounwind {
1253 ; RV32I-LABEL: maxu_i32:
1255 ; RV32I-NEXT: bltu a1, a0, .LBB34_2
1256 ; RV32I-NEXT: # %bb.1:
1257 ; RV32I-NEXT: mv a0, a1
1258 ; RV32I-NEXT: .LBB34_2:
1261 ; RV32ZBB-LABEL: maxu_i32:
1263 ; RV32ZBB-NEXT: maxu a0, a0, a1
1265 %cmp = icmp ugt i32 %a, %b
1266 %cond = select i1 %cmp, i32 %a, i32 %b
1270 ; As we are not directly matching i64 code patterns on RV32, some i64
1271 ; patterns do not yet have any matching bit manipulation instructions on RV32.
1272 ; This test is presented here in case future expansions of the Bitmanip
1273 ; extensions introduce instructions suitable for this pattern.
; maxu_i64: unsigned maximum of an i64 register pair. No Zbb instruction
; covers this on RV32, so both configurations share the same branchy
; lowering (shared CHECK prefix): unsigned compare of the high words,
; then of the low words when the high words are equal.
1275 define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
1276 ; CHECK-LABEL: maxu_i64:
1278 ; CHECK-NEXT: beq a1, a3, .LBB35_2
1279 ; CHECK-NEXT: # %bb.1:
1280 ; CHECK-NEXT: sltu a4, a3, a1
1281 ; CHECK-NEXT: beqz a4, .LBB35_3
1282 ; CHECK-NEXT: j .LBB35_4
1283 ; CHECK-NEXT: .LBB35_2:
1284 ; CHECK-NEXT: sltu a4, a2, a0
1285 ; CHECK-NEXT: bnez a4, .LBB35_4
1286 ; CHECK-NEXT: .LBB35_3:
1287 ; CHECK-NEXT: mv a0, a2
1288 ; CHECK-NEXT: mv a1, a3
1289 ; CHECK-NEXT: .LBB35_4:
1291 %cmp = icmp ugt i64 %a, %b
1292 %cond = select i1 %cmp, i64 %a, i64 %b
1296 declare i32 @llvm.abs.i32(i32, i1 immarg)
; abs_i32: i32 absolute value intrinsic (i1 true: INT_MIN input is poison).
; RV32I uses the branchless srai/xor/sub sign-mask idiom; Zbb uses
; neg + max, i.e. max(x, -x).
1298 define i32 @abs_i32(i32 %x) {
1299 ; RV32I-LABEL: abs_i32:
1301 ; RV32I-NEXT: srai a1, a0, 31
1302 ; RV32I-NEXT: xor a0, a0, a1
1303 ; RV32I-NEXT: sub a0, a0, a1
1306 ; RV32ZBB-LABEL: abs_i32:
1308 ; RV32ZBB-NEXT: neg a1, a0
1309 ; RV32ZBB-NEXT: max a0, a0, a1
1311 %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
1315 declare i64 @llvm.abs.i64(i64, i1 immarg)
; abs_i64: i64 absolute value intrinsic. Zbb does not help for the
; register pair on RV32, so both configurations share the same lowering
; (shared CHECK prefix): branch on the high-word sign, then negate the
; 64-bit pair (neg low, neg high, borrow via snez).
1317 define i64 @abs_i64(i64 %x) {
1318 ; CHECK-LABEL: abs_i64:
1320 ; CHECK-NEXT: bgez a1, .LBB37_2
1321 ; CHECK-NEXT: # %bb.1:
1322 ; CHECK-NEXT: snez a2, a0
1323 ; CHECK-NEXT: neg a0, a0
1324 ; CHECK-NEXT: neg a1, a1
1325 ; CHECK-NEXT: sub a1, a1, a2
1326 ; CHECK-NEXT: .LBB37_2:
1328 %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
; zexth_i32: zero-extend the low halfword (and with 65535). RV32I lowers
; to slli+srli; Zbb selects a single zext.h.
1332 define i32 @zexth_i32(i32 %a) nounwind {
1333 ; RV32I-LABEL: zexth_i32:
1335 ; RV32I-NEXT: slli a0, a0, 16
1336 ; RV32I-NEXT: srli a0, a0, 16
1339 ; RV32ZBB-LABEL: zexth_i32:
1341 ; RV32ZBB-NEXT: zext.h a0, a0
1343 %and = and i32 %a, 65535
; zexth_i64: zero-extend the low halfword of an i64. Same low-word
; lowering as zexth_i32, plus zeroing the high word of the result pair.
1347 define i64 @zexth_i64(i64 %a) nounwind {
1348 ; RV32I-LABEL: zexth_i64:
1350 ; RV32I-NEXT: slli a0, a0, 16
1351 ; RV32I-NEXT: srli a0, a0, 16
1352 ; RV32I-NEXT: li a1, 0
1355 ; RV32ZBB-LABEL: zexth_i64:
1357 ; RV32ZBB-NEXT: zext.h a0, a0
1358 ; RV32ZBB-NEXT: li a1, 0
1360 %and = and i64 %a, 65535
1364 declare i32 @llvm.bswap.i32(i32)
; bswap_i32: byte-swap intrinsic. RV32I expands to a shift/and/or
; sequence using the 0x00ff00 mask (lui 16, addi -256); Zbb selects a
; single rev8.
1366 define i32 @bswap_i32(i32 %a) nounwind {
1367 ; RV32I-LABEL: bswap_i32:
1369 ; RV32I-NEXT: srli a1, a0, 8
1370 ; RV32I-NEXT: lui a2, 16
1371 ; RV32I-NEXT: addi a2, a2, -256
1372 ; RV32I-NEXT: and a1, a1, a2
1373 ; RV32I-NEXT: srli a3, a0, 24
1374 ; RV32I-NEXT: or a1, a1, a3
1375 ; RV32I-NEXT: and a2, a0, a2
1376 ; RV32I-NEXT: slli a2, a2, 8
1377 ; RV32I-NEXT: slli a0, a0, 24
1378 ; RV32I-NEXT: or a0, a0, a2
1379 ; RV32I-NEXT: or a0, a0, a1
1382 ; RV32ZBB-LABEL: bswap_i32:
1384 ; RV32ZBB-NEXT: rev8 a0, a0
1386 %1 = tail call i32 @llvm.bswap.i32(i32 %a)
1390 declare i64 @llvm.bswap.i64(i64)
1392 define i64 @bswap_i64(i64 %a) {
1393 ; RV32I-LABEL: bswap_i64:
1395 ; RV32I-NEXT: srli a2, a1, 8
1396 ; RV32I-NEXT: lui a3, 16
1397 ; RV32I-NEXT: addi a3, a3, -256
1398 ; RV32I-NEXT: and a2, a2, a3
1399 ; RV32I-NEXT: srli a4, a1, 24
1400 ; RV32I-NEXT: or a2, a2, a4
1401 ; RV32I-NEXT: and a4, a1, a3
1402 ; RV32I-NEXT: slli a4, a4, 8
1403 ; RV32I-NEXT: slli a1, a1, 24
1404 ; RV32I-NEXT: or a1, a1, a4
1405 ; RV32I-NEXT: or a2, a1, a2
1406 ; RV32I-NEXT: srli a1, a0, 8
1407 ; RV32I-NEXT: and a1, a1, a3
1408 ; RV32I-NEXT: srli a4, a0, 24
1409 ; RV32I-NEXT: or a1, a1, a4
1410 ; RV32I-NEXT: and a3, a0, a3
1411 ; RV32I-NEXT: slli a3, a3, 8
1412 ; RV32I-NEXT: slli a0, a0, 24
1413 ; RV32I-NEXT: or a0, a0, a3
1414 ; RV32I-NEXT: or a1, a0, a1
1415 ; RV32I-NEXT: mv a0, a2
1418 ; RV32ZBB-LABEL: bswap_i64:
1420 ; RV32ZBB-NEXT: rev8 a2, a1
1421 ; RV32ZBB-NEXT: rev8 a1, a0
1422 ; RV32ZBB-NEXT: mv a0, a2
1424 %1 = call i64 @llvm.bswap.i64(i64 %a)