; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64ZBB
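
; Each function checks the generic RV64I lowering (shift/mask expansions and
; libcalls) against the single-instruction forms available with Zbb.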
declare i32 @llvm.ctlz.i32(i32, i1)
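
; Without Zbb, i32 ctlz is expanded: the srliw/or chain smears the leading one
; into every lower bit, the result is inverted, and the ones are counted with
; a SWAR popcount (masks 0x55555555, 0x33333333, 0x0f0f0f0f, then a multiply
; by 0x01010101 via __muldi3 and a shift right by 24).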
define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ctlz_i32:
; RV64I-NEXT: beqz a0, .LBB0_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: li a0, 32
; RV64ZBB-LABEL: ctlz_i32:
; RV64ZBB-NEXT: clzw a0, a0
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
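
; log2(x) is lowered as 31 - ctlz(x); even with Zbb the subtract remains
; after clzw.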
define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_i32:
; RV64I-NEXT: beqz a0, .LBB1_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: li a0, 32
; RV64I-NEXT: .LBB1_3: # %cond.end
; RV64I-NEXT: li a1, 31
; RV64I-NEXT: sub a0, a1, a0
; RV64ZBB-LABEL: log2_i32:
; RV64ZBB-NEXT: clzw a0, a0
; RV64ZBB-NEXT: li a1, 31
; RV64ZBB-NEXT: sub a0, a1, a0
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
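
; ceil(log2(x)) is lowered as 32 - ctlz(x - 1).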
define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_ceil_i32:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: li s0, 32
; RV64I-NEXT: li a1, 32
; RV64I-NEXT: beqz a0, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a1, a0, 24
; RV64I-NEXT: .LBB2_2: # %cond.end
; RV64I-NEXT: sub a0, s0, a1
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: log2_ceil_i32:
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: clzw a0, a0
; RV64ZBB-NEXT: li a1, 32
; RV64ZBB-NEXT: sub a0, a1, a0
  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
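
; findLastSet: ctlz_zero_undef xor 31 gives the index of the highest set bit;
; the snez/addi -1 sequence builds an all-ones mask for zero input, so the
; result becomes -1 without a branch.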
define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findLastSet_i32:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: srliw a0, a0, 1
; RV64I-NEXT: or a0, s0, a0
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: xori a0, a0, 31
; RV64I-NEXT: snez a1, s0
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: findLastSet_i32:
; RV64ZBB-NEXT: clzw a1, a0
; RV64ZBB-NEXT: xori a1, a1, 31
; RV64ZBB-NEXT: snez a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: or a0, a0, a1
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  %3 = icmp eq i32 %a, 0
  %4 = select i1 %3, i32 -1, i32 %2
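
; The lshr guarantees the ctlz operand fits in 31 bits, but it can still be
; zero, so the beqz guard stays. This function is not marked nounwind, hence
; the CFI directives around the spill.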
define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-LABEL: ctlz_lshr_i32:
; RV64I-NEXT: srliw a0, a0, 1
; RV64I-NEXT: beqz a0, .LBB4_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: .cfi_def_cfa_offset 16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, 32
; RV64ZBB-LABEL: ctlz_lshr_i32:
; RV64ZBB-NEXT: srliw a0, a0, 1
; RV64ZBB-NEXT: clzw a0, a0
  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)

declare i64 @llvm.ctlz.i64(i64, i1)
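
; The i64 expansion mirrors ctlz_i32, building each 64-bit mask by doubling
; the 32-bit pattern (slli 32 + add) and shifting the product right by 56.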
define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-LABEL: ctlz_i64:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 32
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: slli a3, a2, 32
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srli a0, a0, 56
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
; RV64ZBB-LABEL: ctlz_i64:
; RV64ZBB-NEXT: clz a0, a0
  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)

declare i32 @llvm.cttz.i32(i32, i1)
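
; Without Zbb, cttz isolates the lowest set bit with x & -x, multiplies by
; the de Bruijn constant 0x077cb531, and uses the top five bits (srliw 27) to
; index a 32-entry lookup table (.LCPI6_0).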
define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_i32:
; RV64I-NEXT: beqz a0, .LBB6_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: li a0, 32
; RV64ZBB-LABEL: cttz_i32:
; RV64ZBB-NEXT: ctzw a0, a0
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
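
; With the zero-undef flag, the beqz guard and the "li a0, 32" fallback
; disappear.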
define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_zero_undef_i32:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: cttz_zero_undef_i32:
; RV64ZBB-NEXT: ctzw a0, a0
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
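
; findFirstSet: the cttz lookup result, with -1 ored in for zero input via
; the snez/addi -1 mask.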
define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findFirstSet_i32:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: and a0, s0, a0
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI8_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: snez a1, s0
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: findFirstSet_i32:
; RV64ZBB-NEXT: ctzw a1, a0
; RV64ZBB-NEXT: snez a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: or a0, a0, a1
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  %2 = icmp eq i32 %a, 0
  %3 = select i1 %2, i32 -1, i32 %1
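
; ffs returns cttz + 1, and the seqz/addi -1 mask zeroes the result when the
; input is zero.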
define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: and a0, s0, a0
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: addi a0, a0, 1
; RV64I-NEXT: seqz a1, s0
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: ffs_i32:
; RV64ZBB-NEXT: ctzw a1, a0
; RV64ZBB-NEXT: addi a1, a1, 1
; RV64ZBB-NEXT: seqz a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: and a0, a0, a1
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  %3 = icmp eq i32 %a, 0
  %4 = select i1 %3, i32 0, i32 %2

declare i64 @llvm.cttz.i64(i64, i1)
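
; The 64-bit de Bruijn multiplier cannot be materialized with lui+addiw, so
; it is loaded from the constant pool; srli 58 indexes a 64-entry table.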
define i64 @cttz_i64(i64 %a) nounwind {
; RV64I-LABEL: cttz_i64:
; RV64I-NEXT: beqz a0, .LBB10_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, %hi(.LCPI10_0)
; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1)
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srli a0, a0, 58
; RV64I-NEXT: lui a1, %hi(.LCPI10_1)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_1)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: li a0, 64
; RV64ZBB-LABEL: cttz_i64:
; RV64ZBB-NEXT: ctz a0, a0
  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)

declare i32 @llvm.ctpop.i32(i32)
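
; The classic SWAR popcount: pairwise bit sums, nibble sums, then a
; horizontal byte sum via multiplication by 0x01010101.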
define signext i32 @ctpop_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ctpop_i32:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: ctpop_i32:
; RV64ZBB-NEXT: cpopw a0, a0
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
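
; The ctpop comparisons below need no real popcount on RV64I:
; popcount(x) < 2 <=> (x & (x-1)) == 0, and popcount(x) == 1 <=> the unsigned
; test (x-1) < (x ^ (x-1)).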
define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind {
; RV64I-LABEL: ctpop_i32_ult_two:
; RV64I-NEXT: addiw a1, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: seqz a0, a0
; RV64ZBB-LABEL: ctpop_i32_ult_two:
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  %2 = icmp ult i32 %1, 2

define i1 @ctpop_i32_ugt_one(i32 signext %a) nounwind {
; RV64I-LABEL: ctpop_i32_ugt_one:
; RV64I-NEXT: addiw a1, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: snez a0, a0
; RV64ZBB-LABEL: ctpop_i32_ugt_one:
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: xori a0, a0, 1
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  %2 = icmp ugt i32 %1, 1

define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind {
; RV64I-LABEL: ctpop_i32_eq_one:
; RV64I-NEXT: addiw a1, a0, -1
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sltu a0, a1, a0
; RV64ZBB-LABEL: ctpop_i32_eq_one:
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: seqz a0, a0
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  %2 = icmp eq i32 %1, 1

define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
; RV64I-LABEL: ctpop_i32_ne_one:
; RV64I-NEXT: addiw a1, a0, -1
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sltu a0, a1, a0
; RV64I-NEXT: xori a0, a0, 1
; RV64ZBB-LABEL: ctpop_i32_ne_one:
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: snez a0, a0
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  %2 = icmp ne i32 %1, 1
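
; Same expansion as ctpop_i32, but fed from memory; Zbb uses lwu, which is
; fine because cpopw only reads the low 32 bits.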
define signext i32 @ctpop_i32_load(ptr %p) nounwind {
; RV64I-LABEL: ctpop_i32_load:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: lw a0, 0(a0)
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: ctpop_i32_load:
; RV64ZBB-NEXT: lwu a0, 0(a0)
; RV64ZBB-NEXT: cpopw a0, a0
  %a = load i32, ptr %p
  %1 = call i32 @llvm.ctpop.i32(i32 %a)

declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
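
; <2 x i32> is scalarized; the RV64I version keeps the popcount masks and the
; 0x01010101 multiplier live in saved registers across the two __muldi3 calls.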
define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32:
; RV64I-NEXT: addi sp, sp, -64
; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw s3, a2, 1365
; RV64I-NEXT: and a1, a1, s3
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw s4, a1, 819
; RV64I-NEXT: and a1, a0, s4
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, s4
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw s5, a1, -241
; RV64I-NEXT: and a0, a0, s5
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw s1, a1, 257
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw s2, a0, 24
; RV64I-NEXT: srli a0, s0, 1
; RV64I-NEXT: and a0, a0, s3
; RV64I-NEXT: sub s0, s0, a0
; RV64I-NEXT: and a0, s0, s4
; RV64I-NEXT: srli s0, s0, 2
; RV64I-NEXT: and a1, s0, s4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: and a0, a0, s5
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a1, a0, 24
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 64
; RV64ZBB-LABEL: ctpop_v2i32:
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: cpopw a1, a1
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
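
; The same compare idioms applied per element; sext.w makes the 32-bit
; x & (x-1) and x ^ (x-1) values testable with the 64-bit seqz/snez/sltu.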
define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32_ult_two:
; RV64I-NEXT: addi a2, a0, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: seqz a1, a1
; RV64ZBB-LABEL: ctpop_v2i32_ult_two:
; RV64ZBB-NEXT: cpopw a1, a1
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: sltiu a1, a1, 2
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ult <2 x i32> %1, <i32 2, i32 2>

define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32_ugt_one:
; RV64I-NEXT: addi a2, a0, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: snez a1, a1
; RV64ZBB-LABEL: ctpop_v2i32_ugt_one:
; RV64ZBB-NEXT: cpopw a1, a1
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: xori a0, a0, 1
; RV64ZBB-NEXT: sltiu a1, a1, 2
; RV64ZBB-NEXT: xori a1, a1, 1
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ugt <2 x i32> %1, <i32 1, i32 1>

define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32_eq_one:
; RV64I-NEXT: addiw a2, a0, -1
; RV64I-NEXT: xor a0, a0, a2
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sltu a0, a2, a0
; RV64I-NEXT: addiw a2, a1, -1
; RV64I-NEXT: xor a1, a1, a2
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: sltu a1, a2, a1
; RV64ZBB-LABEL: ctpop_v2i32_eq_one:
; RV64ZBB-NEXT: cpopw a1, a1
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: seqz a0, a0
; RV64ZBB-NEXT: addi a1, a1, -1
; RV64ZBB-NEXT: seqz a1, a1
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp eq <2 x i32> %1, <i32 1, i32 1>

define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32_ne_one:
; RV64I-NEXT: addiw a2, a0, -1
; RV64I-NEXT: xor a0, a0, a2
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sltu a0, a2, a0
; RV64I-NEXT: xori a0, a0, 1
; RV64I-NEXT: addiw a2, a1, -1
; RV64I-NEXT: xor a1, a1, a2
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: sltu a1, a2, a1
; RV64I-NEXT: xori a1, a1, 1
; RV64ZBB-LABEL: ctpop_v2i32_ne_one:
; RV64ZBB-NEXT: cpopw a1, a1
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: snez a0, a0
; RV64ZBB-NEXT: addi a1, a1, -1
; RV64ZBB-NEXT: snez a1, a1
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ne <2 x i32> %1, <i32 1, i32 1>

declare i64 @llvm.ctpop.i64(i64)
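
; ctpop_i64 repeats the SWAR sequence with the widened masks and a final
; shift by 56.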
define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: slli a3, a2, 32
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srli a0, a0, 56
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: ctpop_i64:
; RV64ZBB-NEXT: cpop a0, a0
  %1 = call i64 @llvm.ctpop.i64(i64 %a)

define i1 @ctpop_i64_ult_two(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64_ult_two:
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: seqz a0, a0
; RV64ZBB-LABEL: ctpop_i64_ult_two:
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ult i64 %1, 2

define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64_ugt_one:
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: snez a0, a0
; RV64ZBB-LABEL: ctpop_i64_ugt_one:
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: xori a0, a0, 1
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ugt i64 %1, 1

define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64_eq_one:
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sltu a0, a1, a0
; RV64ZBB-LABEL: ctpop_i64_eq_one:
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: seqz a0, a0
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp eq i64 %1, 1

define i1 @ctpop_i64_ne_one(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64_ne_one:
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sltu a0, a1, a0
; RV64I-NEXT: xori a0, a0, 1
; RV64ZBB-LABEL: ctpop_i64_ne_one:
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: snez a0, a0
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ne i64 %1, 1

declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
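
; The v2i64 version mirrors ctpop_v2i32, with the 64-bit constants.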
define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV64I-LABEL: ctpop_v2i64:
; RV64I-NEXT: addi sp, sp, -64
; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: slli a3, a2, 32
; RV64I-NEXT: add s3, a2, a3
; RV64I-NEXT: and a1, a1, s3
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add s4, a1, a2
; RV64I-NEXT: and a1, a0, s4
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, s4
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add s5, a1, a2
; RV64I-NEXT: and a0, a0, s5
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw s1, a1, 257
; RV64I-NEXT: slli a1, s1, 32
; RV64I-NEXT: add s1, s1, a1
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srli s2, a0, 56
; RV64I-NEXT: srli a0, s0, 1
; RV64I-NEXT: and a0, a0, s3
; RV64I-NEXT: sub s0, s0, a0
; RV64I-NEXT: and a0, s0, s4
; RV64I-NEXT: srli s0, s0, 2
; RV64I-NEXT: and a1, s0, s4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: and a0, a0, s5
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srli a1, a0, 56
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 64
; RV64ZBB-LABEL: ctpop_v2i64:
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: cpop a1, a1
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)

define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
; RV64I-LABEL: ctpop_v2i64_ult_two:
; RV64I-NEXT: addi a2, a0, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: seqz a1, a1
; RV64ZBB-LABEL: ctpop_v2i64_ult_two:
; RV64ZBB-NEXT: cpop a1, a1
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: sltiu a1, a1, 2
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ult <2 x i64> %1, <i64 2, i64 2>

define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
; RV64I-LABEL: ctpop_v2i64_ugt_one:
; RV64I-NEXT: addi a2, a0, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: snez a1, a1
; RV64ZBB-LABEL: ctpop_v2i64_ugt_one:
; RV64ZBB-NEXT: cpop a1, a1
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 2
; RV64ZBB-NEXT: xori a0, a0, 1
; RV64ZBB-NEXT: sltiu a1, a1, 2
; RV64ZBB-NEXT: xori a1, a1, 1
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>

define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
; RV64I-LABEL: ctpop_v2i64_eq_one:
; RV64I-NEXT: addi a2, a0, -1
; RV64I-NEXT: xor a0, a0, a2
; RV64I-NEXT: sltu a0, a2, a0
; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: xor a1, a1, a2
; RV64I-NEXT: sltu a1, a2, a1
; RV64ZBB-LABEL: ctpop_v2i64_eq_one:
; RV64ZBB-NEXT: cpop a1, a1
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: seqz a0, a0
; RV64ZBB-NEXT: addi a1, a1, -1
; RV64ZBB-NEXT: seqz a1, a1
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>

define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
; RV64I-LABEL: ctpop_v2i64_ne_one:
; RV64I-NEXT: addi a2, a0, -1
; RV64I-NEXT: xor a0, a0, a2
; RV64I-NEXT: sltu a0, a2, a0
; RV64I-NEXT: xori a0, a0, 1
; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: xor a1, a1, a2
; RV64I-NEXT: sltu a1, a2, a1
; RV64I-NEXT: xori a1, a1, 1
; RV64ZBB-LABEL: ctpop_v2i64_ne_one:
; RV64ZBB-NEXT: cpop a1, a1
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: snez a0, a0
; RV64ZBB-NEXT: addi a1, a1, -1
; RV64ZBB-NEXT: snez a1, a1
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ne <2 x i64> %1, <i64 1, i64 1>
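
; Zbb provides direct byte/halfword sign extension; the base ISA needs a
; shift-left/shift-right-arithmetic pair.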
define signext i32 @sextb_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sextb_i32:
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64ZBB-LABEL: sextb_i32:
; RV64ZBB-NEXT: sext.b a0, a0
  %shl = shl i32 %a, 24
  %shr = ashr exact i32 %shl, 24

define i64 @sextb_i64(i64 %a) nounwind {
; RV64I-LABEL: sextb_i64:
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64ZBB-LABEL: sextb_i64:
; RV64ZBB-NEXT: sext.b a0, a0
  %shl = shl i64 %a, 56
  %shr = ashr exact i64 %shl, 56

define signext i32 @sexth_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sexth_i32:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64ZBB-LABEL: sexth_i32:
; RV64ZBB-NEXT: sext.h a0, a0
  %shl = shl i32 %a, 16
  %shr = ashr exact i32 %shl, 16

define i64 @sexth_i64(i64 %a) nounwind {
; RV64I-LABEL: sexth_i64:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64ZBB-LABEL: sexth_i64:
; RV64ZBB-NEXT: sext.h a0, a0
  %shl = shl i64 %a, 48
  %shr = ashr exact i64 %shl, 48
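
; Signed and unsigned min/max lower to a compare-and-branch around a register
; move on RV64I; Zbb has min/max/minu/maxu directly.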
define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: min_i32:
; RV64I-NEXT: blt a0, a1, .LBB36_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB36_2:
; RV64ZBB-LABEL: min_i32:
; RV64ZBB-NEXT: min a0, a0, a1
  %cmp = icmp slt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b

define i64 @min_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: min_i64:
; RV64I-NEXT: blt a0, a1, .LBB37_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB37_2:
; RV64ZBB-LABEL: min_i64:
; RV64ZBB-NEXT: min a0, a0, a1
  %cmp = icmp slt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b

define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: max_i32:
; RV64I-NEXT: blt a1, a0, .LBB38_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB38_2:
; RV64ZBB-LABEL: max_i32:
; RV64ZBB-NEXT: max a0, a0, a1
  %cmp = icmp sgt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b

define i64 @max_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: max_i64:
; RV64I-NEXT: blt a1, a0, .LBB39_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB39_2:
; RV64ZBB-LABEL: max_i64:
; RV64ZBB-NEXT: max a0, a0, a1
  %cmp = icmp sgt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b

define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: minu_i32:
; RV64I-NEXT: bltu a0, a1, .LBB40_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB40_2:
; RV64ZBB-LABEL: minu_i32:
; RV64ZBB-NEXT: minu a0, a0, a1
  %cmp = icmp ult i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b

define i64 @minu_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: minu_i64:
; RV64I-NEXT: bltu a0, a1, .LBB41_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB41_2:
; RV64ZBB-LABEL: minu_i64:
; RV64ZBB-NEXT: minu a0, a0, a1
  %cmp = icmp ult i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b

define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: maxu_i32:
; RV64I-NEXT: bltu a1, a0, .LBB42_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB42_2:
; RV64ZBB-LABEL: maxu_i32:
; RV64ZBB-NEXT: maxu a0, a0, a1
  %cmp = icmp ugt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b

define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: maxu_i64:
; RV64I-NEXT: bltu a1, a0, .LBB43_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB43_2:
; RV64ZBB-LABEL: maxu_i64:
; RV64ZBB-NEXT: maxu a0, a0, a1
  %cmp = icmp ugt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b

declare i32 @llvm.abs.i32(i32, i1 immarg)
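
; abs uses the branchless sign-mask idiom (x ^ (x >> 31)) - (x >> 31) on
; RV64I; Zbb computes max(x, -x).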
define i32 @abs_i32(i32 %x) {
; RV64I-LABEL: abs_i32:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV64ZBB-LABEL: abs_i32:
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: negw a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)

define signext i32 @abs_i32_sext(i32 signext %x) {
; RV64I-LABEL: abs_i32_sext:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV64ZBB-LABEL: abs_i32_sext:
; RV64ZBB-NEXT: negw a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)

declare i64 @llvm.abs.i64(i64, i1 immarg)

define i64 @abs_i64(i64 %x) {
; RV64I-LABEL: abs_i64:
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64ZBB-LABEL: abs_i64:
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
  %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
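
; zext.h replaces the slli/srli-by-48 pair that clears the upper 48 bits.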
define i32 @zexth_i32(i32 %a) nounwind {
; RV64I-LABEL: zexth_i32:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64ZBB-LABEL: zexth_i32:
; RV64ZBB-NEXT: zext.h a0, a0
  %and = and i32 %a, 65535

define i64 @zexth_i64(i64 %a) nounwind {
; RV64I-LABEL: zexth_i64:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64ZBB-LABEL: zexth_i64:
; RV64ZBB-NEXT: zext.h a0, a0
  %and = and i64 %a, 65535

declare i32 @llvm.bswap.i32(i32)
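
; rev8 reverses all eight bytes, so the i32 result needs a shift right by 32;
; srai keeps the value sign extended for the signext return.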
define signext i32 @bswap_i32(i32 signext %a) nounwind {
; RV64I-LABEL: bswap_i32:
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srliw a3, a0, 24
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: and a2, a0, a2
; RV64I-NEXT: slli a2, a2, 8
; RV64I-NEXT: slliw a0, a0, 24
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: or a0, a0, a1
; RV64ZBB-LABEL: bswap_i32:
; RV64ZBB-NEXT: rev8 a0, a0
; RV64ZBB-NEXT: srai a0, a0, 32
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)

; Similar to bswap_i32 but the result is not sign extended.
define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
; RV64I-LABEL: bswap_i32_nosext:
; RV64I-NEXT: srli a2, a0, 8
; RV64I-NEXT: lui a3, 16
; RV64I-NEXT: addi a3, a3, -256
; RV64I-NEXT: and a2, a2, a3
; RV64I-NEXT: srliw a4, a0, 24
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: and a3, a0, a3
; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sw a0, 0(a1)
; RV64ZBB-LABEL: bswap_i32_nosext:
; RV64ZBB-NEXT: rev8 a0, a0
; RV64ZBB-NEXT: srli a0, a0, 32
; RV64ZBB-NEXT: sw a0, 0(a1)
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  store i32 %1, ptr %x

declare i64 @llvm.bswap.i64(i64)

define i64 @bswap_i64(i64 %a) {
; RV64I-LABEL: bswap_i64:
; RV64I-NEXT: srli a1, a0, 40
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a3, a0, 56
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: lui a4, 4080
; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: srli a5, a0, 8
; RV64I-NEXT: srliw a5, a5, 24
; RV64I-NEXT: slli a5, a5, 24
; RV64I-NEXT: or a3, a5, a3
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: and a4, a0, a4
; RV64I-NEXT: slli a4, a4, 24
; RV64I-NEXT: srliw a3, a0, 24
; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: and a2, a0, a2
; RV64I-NEXT: slli a2, a2, 40
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a1
; RV64ZBB-LABEL: bswap_i64:
; RV64ZBB-NEXT: rev8 a0, a0
  %1 = call i64 @llvm.bswap.i64(i64 %a)