; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32B
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32ZBB

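; The tests below exercise the basic bit-manipulation patterns covered by Zbb:
; count-leading/trailing zeros, population count, sign/zero extension, signed
; and unsigned min/max, abs, and byte swap. Each case compares the plain RV32I
; expansion against the single instructions selected with +experimental-b and
; +experimental-zbb.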
declare i32 @llvm.ctlz.i32(i32, i1)

define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I-LABEL: ctlz_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi a2, a2, 1365
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi a1, a1, 819
; RV32I-NEXT: and a2, a0, a1
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi a1, a1, 257
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: j .LBB0_3
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: addi a0, zero, 32
; RV32I-NEXT: .LBB0_3: # %cond.end
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32B-LABEL: ctlz_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: clz a0, a0
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: ctlz_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: clz a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  ret i32 %1
}

declare i64 @llvm.ctlz.i64(i64, i1)

define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s3, a1
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi s5, a2, 1365
; RV32I-NEXT: and a1, a1, s5
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi s1, a1, 819
; RV32I-NEXT: and a1, a0, s1
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, s1
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi s6, a1, -241
; RV32I-NEXT: and a0, a0, s6
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi s0, a1, 257
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: srli a0, s4, 1
; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: and a1, a1, s5
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: and a1, a0, s1
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, s1
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: and a0, a0, s6
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: bnez s3, .LBB1_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: addi a0, a0, 32
; RV32I-NEXT: j .LBB1_3
; RV32I-NEXT: .LBB1_2:
; RV32I-NEXT: srli a0, s2, 24
; RV32I-NEXT: .LBB1_3:
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32B-LABEL: ctlz_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: clz a2, a1
; RV32B-NEXT: clz a0, a0
; RV32B-NEXT: addi a0, a0, 32
; RV32B-NEXT: cmov a0, a1, a2, a0
; RV32B-NEXT: mv a1, zero
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: ctlz_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: bnez a1, .LBB1_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: clz a0, a0
; RV32ZBB-NEXT: addi a0, a0, 32
; RV32ZBB-NEXT: mv a1, zero
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB1_2:
; RV32ZBB-NEXT: clz a0, a1
; RV32ZBB-NEXT: mv a1, zero
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
  ret i64 %1
}

declare i32 @llvm.cttz.i32(i32, i1)

define i32 @cttz_i32(i32 %a) nounwind {
; RV32I-LABEL: cttz_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: beqz a0, .LBB2_2
; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi a2, a2, 1365
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi a1, a1, 819
; RV32I-NEXT: and a2, a0, a1
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi a1, a1, 257
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: j .LBB2_3
; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: addi a0, zero, 32
; RV32I-NEXT: .LBB2_3: # %cond.end
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32B-LABEL: cttz_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: ctz a0, a0
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: cttz_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: ctz a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  ret i32 %1
}

declare i64 @llvm.cttz.i64(i64, i1)

define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-LABEL: cttz_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s3, a1
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: not a1, s4
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi s5, a2, 1365
; RV32I-NEXT: and a1, a1, s5
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi s0, a1, 819
; RV32I-NEXT: and a1, a0, s0
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi s6, a1, -241
; RV32I-NEXT: and a0, a0, s6
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi s1, a1, 257
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: addi a0, s3, -1
; RV32I-NEXT: not a1, s3
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: and a1, a1, s5
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: and a1, a0, s0
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: and a0, a0, s6
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: bnez s4, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: addi a0, a0, 32
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: srli a0, s2, 24
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32B-LABEL: cttz_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: ctz a2, a0
; RV32B-NEXT: ctz a1, a1
; RV32B-NEXT: addi a1, a1, 32
; RV32B-NEXT: cmov a0, a0, a2, a1
; RV32B-NEXT: mv a1, zero
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: cttz_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: bnez a0, .LBB3_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: ctz a0, a1
; RV32ZBB-NEXT: addi a0, a0, 32
; RV32ZBB-NEXT: mv a1, zero
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB3_2:
; RV32ZBB-NEXT: ctz a0, a0
; RV32ZBB-NEXT: mv a1, zero
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
  ret i64 %1
}

declare i32 @llvm.ctpop.i32(i32)

define i32 @ctpop_i32(i32 %a) nounwind {
; RV32I-LABEL: ctpop_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi a2, a2, 1365
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi a1, a1, 819
; RV32I-NEXT: and a2, a0, a1
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi a1, a1, 257
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32B-LABEL: ctpop_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: cpop a0, a0
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  ret i32 %1
}

declare i64 @llvm.ctpop.i64(i64)

define i64 @ctpop_i64(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi s3, a2, 1365
; RV32I-NEXT: and a0, a0, s3
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi s0, a1, 819
; RV32I-NEXT: and a1, a0, s0
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi s4, a1, -241
; RV32I-NEXT: and a0, a0, s4
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi s1, a1, 257
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: srli s5, a0, 24
; RV32I-NEXT: srli a0, s2, 1
; RV32I-NEXT: and a0, a0, s3
; RV32I-NEXT: sub a0, s2, a0
; RV32I-NEXT: and a1, a0, s0
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: and a0, a0, s4
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: add a0, a0, s5
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32B-LABEL: ctpop_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: cpop a1, a1
; RV32B-NEXT: cpop a0, a0
; RV32B-NEXT: add a0, a0, a1
; RV32B-NEXT: mv a1, zero
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a0, a0, a1
; RV32ZBB-NEXT: mv a1, zero
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  ret i64 %1
}

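; Sign-extension patterns: a shl/ashr pair by 24 (or 16) should fold to a
; single sext.b/sext.h under Zbb, while RV32I keeps the slli+srai pair.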
define i32 @sextb_i32(i32 %a) nounwind {
; RV32I-LABEL: sextb_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: ret
;
; RV32B-LABEL: sextb_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: sext.b a0, a0
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: sextb_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.b a0, a0
; RV32ZBB-NEXT: ret
  %shl = shl i32 %a, 24
  %shr = ashr exact i32 %shl, 24
  ret i32 %shr
}

define i64 @sextb_i64(i64 %a) nounwind {
; RV32I-LABEL: sextb_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 24
; RV32I-NEXT: srai a0, a1, 24
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: ret
;
; RV32B-LABEL: sextb_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: sext.b a0, a0
; RV32B-NEXT: srai a1, a0, 31
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: sextb_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.b a0, a0
; RV32ZBB-NEXT: srai a1, a0, 31
; RV32ZBB-NEXT: ret
  %shl = shl i64 %a, 56
  %shr = ashr exact i64 %shl, 56
  ret i64 %shr
}

define i32 @sexth_i32(i32 %a) nounwind {
; RV32I-LABEL: sexth_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: ret
;
; RV32B-LABEL: sexth_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: sext.h a0, a0
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: sexth_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.h a0, a0
; RV32ZBB-NEXT: ret
  %shl = shl i32 %a, 16
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}

define i64 @sexth_i64(i64 %a) nounwind {
; RV32I-LABEL: sexth_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 16
; RV32I-NEXT: srai a0, a1, 16
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: ret
;
; RV32B-LABEL: sexth_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: sext.h a0, a0
; RV32B-NEXT: srai a1, a0, 31
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: sexth_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.h a0, a0
; RV32ZBB-NEXT: srai a1, a0, 31
; RV32ZBB-NEXT: ret
  %shl = shl i64 %a, 48
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}

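; Signed and unsigned min/max selected from icmp+select. Only the i32 forms
; map to the single min/max/minu/maxu instructions on RV32; the i64 forms
; below still need branch or cmov sequences.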
define i32 @min_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: min_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: blt a0, a1, .LBB10_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: ret
;
; RV32B-LABEL: min_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: min a0, a0, a1
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: min_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: min a0, a0, a1
; RV32ZBB-NEXT: ret
  %cmp = icmp slt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}

; Because i64 patterns are not matched directly on RV32, some i64 patterns do
; not yet have any corresponding bit-manipulation instructions on RV32. This
; test is kept here in case future revisions of the experimental B extension
; introduce instructions suitable for this pattern.

define i64 @min_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: min_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: beq a1, a3, .LBB11_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a1, a3
; RV32I-NEXT: beqz a4, .LBB11_3
; RV32I-NEXT: j .LBB11_4
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: bnez a4, .LBB11_4
; RV32I-NEXT: .LBB11_3:
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: .LBB11_4:
; RV32I-NEXT: ret
;
; RV32B-LABEL: min_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: slt a4, a1, a3
; RV32B-NEXT: cmov a4, a4, a0, a2
; RV32B-NEXT: minu a0, a0, a2
; RV32B-NEXT: xor a2, a1, a3
; RV32B-NEXT: cmov a0, a2, a4, a0
; RV32B-NEXT: min a1, a1, a3
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: min_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: mv a4, a0
; RV32ZBB-NEXT: bge a1, a3, .LBB11_3
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: beq a1, a3, .LBB11_4
; RV32ZBB-NEXT: .LBB11_2:
; RV32ZBB-NEXT: min a1, a1, a3
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB11_3:
; RV32ZBB-NEXT: mv a0, a2
; RV32ZBB-NEXT: bne a1, a3, .LBB11_2
; RV32ZBB-NEXT: .LBB11_4:
; RV32ZBB-NEXT: minu a0, a4, a2
; RV32ZBB-NEXT: min a1, a1, a3
; RV32ZBB-NEXT: ret
  %cmp = icmp slt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}

define i32 @max_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: max_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: blt a1, a0, .LBB12_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB12_2:
; RV32I-NEXT: ret
;
; RV32B-LABEL: max_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: max a0, a0, a1
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: max_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
  %cmp = icmp sgt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}

; Because i64 patterns are not matched directly on RV32, some i64 patterns do
; not yet have any corresponding bit-manipulation instructions on RV32. This
; test is kept here in case future revisions of the experimental B extension
; introduce instructions suitable for this pattern.

define i64 @max_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: max_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: beq a1, a3, .LBB13_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a3, a1
; RV32I-NEXT: beqz a4, .LBB13_3
; RV32I-NEXT: j .LBB13_4
; RV32I-NEXT: .LBB13_2:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: bnez a4, .LBB13_4
; RV32I-NEXT: .LBB13_3:
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: .LBB13_4:
; RV32I-NEXT: ret
;
; RV32B-LABEL: max_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: slt a4, a3, a1
; RV32B-NEXT: cmov a4, a4, a0, a2
; RV32B-NEXT: maxu a0, a0, a2
; RV32B-NEXT: xor a2, a1, a3
; RV32B-NEXT: cmov a0, a2, a4, a0
; RV32B-NEXT: max a1, a1, a3
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: max_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: mv a4, a0
; RV32ZBB-NEXT: bge a3, a1, .LBB13_3
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: beq a1, a3, .LBB13_4
; RV32ZBB-NEXT: .LBB13_2:
; RV32ZBB-NEXT: max a1, a1, a3
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB13_3:
; RV32ZBB-NEXT: mv a0, a2
; RV32ZBB-NEXT: bne a1, a3, .LBB13_2
; RV32ZBB-NEXT: .LBB13_4:
; RV32ZBB-NEXT: maxu a0, a4, a2
; RV32ZBB-NEXT: max a1, a1, a3
; RV32ZBB-NEXT: ret
  %cmp = icmp sgt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}

define i32 @minu_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: minu_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: bltu a0, a1, .LBB14_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB14_2:
; RV32I-NEXT: ret
;
; RV32B-LABEL: minu_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: minu a0, a0, a1
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: minu_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: minu a0, a0, a1
; RV32ZBB-NEXT: ret
  %cmp = icmp ult i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}

; Because i64 patterns are not matched directly on RV32, some i64 patterns do
; not yet have any corresponding bit-manipulation instructions on RV32. This
; test is kept here in case future revisions of the experimental B extension
; introduce instructions suitable for this pattern.

define i64 @minu_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: minu_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: beq a1, a3, .LBB15_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a1, a3
; RV32I-NEXT: beqz a4, .LBB15_3
; RV32I-NEXT: j .LBB15_4
; RV32I-NEXT: .LBB15_2:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: bnez a4, .LBB15_4
; RV32I-NEXT: .LBB15_3:
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: .LBB15_4:
; RV32I-NEXT: ret
;
; RV32B-LABEL: minu_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: sltu a4, a1, a3
; RV32B-NEXT: cmov a4, a4, a0, a2
; RV32B-NEXT: minu a0, a0, a2
; RV32B-NEXT: xor a2, a1, a3
; RV32B-NEXT: cmov a0, a2, a4, a0
; RV32B-NEXT: minu a1, a1, a3
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: minu_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: mv a4, a0
; RV32ZBB-NEXT: bgeu a1, a3, .LBB15_3
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: beq a1, a3, .LBB15_4
; RV32ZBB-NEXT: .LBB15_2:
; RV32ZBB-NEXT: minu a1, a1, a3
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB15_3:
; RV32ZBB-NEXT: mv a0, a2
; RV32ZBB-NEXT: bne a1, a3, .LBB15_2
; RV32ZBB-NEXT: .LBB15_4:
; RV32ZBB-NEXT: minu a0, a4, a2
; RV32ZBB-NEXT: minu a1, a1, a3
; RV32ZBB-NEXT: ret
  %cmp = icmp ult i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}

define i32 @maxu_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: maxu_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: bltu a1, a0, .LBB16_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB16_2:
; RV32I-NEXT: ret
;
; RV32B-LABEL: maxu_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: maxu a0, a0, a1
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: maxu_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: maxu a0, a0, a1
; RV32ZBB-NEXT: ret
  %cmp = icmp ugt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}

; Because i64 patterns are not matched directly on RV32, some i64 patterns do
; not yet have any corresponding bit-manipulation instructions on RV32. This
; test is kept here in case future revisions of the experimental B extension
; introduce instructions suitable for this pattern.

define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: maxu_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: beq a1, a3, .LBB17_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a3, a1
; RV32I-NEXT: beqz a4, .LBB17_3
; RV32I-NEXT: j .LBB17_4
; RV32I-NEXT: .LBB17_2:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: bnez a4, .LBB17_4
; RV32I-NEXT: .LBB17_3:
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: .LBB17_4:
; RV32I-NEXT: ret
;
; RV32B-LABEL: maxu_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: sltu a4, a3, a1
; RV32B-NEXT: cmov a4, a4, a0, a2
; RV32B-NEXT: maxu a0, a0, a2
; RV32B-NEXT: xor a2, a1, a3
; RV32B-NEXT: cmov a0, a2, a4, a0
; RV32B-NEXT: maxu a1, a1, a3
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: maxu_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: mv a4, a0
; RV32ZBB-NEXT: bgeu a3, a1, .LBB17_3
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: beq a1, a3, .LBB17_4
; RV32ZBB-NEXT: .LBB17_2:
; RV32ZBB-NEXT: maxu a1, a1, a3
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB17_3:
; RV32ZBB-NEXT: mv a0, a2
; RV32ZBB-NEXT: bne a1, a3, .LBB17_2
; RV32ZBB-NEXT: .LBB17_4:
; RV32ZBB-NEXT: maxu a0, a4, a2
; RV32ZBB-NEXT: maxu a1, a1, a3
; RV32ZBB-NEXT: ret
  %cmp = icmp ugt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}

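; abs_i32 folds to neg+max under Zbb; abs_i64 keeps a branchy expansion on
; RV32 since there is no single-instruction i64 abs.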
declare i32 @llvm.abs.i32(i32, i1 immarg)

define i32 @abs_i32(i32 %x) {
; RV32I-LABEL: abs_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: ret
;
; RV32B-LABEL: abs_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: neg a1, a0
; RV32B-NEXT: max a0, a0, a1
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: abs_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
  ret i32 %abs
}

declare i64 @llvm.abs.i64(i64, i1 immarg)

define i64 @abs_i64(i64 %x) {
; RV32I-LABEL: abs_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: bgez a1, .LBB19_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: snez a2, a0
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: .LBB19_2:
; RV32I-NEXT: ret
;
; RV32B-LABEL: abs_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: neg a2, a0
; RV32B-NEXT: slti a3, a1, 0
; RV32B-NEXT: cmov a2, a3, a2, a0
; RV32B-NEXT: snez a0, a0
; RV32B-NEXT: add a0, a1, a0
; RV32B-NEXT: neg a0, a0
; RV32B-NEXT: cmov a1, a3, a0, a1
; RV32B-NEXT: mv a0, a2
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: abs_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: bgez a1, .LBB19_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: snez a2, a0
; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: .LBB19_2:
; RV32ZBB-NEXT: ret
  %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
  ret i64 %abs
}

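; zext.h replaces the lui+addi+and masking with 0xffff that RV32I needs for a
; 16-bit zero extension.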
define i32 @zexth_i32(i32 %a) nounwind {
; RV32I-LABEL: zexth_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a1, 16
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV32B-LABEL: zexth_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: zext.h a0, a0
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: zexth_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: zext.h a0, a0
; RV32ZBB-NEXT: ret
  %and = and i32 %a, 65535
  ret i32 %and
}

define i64 @zexth_i64(i64 %a) nounwind {
; RV32I-LABEL: zexth_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a1, 16
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: ret
;
; RV32B-LABEL: zexth_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: zext.h a0, a0
; RV32B-NEXT: mv a1, zero
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: zexth_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: zext.h a0, a0
; RV32ZBB-NEXT: mv a1, zero
; RV32ZBB-NEXT: ret
  %and = and i64 %a, 65535
  ret i64 %and
}

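; bswap is matched to rev8; for i64 the two 32-bit halves are each rev8'd and
; then swapped.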
declare i32 @llvm.bswap.i32(i32)

define i32 @bswap_i32(i32 %a) nounwind {
; RV32I-LABEL: bswap_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: lui a2, 16
; RV32I-NEXT: addi a2, a2, -256
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: srli a2, a0, 24
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: slli a2, a0, 8
; RV32I-NEXT: lui a3, 4080
; RV32I-NEXT: and a2, a2, a3
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: ret
;
; RV32B-LABEL: bswap_i32:
; RV32B: # %bb.0:
; RV32B-NEXT: rev8 a0, a0
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: bswap_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: rev8 a0, a0
; RV32ZBB-NEXT: ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %1
}

declare i64 @llvm.bswap.i64(i64)

define i64 @bswap_i64(i64 %a) {
; RV32I-LABEL: bswap_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a2, a1, 8
; RV32I-NEXT: lui a3, 16
; RV32I-NEXT: addi a3, a3, -256
; RV32I-NEXT: and a2, a2, a3
; RV32I-NEXT: srli a4, a1, 24
; RV32I-NEXT: or a2, a2, a4
; RV32I-NEXT: slli a4, a1, 8
; RV32I-NEXT: lui a5, 4080
; RV32I-NEXT: and a4, a4, a5
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: or a2, a1, a2
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: srli a3, a0, 24
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: slli a3, a0, 8
; RV32I-NEXT: and a3, a3, a5
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: ret
;
; RV32B-LABEL: bswap_i64:
; RV32B: # %bb.0:
; RV32B-NEXT: rev8 a2, a1
; RV32B-NEXT: rev8 a1, a0
; RV32B-NEXT: mv a0, a2
; RV32B-NEXT: ret
;
; RV32ZBB-LABEL: bswap_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: rev8 a2, a1
; RV32ZBB-NEXT: rev8 a1, a0
; RV32ZBB-NEXT: mv a0, a2
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %1
}