; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64XTHEADBB

declare i32 @llvm.ctlz.i32(i32, i1)

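; Without XTheadBb, i32 ctlz expands to the generic branchy sequence: the
; shift/or ladder smears the leading one bit into all lower positions, the
; result is inverted, and the leading zeros are counted with a SWAR popcount
; whose lui/addiw pairs materialize 0x55555555, 0x33333333 and 0x0f0f0f0f,
; ending in a __muldi3 libcall by 0x01010101 (the RUN lines do not enable M).
; With XTheadBb, th.ff0 (find first zero bit from the MSB, as we read the
; spec) applied to the inverted value shifted into the upper word counts the
; original leading zeros directly, including the zero-input case.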
define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ctlz_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB0_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    srliw a1, a0, 1
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 4
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 8
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 16
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    addiw a2, a2, 1365
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    add a0, a2, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 24
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB0_2:
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: ctlz_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    slli a0, a0, 32
; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  ret i32 %1
}

define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB1_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    srliw a1, a0, 1
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 4
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 8
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 16
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    addiw a2, a2, 1365
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    add a0, a2, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 24
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    j .LBB1_3
; RV64I-NEXT:  .LBB1_2:
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:  .LBB1_3: # %cond.end
; RV64I-NEXT:    li a1, 31
; RV64I-NEXT:    sub a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: log2_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    slli a0, a0, 32
; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 31
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  %2 = sub i32 31, %1
  ret i32 %2
}

define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_ceil_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    addiw a0, a0, -1
; RV64I-NEXT:    li s0, 32
; RV64I-NEXT:    li a1, 32
; RV64I-NEXT:    beqz a0, .LBB2_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    srliw a1, a0, 1
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 4
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 8
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 16
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    addiw a2, a2, 1365
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    add a0, a2, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a1, a0, 24
; RV64I-NEXT:  .LBB2_2: # %cond.end
; RV64I-NEXT:    sub a0, s0, a1
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: log2_ceil_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    addi a0, a0, -1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    slli a0, a0, 32
; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 32
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = sub i32 %a, 1
  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
  %3 = sub i32 32, %2
  ret i32 %3
}

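; findLastSet_i32 maps to 31 ^ ctlz_zero_undef plus a select of -1 for a
; zero input. Both lowerings below handle the select branchlessly: snez
; followed by addi -1 builds an all-ones mask exactly when the input was
; zero, and the OR then forces the -1 result.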
define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findLastSet_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    srliw a0, a0, 1
; RV64I-NEXT:    or a0, s0, a0
; RV64I-NEXT:    srliw a1, a0, 2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 4
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 8
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 16
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    addiw a2, a2, 1365
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    add a0, a2, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 24
; RV64I-NEXT:    xori a0, a0, 31
; RV64I-NEXT:    snez a1, s0
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: findLastSet_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    not a1, a0
; RV64XTHEADBB-NEXT:    slli a1, a1, 32
; RV64XTHEADBB-NEXT:    th.ff0 a1, a1
; RV64XTHEADBB-NEXT:    xori a1, a1, 31
; RV64XTHEADBB-NEXT:    snez a0, a0
; RV64XTHEADBB-NEXT:    addi a0, a0, -1
; RV64XTHEADBB-NEXT:    or a0, a0, a1
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  %2 = xor i32 31, %1
  %3 = icmp eq i32 %a, 0
  %4 = select i1 %3, i32 -1, i32 %2
  ret i32 %4
}

define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-LABEL: ctlz_lshr_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a0, a0, 1
; RV64I-NEXT:    beqz a0, .LBB4_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    srliw a1, a0, 1
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 4
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 8
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srliw a1, a0, 16
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    addiw a2, a2, 1365
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    add a0, a2, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 24
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB4_2:
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    srliw a0, a0, 1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    slli a0, a0, 32
; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = lshr i32 %a, 1
  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
  ret i32 %2
}

declare i64 @llvm.ctlz.i64(i64, i1)

define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-LABEL: ctlz_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB5_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    srli a1, a0, 1
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srli a1, a0, 2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srli a1, a0, 8
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srli a1, a0, 16
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    srli a1, a0, 32
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    addiw a2, a2, 1365
; RV64I-NEXT:    slli a3, a2, 32
; RV64I-NEXT:    add a2, a2, a3
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    add a0, a2, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srli a0, a0, 56
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB5_2:
; RV64I-NEXT:    li a0, 64
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: ctlz_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
  ret i64 %1
}

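; Without XTheadBb, cttz isolates the lowest set bit with (x & -x) and then
; multiplies by 0x077cb531 (lui 30667 / addiw 1329), which looks like the
; classic de Bruijn sequence trick: the top bits of the product index a small
; byte table (.LCPI6_0 and friends). XTheadBb has no trailing-zero count, so
; the lowering builds a mask of the trailing zeros with (~x) & (x - 1) and
; applies th.ff1 (find first one bit from the MSB, as we read the spec):
; clz of that mask is 64 - cttz, which the final li/sub undoes.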
declare i32 @llvm.cttz.i32(i32, i1)

define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB6_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    neg a1, a0
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 30667
; RV64I-NEXT:    addiw a1, a1, 1329
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 27
; RV64I-NEXT:    lui a1, %hi(.LCPI6_0)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB6_2:
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: cttz_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    beqz a0, .LBB6_2
; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    and a0, a0, a1
; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 64
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
; RV64XTHEADBB-NEXT:  .LBB6_2:
; RV64XTHEADBB-NEXT:    li a0, 32
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  ret i32 %1
}

define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_zero_undef_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    neg a1, a0
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 30667
; RV64I-NEXT:    addiw a1, a1, 1329
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 27
; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI7_0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    and a0, a0, a1
; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 64
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  ret i32 %1
}

define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findFirstSet_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    neg a0, a0
; RV64I-NEXT:    and a0, s0, a0
; RV64I-NEXT:    lui a1, 30667
; RV64I-NEXT:    addiw a1, a1, 1329
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 27
; RV64I-NEXT:    lui a1, %hi(.LCPI8_0)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI8_0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    snez a1, s0
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: findFirstSet_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a2, a0
; RV64XTHEADBB-NEXT:    and a1, a2, a1
; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
; RV64XTHEADBB-NEXT:    li a2, 64
; RV64XTHEADBB-NEXT:    sub a2, a2, a1
; RV64XTHEADBB-NEXT:    snez a0, a0
; RV64XTHEADBB-NEXT:    addi a0, a0, -1
; RV64XTHEADBB-NEXT:    or a0, a0, a2
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  %2 = icmp eq i32 %a, 0
  %3 = select i1 %2, i32 -1, i32 %1
  ret i32 %3
}

define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    neg a0, a0
; RV64I-NEXT:    and a0, s0, a0
; RV64I-NEXT:    lui a1, 30667
; RV64I-NEXT:    addiw a1, a1, 1329
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 27
; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI9_0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    addi a0, a0, 1
; RV64I-NEXT:    seqz a1, s0
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    and a0, a1, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: ffs_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a2, a0
; RV64XTHEADBB-NEXT:    and a1, a2, a1
; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
; RV64XTHEADBB-NEXT:    li a2, 65
; RV64XTHEADBB-NEXT:    sub a2, a2, a1
; RV64XTHEADBB-NEXT:    seqz a0, a0
; RV64XTHEADBB-NEXT:    addi a0, a0, -1
; RV64XTHEADBB-NEXT:    and a0, a0, a2
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  %2 = add i32 %1, 1
  %3 = icmp eq i32 %a, 0
  %4 = select i1 %3, i32 0, i32 %2
  ret i32 %4
}

declare i64 @llvm.cttz.i64(i64, i1)

define i64 @cttz_i64(i64 %a) nounwind {
; RV64I-LABEL: cttz_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB10_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    neg a1, a0
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, %hi(.LCPI10_0)
; RV64I-NEXT:    ld a1, %lo(.LCPI10_0)(a1)
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srli a0, a0, 58
; RV64I-NEXT:    lui a1, %hi(.LCPI10_1)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI10_1)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB10_2:
; RV64I-NEXT:    li a0, 64
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: cttz_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    beqz a0, .LBB10_2
; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    and a0, a0, a1
; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 64
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
; RV64XTHEADBB-NEXT:  .LBB10_2:
; RV64XTHEADBB-NEXT:    li a0, 64
; RV64XTHEADBB-NEXT:    ret
  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
  ret i64 %1
}

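; th.ext rd, rs1, msb, lsb sign-extends the bit field rs1[msb:lsb] into rd,
; and th.extu is the zero-extending form (as we read the XTheadBb spec), so
; the shl+ashr and and-immediate idioms below fold to single instructions
; (th.ext a0, a0, 7, 0 acts as sext.b; th.extu a0, a0, 15, 0 as zext.h).
; The no_sexth cases use mismatched shift amounts, leaving the field shifted
; in the result, so they are not a plain bit-field extract and keep the
; shift pair.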
define signext i32 @sextb_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sextb_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 56
; RV64I-NEXT:    srai a0, a0, 56
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sextb_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i32 %a, 24
  %shr = ashr exact i32 %shl, 24
  ret i32 %shr
}

define i64 @sextb_i64(i64 %a) nounwind {
; RV64I-LABEL: sextb_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 56
; RV64I-NEXT:    srai a0, a0, 56
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sextb_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i64 %a, 56
  %shr = ashr exact i64 %shl, 56
  ret i64 %shr
}

define signext i32 @sexth_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sexth_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sexth_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i32 %a, 16
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}

define signext i32 @no_sexth_i32(i32 signext %a) nounwind {
; RV64I-LABEL: no_sexth_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 49
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: no_sexth_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    slli a0, a0, 49
; RV64XTHEADBB-NEXT:    srai a0, a0, 48
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i32 %a, 17
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}

define i64 @sexth_i64(i64 %a) nounwind {
; RV64I-LABEL: sexth_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sexth_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i64 %a, 48
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}

define i64 @no_sexth_i64(i64 %a) nounwind {
; RV64I-LABEL: no_sexth_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 49
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: no_sexth_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    slli a0, a0, 49
; RV64XTHEADBB-NEXT:    srai a0, a0, 48
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i64 %a, 49
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}

define i32 @zexth_i32(i32 %a) nounwind {
; RV64I-LABEL: zexth_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zexth_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
; RV64XTHEADBB-NEXT:    ret
  %and = and i32 %a, 65535
  ret i32 %and
}

define i64 @zexth_i64(i64 %a) nounwind {
; RV64I-LABEL: zexth_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zexth_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
; RV64XTHEADBB-NEXT:    ret
  %and = and i64 %a, 65535
  ret i64 %and
}

define i64 @zext_bf_i64(i64 %a) nounwind {
; RV64I-LABEL: zext_bf_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 47
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zext_bf_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.extu a0, a0, 16, 1
; RV64XTHEADBB-NEXT:    ret
  %1 = lshr i64 %a, 1
  %and = and i64 %1, 65535
  ret i64 %and
}

define i64 @zext_i64_srliw(i64 %a) nounwind {
; RV64I-LABEL: zext_i64_srliw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a0, a0, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zext_i64_srliw:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
; RV64XTHEADBB-NEXT:    ret
  %1 = lshr i64 %a, 16
  %and = and i64 %1, 65535
  ret i64 %and
}

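; For bswap, the generic RV64I lowering shuffles bytes with shift/mask/or
; sequences (lui 16 / addiw -256 materializes the 0xff00 mask, lui 4080 the
; 0xff0000 mask), while XTheadBb provides dedicated byte-reverse
; instructions: th.revw for the low 32-bit word and th.rev for the full
; 64-bit register.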
declare i32 @llvm.bswap.i32(i32)

define signext i32 @bswap_i32(i32 signext %a) nounwind {
; RV64I-LABEL: bswap_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a0, 8
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    addiw a2, a2, -256
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    srliw a3, a0, 24
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    and a2, a0, a2
; RV64I-NEXT:    slli a2, a2, 8
; RV64I-NEXT:    slliw a0, a0, 24
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: bswap_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.revw a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %1
}

; Similar to bswap_i32 but the result is not sign extended.
define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
; RV64I-LABEL: bswap_i32_nosext:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a2, a0, 8
; RV64I-NEXT:    lui a3, 16
; RV64I-NEXT:    addi a3, a3, -256
; RV64I-NEXT:    and a2, a2, a3
; RV64I-NEXT:    srliw a4, a0, 24
; RV64I-NEXT:    or a2, a2, a4
; RV64I-NEXT:    and a3, a0, a3
; RV64I-NEXT:    slli a3, a3, 8
; RV64I-NEXT:    slli a0, a0, 24
; RV64I-NEXT:    or a0, a0, a3
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    sw a0, 0(a1)
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: bswap_i32_nosext:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.revw a0, a0
; RV64XTHEADBB-NEXT:    sw a0, 0(a1)
; RV64XTHEADBB-NEXT:    ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  store i32 %1, ptr %x
  ret void
}

declare i64 @llvm.bswap.i64(i64)

define i64 @bswap_i64(i64 %a) {
; RV64I-LABEL: bswap_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a0, 40
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    addiw a2, a2, -256
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    srli a3, a0, 56
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    srli a3, a0, 24
; RV64I-NEXT:    lui a4, 4080
; RV64I-NEXT:    and a3, a3, a4
; RV64I-NEXT:    srli a5, a0, 8
; RV64I-NEXT:    srliw a5, a5, 24
; RV64I-NEXT:    slli a5, a5, 24
; RV64I-NEXT:    or a3, a5, a3
; RV64I-NEXT:    or a1, a3, a1
; RV64I-NEXT:    and a4, a0, a4
; RV64I-NEXT:    slli a4, a4, 24
; RV64I-NEXT:    srliw a3, a0, 24
; RV64I-NEXT:    slli a3, a3, 32
; RV64I-NEXT:    or a3, a4, a3
; RV64I-NEXT:    and a2, a0, a2
; RV64I-NEXT:    slli a2, a2, 40
; RV64I-NEXT:    slli a0, a0, 56
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    or a0, a0, a3
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: bswap_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.rev a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %1
}