; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64XTHEADBB
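
; Test that the ctlz/cttz, bit-field sign/zero extension, and bswap patterns
; below select the XTheadBb instructions (th.ff1, th.ext, th.extu, th.rev,
; th.revw) under the experimental rv64-legal-i32 mode, with the plain RV64I
; expansions checked as the baseline.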

declare i32 @llvm.ctlz.i32(i32, i1)
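
; In the RV64I bodies below, ctlz is expanded to the classic branchless
; sequence: smear the highest set bit right (x |= x>>1 ... x |= x>>16),
; invert, then popcount the zeros via the 0x5555/0x3333/0x0f0f masks and a
; multiply by 0x01010101. Base RV64I has no M extension, so the multiply
; becomes a __muldi3 libcall, hence the ra spill around the call.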

define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ctlz_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB0_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addi a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addi a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srliw a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: li a0, 32
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: ctlz_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.extu a0, a0, 31, 0
; RV64XTHEADBB-NEXT: th.ff1 a0, a0
; RV64XTHEADBB-NEXT: addi a0, a0, -32
; RV64XTHEADBB-NEXT: ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  ret i32 %1
}

define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB1_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addi a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addi a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srliw a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: li a0, 32
; RV64I-NEXT: .LBB1_3: # %cond.end
; RV64I-NEXT: li a1, 31
; RV64I-NEXT: subw a0, a1, a0
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: log2_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.extu a0, a0, 31, 0
; RV64XTHEADBB-NEXT: th.ff1 a0, a0
; RV64XTHEADBB-NEXT: addi a0, a0, -32
; RV64XTHEADBB-NEXT: li a1, 31
; RV64XTHEADBB-NEXT: subw a0, a1, a0
; RV64XTHEADBB-NEXT: ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  %2 = sub i32 31, %1
  ret i32 %2
}

define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_ceil_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: li s0, 32
; RV64I-NEXT: li a1, 32
; RV64I-NEXT: beqz a0, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addi a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addi a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srliw a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a1, a0, 24
; RV64I-NEXT: .LBB2_2: # %cond.end
; RV64I-NEXT: subw a0, s0, a1
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: log2_ceil_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: addi a0, a0, -1
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: srli a0, a0, 32
; RV64XTHEADBB-NEXT: th.ff1 a0, a0
; RV64XTHEADBB-NEXT: addi a0, a0, -32
; RV64XTHEADBB-NEXT: li a1, 32
; RV64XTHEADBB-NEXT: subw a0, a1, a0
; RV64XTHEADBB-NEXT: ret
  %1 = sub i32 %a, 1
  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
  %3 = sub i32 32, %2
  ret i32 %3
}

define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findLastSet_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: srliw a0, a0, 1
; RV64I-NEXT: or a0, s0, a0
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addi a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addi a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srliw a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: xori a0, a0, 31
; RV64I-NEXT: snez a1, s0
; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: findLastSet_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.extu a1, a0, 31, 0
; RV64XTHEADBB-NEXT: th.ff1 a1, a1
; RV64XTHEADBB-NEXT: addiw a1, a1, -32
; RV64XTHEADBB-NEXT: xori a1, a1, 31
; RV64XTHEADBB-NEXT: snez a0, a0
; RV64XTHEADBB-NEXT: addiw a0, a0, -1
; RV64XTHEADBB-NEXT: or a0, a0, a1
; RV64XTHEADBB-NEXT: ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  %2 = xor i32 31, %1
  %3 = icmp eq i32 %a, 0
  %4 = select i1 %3, i32 -1, i32 %2
  ret i32 %4
}
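
; The findLastSet/findFirstSet/ffs-style helpers wrap ctlz/cttz in an
; icmp/select so a zero input yields -1 (or 0 for ffs); the expansions
; materialize that branchlessly with snez/seqz, addi(w) -1, and or/and.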

define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-LABEL: ctlz_lshr_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a0, a0, 1
; RV64I-NEXT: beqz a0, .LBB4_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: .cfi_def_cfa_offset 16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addi a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addi a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srliw a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a1, a0, 4
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, 32
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srliw a0, a0, 1
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: srli a0, a0, 32
; RV64XTHEADBB-NEXT: th.ff1 a0, a0
; RV64XTHEADBB-NEXT: addi a0, a0, -32
; RV64XTHEADBB-NEXT: ret
  %1 = lshr i32 %a, 1
  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
  ret i32 %2
}

declare i64 @llvm.ctlz.i64(i64, i1)

define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-LABEL: ctlz_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 32
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: slli a3, a2, 32
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srli a0, a0, 56
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: ctlz_i64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.ff1 a0, a0
; RV64XTHEADBB-NEXT: ret
  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
  ret i64 %1
}
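
; XTheadBb has no dedicated cttz instruction. For i32, both configurations
; fall back to the (x & -x) de Bruijn multiply and table lookup; for i64,
; the expansion computes ~x & (x - 1) and feeds it to th.ff1, so cttz
; becomes 64 minus the leading-zero count.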

declare i32 @llvm.cttz.i32(i32, i1)

define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB6_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: li a0, 32
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: cttz_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: beqz a0, .LBB6_2
; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
; RV64XTHEADBB-NEXT: addi sp, sp, -16
; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: negw a1, a0
; RV64XTHEADBB-NEXT: and a0, a0, a1
; RV64XTHEADBB-NEXT: lui a1, 30667
; RV64XTHEADBB-NEXT: addiw a1, a1, 1329
; RV64XTHEADBB-NEXT: call __muldi3@plt
; RV64XTHEADBB-NEXT: srliw a0, a0, 27
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: srli a0, a0, 32
; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI6_0)
; RV64XTHEADBB-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64XTHEADBB-NEXT: add a0, a1, a0
; RV64XTHEADBB-NEXT: lbu a0, 0(a0)
; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: addi sp, sp, 16
; RV64XTHEADBB-NEXT: ret
; RV64XTHEADBB-NEXT: .LBB6_2:
; RV64XTHEADBB-NEXT: li a0, 32
; RV64XTHEADBB-NEXT: ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  ret i32 %1
}

define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_zero_undef_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: addi sp, sp, -16
; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: negw a1, a0
; RV64XTHEADBB-NEXT: and a0, a0, a1
; RV64XTHEADBB-NEXT: lui a1, 30667
; RV64XTHEADBB-NEXT: addiw a1, a1, 1329
; RV64XTHEADBB-NEXT: call __muldi3@plt
; RV64XTHEADBB-NEXT: srliw a0, a0, 27
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: srli a0, a0, 32
; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI7_0)
; RV64XTHEADBB-NEXT: addi a1, a1, %lo(.LCPI7_0)
; RV64XTHEADBB-NEXT: add a0, a1, a0
; RV64XTHEADBB-NEXT: lbu a0, 0(a0)
; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: addi sp, sp, 16
; RV64XTHEADBB-NEXT: ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  ret i32 %1
}

define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findFirstSet_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: negw a0, a0
; RV64I-NEXT: and a0, s0, a0
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: lui a1, %hi(.LCPI8_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: snez a1, s0
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: findFirstSet_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: addi sp, sp, -16
; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: mv s0, a0
; RV64XTHEADBB-NEXT: negw a0, a0
; RV64XTHEADBB-NEXT: and a0, s0, a0
; RV64XTHEADBB-NEXT: lui a1, 30667
; RV64XTHEADBB-NEXT: addiw a1, a1, 1329
; RV64XTHEADBB-NEXT: call __muldi3@plt
; RV64XTHEADBB-NEXT: srliw a0, a0, 27
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: srli a0, a0, 32
; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI8_0)
; RV64XTHEADBB-NEXT: addi a1, a1, %lo(.LCPI8_0)
; RV64XTHEADBB-NEXT: add a0, a1, a0
; RV64XTHEADBB-NEXT: lbu a0, 0(a0)
; RV64XTHEADBB-NEXT: snez a1, s0
; RV64XTHEADBB-NEXT: addi a1, a1, -1
; RV64XTHEADBB-NEXT: or a0, a1, a0
; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: addi sp, sp, 16
; RV64XTHEADBB-NEXT: ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  %2 = icmp eq i32 %a, 0
  %3 = select i1 %2, i32 -1, i32 %1
  ret i32 %3
}

define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: negw a0, a0
; RV64I-NEXT: and a0, s0, a0
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: addi a0, a0, 1
; RV64I-NEXT: seqz a1, s0
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: ffs_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: addi sp, sp, -16
; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: mv s0, a0
; RV64XTHEADBB-NEXT: negw a0, a0
; RV64XTHEADBB-NEXT: and a0, s0, a0
; RV64XTHEADBB-NEXT: lui a1, 30667
; RV64XTHEADBB-NEXT: addiw a1, a1, 1329
; RV64XTHEADBB-NEXT: call __muldi3@plt
; RV64XTHEADBB-NEXT: srliw a0, a0, 27
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: srli a0, a0, 32
; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI9_0)
; RV64XTHEADBB-NEXT: addi a1, a1, %lo(.LCPI9_0)
; RV64XTHEADBB-NEXT: add a0, a1, a0
; RV64XTHEADBB-NEXT: lbu a0, 0(a0)
; RV64XTHEADBB-NEXT: addi a0, a0, 1
; RV64XTHEADBB-NEXT: seqz a1, s0
; RV64XTHEADBB-NEXT: addi a1, a1, -1
; RV64XTHEADBB-NEXT: and a0, a1, a0
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: srli a0, a0, 32
; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: addi sp, sp, 16
; RV64XTHEADBB-NEXT: ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  %2 = add i32 %1, 1
  %3 = icmp eq i32 %a, 0
  %4 = select i1 %3, i32 0, i32 %2
  ret i32 %4
}

declare i64 @llvm.cttz.i64(i64, i1)

define i64 @cttz_i64(i64 %a) nounwind {
; RV64I-LABEL: cttz_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB10_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, %hi(.LCPI10_0)
; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1)
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srli a0, a0, 58
; RV64I-NEXT: lui a1, %hi(.LCPI10_1)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_1)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: li a0, 64
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: cttz_i64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: beqz a0, .LBB10_2
; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
; RV64XTHEADBB-NEXT: addi a1, a0, -1
; RV64XTHEADBB-NEXT: not a0, a0
; RV64XTHEADBB-NEXT: and a0, a0, a1
; RV64XTHEADBB-NEXT: th.ff1 a0, a0
; RV64XTHEADBB-NEXT: li a1, 64
; RV64XTHEADBB-NEXT: sub a0, a1, a0
; RV64XTHEADBB-NEXT: ret
; RV64XTHEADBB-NEXT: .LBB10_2:
; RV64XTHEADBB-NEXT: li a0, 64
; RV64XTHEADBB-NEXT: ret
  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
  ret i64 %1
}
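
; Sign and zero extension of bytes, halfwords, and other bit fields should
; map to th.ext/th.extu; the no_* variants check that shift pairs with
; mismatched amounts are left as shifts rather than miscombined into a
; bit-field extract.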

define signext i32 @sextb_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sextb_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: sextb_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.ext a0, a0, 7, 0
; RV64XTHEADBB-NEXT: ret
  %shl = shl i32 %a, 24
  %shr = ashr exact i32 %shl, 24
  ret i32 %shr
}

define i64 @sextb_i64(i64 %a) nounwind {
; RV64I-LABEL: sextb_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: sextb_i64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.ext a0, a0, 7, 0
; RV64XTHEADBB-NEXT: ret
  %shl = shl i64 %a, 56
  %shr = ashr exact i64 %shl, 56
  ret i64 %shr
}

define signext i32 @sexth_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sexth_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: sexth_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.ext a0, a0, 15, 0
; RV64XTHEADBB-NEXT: ret
  %shl = shl i32 %a, 16
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}

define signext i32 @no_sexth_i32(i32 signext %a) nounwind {
; RV64I-LABEL: no_sexth_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 17
; RV64I-NEXT: sraiw a0, a0, 16
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: no_sexth_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: slli a0, a0, 17
; RV64XTHEADBB-NEXT: sraiw a0, a0, 16
; RV64XTHEADBB-NEXT: ret
  %shl = shl i32 %a, 17
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}

define i64 @sexth_i64(i64 %a) nounwind {
; RV64I-LABEL: sexth_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: sexth_i64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.ext a0, a0, 15, 0
; RV64XTHEADBB-NEXT: ret
  %shl = shl i64 %a, 48
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}

define i64 @no_sexth_i64(i64 %a) nounwind {
; RV64I-LABEL: no_sexth_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 49
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: no_sexth_i64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: slli a0, a0, 49
; RV64XTHEADBB-NEXT: srai a0, a0, 48
; RV64XTHEADBB-NEXT: ret
  %shl = shl i64 %a, 49
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}

define i32 @zexth_i32(i32 %a) nounwind {
; RV64I-LABEL: zexth_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 16
; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: zexth_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0
; RV64XTHEADBB-NEXT: ret
  %and = and i32 %a, 65535
  ret i32 %and
}

define i64 @zexth_i64(i64 %a) nounwind {
; RV64I-LABEL: zexth_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: zexth_i64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0
; RV64XTHEADBB-NEXT: ret
  %and = and i64 %a, 65535
  ret i64 %and
}

define i64 @zext_bf_i64(i64 %a) nounwind {
; RV64I-LABEL: zext_bf_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 47
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: zext_bf_i64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.extu a0, a0, 16, 1
; RV64XTHEADBB-NEXT: ret
  %1 = lshr i64 %a, 1
  %and = and i64 %1, 65535
  ret i64 %and
}

define i64 @zext_i64_srliw(i64 %a) nounwind {
; RV64I-LABEL: zext_i64_srliw:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a0, a0, 16
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: zext_i64_srliw:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srliw a0, a0, 16
; RV64XTHEADBB-NEXT: ret
  %1 = lshr i64 %a, 16
  %and = and i64 %1, 65535
  ret i64 %and
}
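
; Byte swaps should select th.revw (word) and th.rev (doubleword) in place
; of the RV64I shift-and-or expansions below.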

declare i32 @llvm.bswap.i32(i32)

define signext i32 @bswap_i32(i32 signext %a) nounwind {
; RV64I-LABEL: bswap_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srliw a3, a0, 24
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: and a2, a0, a2
; RV64I-NEXT: slliw a2, a2, 8
; RV64I-NEXT: slliw a0, a0, 24
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: bswap_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.revw a0, a0
; RV64XTHEADBB-NEXT: ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %1
}

; Similar to bswap_i32 but the result is not sign extended.
define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
; RV64I-LABEL: bswap_i32_nosext:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a2, a0, 8
; RV64I-NEXT: lui a3, 16
; RV64I-NEXT: addi a3, a3, -256
; RV64I-NEXT: and a2, a2, a3
; RV64I-NEXT: srliw a4, a0, 24
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: and a3, a0, a3
; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sw a0, 0(a1)
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: bswap_i32_nosext:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.revw a0, a0
; RV64XTHEADBB-NEXT: sw a0, 0(a1)
; RV64XTHEADBB-NEXT: ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  store i32 %1, ptr %x
  ret void
}

declare i64 @llvm.bswap.i64(i64)

define i64 @bswap_i64(i64 %a) {
; RV64I-LABEL: bswap_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: srli a1, a0, 40
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a3, a0, 56
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: lui a4, 4080
; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: srli a5, a0, 8
; RV64I-NEXT: srliw a5, a5, 24
; RV64I-NEXT: slli a5, a5, 24
; RV64I-NEXT: or a3, a5, a3
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: and a4, a0, a4
; RV64I-NEXT: slli a4, a4, 24
; RV64I-NEXT: srliw a3, a0, 24
; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: and a2, a0, a2
; RV64I-NEXT: slli a2, a2, 40
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: bswap_i64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.rev a0, a0
; RV64XTHEADBB-NEXT: ret
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %1
}