1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s -check-prefixes=RV32I
4 ; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s -check-prefixes=RV32XTHEADBB
7 declare i32 @llvm.ctlz.i32(i32, i1)
; ctlz_i32: count-leading-zeros of an i32 through llvm.ctlz.i32, called with
; its i1 operand set to false.
; Plain rv32i expectation: an explicit zero test that returns 32, otherwise a
; srli/or smear of the top set bit, a `not`, and a mask/add/shift bit-counting
; sequence ending in `srli a0, a0, 24`.
; With +xtheadbb the whole body is expected to be a single th.ff1.
9 define i32 @ctlz_i32(i32 %a) nounwind {
10 ; RV32I-LABEL: ctlz_i32:
12 ; RV32I-NEXT: beqz a0, .LBB0_2
13 ; RV32I-NEXT: # %bb.1: # %cond.false
14 ; RV32I-NEXT: srli a1, a0, 1
15 ; RV32I-NEXT: or a0, a0, a1
16 ; RV32I-NEXT: srli a1, a0, 2
17 ; RV32I-NEXT: or a0, a0, a1
18 ; RV32I-NEXT: srli a1, a0, 4
19 ; RV32I-NEXT: or a0, a0, a1
20 ; RV32I-NEXT: srli a1, a0, 8
21 ; RV32I-NEXT: or a0, a0, a1
22 ; RV32I-NEXT: srli a1, a0, 16
23 ; RV32I-NEXT: or a0, a0, a1
24 ; RV32I-NEXT: not a0, a0
25 ; RV32I-NEXT: srli a1, a0, 1
26 ; RV32I-NEXT: lui a2, 349525
27 ; RV32I-NEXT: addi a2, a2, 1365
28 ; RV32I-NEXT: and a1, a1, a2
29 ; RV32I-NEXT: sub a0, a0, a1
30 ; RV32I-NEXT: lui a1, 209715
31 ; RV32I-NEXT: addi a1, a1, 819
32 ; RV32I-NEXT: and a2, a0, a1
33 ; RV32I-NEXT: srli a0, a0, 2
34 ; RV32I-NEXT: and a0, a0, a1
35 ; RV32I-NEXT: add a0, a2, a0
36 ; RV32I-NEXT: srli a1, a0, 4
37 ; RV32I-NEXT: add a0, a0, a1
38 ; RV32I-NEXT: lui a1, 61681
39 ; RV32I-NEXT: addi a1, a1, -241
40 ; RV32I-NEXT: and a0, a0, a1
41 ; RV32I-NEXT: slli a1, a0, 8
42 ; RV32I-NEXT: add a0, a0, a1
43 ; RV32I-NEXT: slli a1, a0, 16
44 ; RV32I-NEXT: add a0, a0, a1
45 ; RV32I-NEXT: srli a0, a0, 24
47 ; RV32I-NEXT: .LBB0_2:
48 ; RV32I-NEXT: li a0, 32
51 ; RV32XTHEADBB-LABEL: ctlz_i32:
52 ; RV32XTHEADBB: # %bb.0:
53 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
54 ; RV32XTHEADBB-NEXT: ret
55 %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
59 declare i64 @llvm.ctlz.i64(i64, i1)
; ctlz_i64: count-leading-zeros of an i64 (llvm.ctlz.i64, i1 operand false)
; on a 32-bit target, where the value arrives split across a0 and a1.
; Both runs branch on a1: if a1 is zero the count is computed from a0 and 32
; is added, otherwise the count is computed from a1 alone. The second result
; register (a1) is set to 0 on every path.
; Plain rv32i materialises the three mask constants once, before the branch,
; and repeats the smear/bit-count sequence in each arm; +xtheadbb uses th.ff1
; in each arm instead.
61 define i64 @ctlz_i64(i64 %a) nounwind {
62 ; RV32I-LABEL: ctlz_i64:
64 ; RV32I-NEXT: lui a2, 349525
65 ; RV32I-NEXT: addi a4, a2, 1365
66 ; RV32I-NEXT: lui a2, 209715
67 ; RV32I-NEXT: addi a3, a2, 819
68 ; RV32I-NEXT: lui a2, 61681
69 ; RV32I-NEXT: addi a2, a2, -241
70 ; RV32I-NEXT: bnez a1, .LBB1_2
71 ; RV32I-NEXT: # %bb.1:
72 ; RV32I-NEXT: srli a1, a0, 1
73 ; RV32I-NEXT: or a0, a0, a1
74 ; RV32I-NEXT: srli a1, a0, 2
75 ; RV32I-NEXT: or a0, a0, a1
76 ; RV32I-NEXT: srli a1, a0, 4
77 ; RV32I-NEXT: or a0, a0, a1
78 ; RV32I-NEXT: srli a1, a0, 8
79 ; RV32I-NEXT: or a0, a0, a1
80 ; RV32I-NEXT: srli a1, a0, 16
81 ; RV32I-NEXT: or a0, a0, a1
82 ; RV32I-NEXT: not a0, a0
83 ; RV32I-NEXT: srli a1, a0, 1
84 ; RV32I-NEXT: and a1, a1, a4
85 ; RV32I-NEXT: sub a0, a0, a1
86 ; RV32I-NEXT: and a1, a0, a3
87 ; RV32I-NEXT: srli a0, a0, 2
88 ; RV32I-NEXT: and a0, a0, a3
89 ; RV32I-NEXT: add a0, a1, a0
90 ; RV32I-NEXT: srli a1, a0, 4
91 ; RV32I-NEXT: add a0, a0, a1
92 ; RV32I-NEXT: and a0, a0, a2
93 ; RV32I-NEXT: slli a1, a0, 8
94 ; RV32I-NEXT: add a0, a0, a1
95 ; RV32I-NEXT: slli a1, a0, 16
96 ; RV32I-NEXT: add a0, a0, a1
97 ; RV32I-NEXT: srli a0, a0, 24
98 ; RV32I-NEXT: addi a0, a0, 32
99 ; RV32I-NEXT: li a1, 0
101 ; RV32I-NEXT: .LBB1_2:
102 ; RV32I-NEXT: srli a0, a1, 1
103 ; RV32I-NEXT: or a0, a1, a0
104 ; RV32I-NEXT: srli a1, a0, 2
105 ; RV32I-NEXT: or a0, a0, a1
106 ; RV32I-NEXT: srli a1, a0, 4
107 ; RV32I-NEXT: or a0, a0, a1
108 ; RV32I-NEXT: srli a1, a0, 8
109 ; RV32I-NEXT: or a0, a0, a1
110 ; RV32I-NEXT: srli a1, a0, 16
111 ; RV32I-NEXT: or a0, a0, a1
112 ; RV32I-NEXT: not a0, a0
113 ; RV32I-NEXT: srli a1, a0, 1
114 ; RV32I-NEXT: and a1, a1, a4
115 ; RV32I-NEXT: sub a0, a0, a1
116 ; RV32I-NEXT: and a1, a0, a3
117 ; RV32I-NEXT: srli a0, a0, 2
118 ; RV32I-NEXT: and a0, a0, a3
119 ; RV32I-NEXT: add a0, a1, a0
120 ; RV32I-NEXT: srli a1, a0, 4
121 ; RV32I-NEXT: add a0, a0, a1
122 ; RV32I-NEXT: and a0, a0, a2
123 ; RV32I-NEXT: slli a1, a0, 8
124 ; RV32I-NEXT: add a0, a0, a1
125 ; RV32I-NEXT: slli a1, a0, 16
126 ; RV32I-NEXT: add a0, a0, a1
127 ; RV32I-NEXT: srli a0, a0, 24
128 ; RV32I-NEXT: li a1, 0
131 ; RV32XTHEADBB-LABEL: ctlz_i64:
132 ; RV32XTHEADBB: # %bb.0:
133 ; RV32XTHEADBB-NEXT: bnez a1, .LBB1_2
134 ; RV32XTHEADBB-NEXT: # %bb.1:
135 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
136 ; RV32XTHEADBB-NEXT: addi a0, a0, 32
137 ; RV32XTHEADBB-NEXT: li a1, 0
138 ; RV32XTHEADBB-NEXT: ret
139 ; RV32XTHEADBB-NEXT: .LBB1_2:
140 ; RV32XTHEADBB-NEXT: th.ff1 a0, a1
141 ; RV32XTHEADBB-NEXT: li a1, 0
142 ; RV32XTHEADBB-NEXT: ret
143 %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
147 declare i32 @llvm.cttz.i32(i32, i1)
; cttz_i32: count-trailing-zeros of an i32 through llvm.cttz.i32, called with
; its i1 operand set to false. Both runs test for zero first and return 32.
; Plain rv32i expectation for non-zero input: isolate the lowest set bit
; (a & -a), multiply it by a constant through a call to __mulsi3, shift the
; product right by 27 and use that as a byte index into the constant-pool
; table .LCPI2_0. The call is why this path spills and reloads ra.
; +xtheadbb expectation for non-zero input: 32 - th.ff1(~a & (a - 1)).
149 define i32 @cttz_i32(i32 %a) nounwind {
150 ; RV32I-LABEL: cttz_i32:
152 ; RV32I-NEXT: beqz a0, .LBB2_2
153 ; RV32I-NEXT: # %bb.1: # %cond.false
154 ; RV32I-NEXT: addi sp, sp, -16
155 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
156 ; RV32I-NEXT: neg a1, a0
157 ; RV32I-NEXT: and a0, a0, a1
158 ; RV32I-NEXT: lui a1, 30667
159 ; RV32I-NEXT: addi a1, a1, 1329
160 ; RV32I-NEXT: call __mulsi3
161 ; RV32I-NEXT: srli a0, a0, 27
162 ; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
163 ; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
164 ; RV32I-NEXT: add a0, a1, a0
165 ; RV32I-NEXT: lbu a0, 0(a0)
166 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
167 ; RV32I-NEXT: addi sp, sp, 16
169 ; RV32I-NEXT: .LBB2_2:
170 ; RV32I-NEXT: li a0, 32
173 ; RV32XTHEADBB-LABEL: cttz_i32:
174 ; RV32XTHEADBB: # %bb.0:
175 ; RV32XTHEADBB-NEXT: beqz a0, .LBB2_2
176 ; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false
177 ; RV32XTHEADBB-NEXT: addi a1, a0, -1
178 ; RV32XTHEADBB-NEXT: not a0, a0
179 ; RV32XTHEADBB-NEXT: and a0, a0, a1
180 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
181 ; RV32XTHEADBB-NEXT: li a1, 32
182 ; RV32XTHEADBB-NEXT: sub a0, a1, a0
183 ; RV32XTHEADBB-NEXT: ret
184 ; RV32XTHEADBB-NEXT: .LBB2_2:
185 ; RV32XTHEADBB-NEXT: li a0, 32
186 ; RV32XTHEADBB-NEXT: ret
187 %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
191 declare i64 @llvm.cttz.i64(i64, i1)
; cttz_i64: count-trailing-zeros of an i64 (llvm.cttz.i64, i1 operand false)
; on a 32-bit target, with the value split across a0 and a1.
; Plain rv32i expectation: both input words are unconditionally run through
; the "isolate lowest set bit, multiply via __mulsi3" step (two calls, hence
; the s0-s4 spills). The result is then selected: if the a0 word is non-zero
; the .LCPI3_0 table lookup on its product is returned; otherwise 32 is added
; to either the table lookup for the a1 word or, when a1 is also zero, to 32.
; +xtheadbb expectation: if a0 is non-zero return 32 - th.ff1(~a0 & (a0 - 1)),
; else return 64 - th.ff1(~a1 & (a1 - 1)).
; The second result register (a1) is set to 0 in both runs.
193 define i64 @cttz_i64(i64 %a) nounwind {
194 ; RV32I-LABEL: cttz_i64:
196 ; RV32I-NEXT: addi sp, sp, -32
197 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
198 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
199 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
200 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
201 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
202 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
203 ; RV32I-NEXT: mv s2, a1
204 ; RV32I-NEXT: mv s0, a0
205 ; RV32I-NEXT: neg a0, a0
206 ; RV32I-NEXT: and a0, s0, a0
207 ; RV32I-NEXT: lui a1, 30667
208 ; RV32I-NEXT: addi s3, a1, 1329
209 ; RV32I-NEXT: mv a1, s3
210 ; RV32I-NEXT: call __mulsi3
211 ; RV32I-NEXT: mv s1, a0
212 ; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
213 ; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
214 ; RV32I-NEXT: neg a0, s2
215 ; RV32I-NEXT: and a0, s2, a0
216 ; RV32I-NEXT: mv a1, s3
217 ; RV32I-NEXT: call __mulsi3
218 ; RV32I-NEXT: bnez s2, .LBB3_3
219 ; RV32I-NEXT: # %bb.1:
220 ; RV32I-NEXT: li a0, 32
221 ; RV32I-NEXT: beqz s0, .LBB3_4
222 ; RV32I-NEXT: .LBB3_2:
223 ; RV32I-NEXT: srli s1, s1, 27
224 ; RV32I-NEXT: add s1, s4, s1
225 ; RV32I-NEXT: lbu a0, 0(s1)
226 ; RV32I-NEXT: j .LBB3_5
227 ; RV32I-NEXT: .LBB3_3:
228 ; RV32I-NEXT: srli a0, a0, 27
229 ; RV32I-NEXT: add a0, s4, a0
230 ; RV32I-NEXT: lbu a0, 0(a0)
231 ; RV32I-NEXT: bnez s0, .LBB3_2
232 ; RV32I-NEXT: .LBB3_4:
233 ; RV32I-NEXT: addi a0, a0, 32
234 ; RV32I-NEXT: .LBB3_5:
235 ; RV32I-NEXT: li a1, 0
236 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
237 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
238 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
239 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
240 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
241 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
242 ; RV32I-NEXT: addi sp, sp, 32
245 ; RV32XTHEADBB-LABEL: cttz_i64:
246 ; RV32XTHEADBB: # %bb.0:
247 ; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2
248 ; RV32XTHEADBB-NEXT: # %bb.1:
249 ; RV32XTHEADBB-NEXT: addi a0, a1, -1
250 ; RV32XTHEADBB-NEXT: not a1, a1
251 ; RV32XTHEADBB-NEXT: and a0, a1, a0
252 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
253 ; RV32XTHEADBB-NEXT: li a1, 64
254 ; RV32XTHEADBB-NEXT: j .LBB3_3
255 ; RV32XTHEADBB-NEXT: .LBB3_2:
256 ; RV32XTHEADBB-NEXT: addi a1, a0, -1
257 ; RV32XTHEADBB-NEXT: not a0, a0
258 ; RV32XTHEADBB-NEXT: and a0, a0, a1
259 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
260 ; RV32XTHEADBB-NEXT: li a1, 32
261 ; RV32XTHEADBB-NEXT: .LBB3_3:
262 ; RV32XTHEADBB-NEXT: sub a0, a1, a0
263 ; RV32XTHEADBB-NEXT: li a1, 0
264 ; RV32XTHEADBB-NEXT: ret
265 %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; sextb_i32: sign-extend the low byte of an i32, written as shl 24 followed by
; ashr exact 24. Plain rv32i is expected to keep the slli/srai pair; +xtheadbb
; is expected to fold it into one `th.ext a0, a0, 7, 0`.
269 define i32 @sextb_i32(i32 %a) nounwind {
270 ; RV32I-LABEL: sextb_i32:
272 ; RV32I-NEXT: slli a0, a0, 24
273 ; RV32I-NEXT: srai a0, a0, 24
276 ; RV32XTHEADBB-LABEL: sextb_i32:
277 ; RV32XTHEADBB: # %bb.0:
278 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 7, 0
279 ; RV32XTHEADBB-NEXT: ret
280 %shl = shl i32 %a, 24
281 %shr = ashr exact i32 %shl, 24
; sextb_i64: sign-extend the low byte of an i64 (shl 56 / ashr exact 56) on a
; 32-bit target. Only a0 of the input is read in either run. The first result
; register gets the sign-extended byte (slli/srai by 24, or `th.ext 7, 0` with
; +xtheadbb) and the second result register is filled by an `srai ..., 31`.
285 define i64 @sextb_i64(i64 %a) nounwind {
286 ; RV32I-LABEL: sextb_i64:
288 ; RV32I-NEXT: slli a1, a0, 24
289 ; RV32I-NEXT: srai a0, a1, 24
290 ; RV32I-NEXT: srai a1, a1, 31
293 ; RV32XTHEADBB-LABEL: sextb_i64:
294 ; RV32XTHEADBB: # %bb.0:
295 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 7, 0
296 ; RV32XTHEADBB-NEXT: srai a1, a0, 31
297 ; RV32XTHEADBB-NEXT: ret
298 %shl = shl i64 %a, 56
299 %shr = ashr exact i64 %shl, 56
; sexth_i32: sign-extend the low halfword of an i32, written as shl 16 followed
; by ashr exact 16. Plain rv32i is expected to keep the slli/srai pair;
; +xtheadbb is expected to fold it into one `th.ext a0, a0, 15, 0`.
303 define i32 @sexth_i32(i32 %a) nounwind {
304 ; RV32I-LABEL: sexth_i32:
306 ; RV32I-NEXT: slli a0, a0, 16
307 ; RV32I-NEXT: srai a0, a0, 16
310 ; RV32XTHEADBB-LABEL: sexth_i32:
311 ; RV32XTHEADBB: # %bb.0:
312 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 15, 0
313 ; RV32XTHEADBB-NEXT: ret
314 %shl = shl i32 %a, 16
315 %shr = ashr exact i32 %shl, 16
; no_sexth_i32: negative test. The shift amounts differ (shl 17, ashr exact 16),
; so this is not a plain halfword sign extension. Both runs are expected to
; emit the same slli 17 / srai 16 pair, i.e. +xtheadbb must not produce th.ext.
319 define i32 @no_sexth_i32(i32 %a) nounwind {
320 ; RV32I-LABEL: no_sexth_i32:
322 ; RV32I-NEXT: slli a0, a0, 17
323 ; RV32I-NEXT: srai a0, a0, 16
326 ; RV32XTHEADBB-LABEL: no_sexth_i32:
327 ; RV32XTHEADBB: # %bb.0:
328 ; RV32XTHEADBB-NEXT: slli a0, a0, 17
329 ; RV32XTHEADBB-NEXT: srai a0, a0, 16
330 ; RV32XTHEADBB-NEXT: ret
331 %shl = shl i32 %a, 17
332 %shr = ashr exact i32 %shl, 16
; sexth_i64: sign-extend the low halfword of an i64 (shl 48 / ashr exact 48) on
; a 32-bit target. Only a0 of the input is read in either run. The first result
; register gets the sign-extended halfword (slli/srai by 16, or `th.ext 15, 0`
; with +xtheadbb) and the second result register is filled by an `srai ..., 31`.
336 define i64 @sexth_i64(i64 %a) nounwind {
337 ; RV32I-LABEL: sexth_i64:
339 ; RV32I-NEXT: slli a1, a0, 16
340 ; RV32I-NEXT: srai a0, a1, 16
341 ; RV32I-NEXT: srai a1, a1, 31
344 ; RV32XTHEADBB-LABEL: sexth_i64:
345 ; RV32XTHEADBB: # %bb.0:
346 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 15, 0
347 ; RV32XTHEADBB-NEXT: srai a1, a0, 31
348 ; RV32XTHEADBB-NEXT: ret
349 %shl = shl i64 %a, 48
350 %shr = ashr exact i64 %shl, 48
; no_sexth_i64: negative test, i64 variant. The shift amounts differ (shl 49,
; ashr exact 48), so this is not a plain halfword sign extension. Both runs
; are expected to emit the identical slli 17 / srai 16 / srai 31 sequence,
; i.e. +xtheadbb must not produce th.ext.
354 define i64 @no_sexth_i64(i64 %a) nounwind {
355 ; RV32I-LABEL: no_sexth_i64:
357 ; RV32I-NEXT: slli a1, a0, 17
358 ; RV32I-NEXT: srai a0, a1, 16
359 ; RV32I-NEXT: srai a1, a1, 31
362 ; RV32XTHEADBB-LABEL: no_sexth_i64:
363 ; RV32XTHEADBB: # %bb.0:
364 ; RV32XTHEADBB-NEXT: slli a1, a0, 17
365 ; RV32XTHEADBB-NEXT: srai a0, a1, 16
366 ; RV32XTHEADBB-NEXT: srai a1, a1, 31
367 ; RV32XTHEADBB-NEXT: ret
368 %shl = shl i64 %a, 49
369 %shr = ashr exact i64 %shl, 48
; zexth_i32: zero-extend the low halfword of an i32, written as `and` with
; 65535. Plain rv32i is expected to use an slli/srli pair by 16; +xtheadbb is
; expected to use one `th.extu a0, a0, 15, 0`.
373 define i32 @zexth_i32(i32 %a) nounwind {
374 ; RV32I-LABEL: zexth_i32:
376 ; RV32I-NEXT: slli a0, a0, 16
377 ; RV32I-NEXT: srli a0, a0, 16
380 ; RV32XTHEADBB-LABEL: zexth_i32:
381 ; RV32XTHEADBB: # %bb.0:
382 ; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
383 ; RV32XTHEADBB-NEXT: ret
384 %and = and i32 %a, 65535
; zexth_i64: zero-extend the low halfword of an i64 (`and` with 65535) on a
; 32-bit target. The first result register is produced as in the i32 case
; (slli/srli by 16, or `th.extu 15, 0` with +xtheadbb) and the second result
; register is set to 0 in both runs.
388 define i64 @zexth_i64(i64 %a) nounwind {
389 ; RV32I-LABEL: zexth_i64:
391 ; RV32I-NEXT: slli a0, a0, 16
392 ; RV32I-NEXT: srli a0, a0, 16
393 ; RV32I-NEXT: li a1, 0
396 ; RV32XTHEADBB-LABEL: zexth_i64:
397 ; RV32XTHEADBB: # %bb.0:
398 ; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
399 ; RV32XTHEADBB-NEXT: li a1, 0
400 ; RV32XTHEADBB-NEXT: ret
401 %and = and i64 %a, 65535
405 declare i32 @llvm.bswap.i32(i32)
; bswap_i32: byte-swap an i32 through a tail call to llvm.bswap.i32.
; Plain rv32i is expected to build the result from shifts by 8 and 24, a
; mask formed by `lui 16` / `addi -256`, and `or`s; +xtheadbb is expected to
; use a single th.rev.
407 define i32 @bswap_i32(i32 %a) nounwind {
408 ; RV32I-LABEL: bswap_i32:
410 ; RV32I-NEXT: srli a1, a0, 8
411 ; RV32I-NEXT: lui a2, 16
412 ; RV32I-NEXT: addi a2, a2, -256
413 ; RV32I-NEXT: and a1, a1, a2
414 ; RV32I-NEXT: srli a3, a0, 24
415 ; RV32I-NEXT: or a1, a1, a3
416 ; RV32I-NEXT: and a2, a0, a2
417 ; RV32I-NEXT: slli a2, a2, 8
418 ; RV32I-NEXT: slli a0, a0, 24
419 ; RV32I-NEXT: or a0, a0, a2
420 ; RV32I-NEXT: or a0, a0, a1
423 ; RV32XTHEADBB-LABEL: bswap_i32:
424 ; RV32XTHEADBB: # %bb.0:
425 ; RV32XTHEADBB-NEXT: th.rev a0, a0
426 ; RV32XTHEADBB-NEXT: ret
427 %1 = tail call i32 @llvm.bswap.i32(i32 %a)
431 declare i64 @llvm.bswap.i64(i64)
; bswap_i64: byte-swap an i64 through llvm.bswap.i64 on a 32-bit target.
; Each input word is byte-swapped on its own and the two words trade places:
; the swapped a1 is returned in a0 and the swapped a0 is returned in a1.
; Plain rv32i does each word with the shift/mask/or sequence, sharing one
; mask constant; +xtheadbb uses one th.rev per word plus a `mv`.
; NOTE(review): this is the only define in the visible part of the file
; without the nounwind attribute - confirm whether that is intentional.
433 define i64 @bswap_i64(i64 %a) {
434 ; RV32I-LABEL: bswap_i64:
436 ; RV32I-NEXT: srli a2, a1, 8
437 ; RV32I-NEXT: lui a3, 16
438 ; RV32I-NEXT: addi a3, a3, -256
439 ; RV32I-NEXT: and a2, a2, a3
440 ; RV32I-NEXT: srli a4, a1, 24
441 ; RV32I-NEXT: or a2, a2, a4
442 ; RV32I-NEXT: and a4, a1, a3
443 ; RV32I-NEXT: slli a4, a4, 8
444 ; RV32I-NEXT: slli a1, a1, 24
445 ; RV32I-NEXT: or a1, a1, a4
446 ; RV32I-NEXT: or a2, a1, a2
447 ; RV32I-NEXT: srli a1, a0, 8
448 ; RV32I-NEXT: and a1, a1, a3
449 ; RV32I-NEXT: srli a4, a0, 24
450 ; RV32I-NEXT: or a1, a1, a4
451 ; RV32I-NEXT: and a3, a0, a3
452 ; RV32I-NEXT: slli a3, a3, 8
453 ; RV32I-NEXT: slli a0, a0, 24
454 ; RV32I-NEXT: or a0, a0, a3
455 ; RV32I-NEXT: or a1, a0, a1
456 ; RV32I-NEXT: mv a0, a2
459 ; RV32XTHEADBB-LABEL: bswap_i64:
460 ; RV32XTHEADBB: # %bb.0:
461 ; RV32XTHEADBB-NEXT: th.rev a2, a1
462 ; RV32XTHEADBB-NEXT: th.rev a1, a0
463 ; RV32XTHEADBB-NEXT: mv a0, a2
464 ; RV32XTHEADBB-NEXT: ret
465 %1 = call i64 @llvm.bswap.i64(i64 %a)