1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s -check-prefixes=RV32I
4 ; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s -check-prefixes=RV32XTHEADBB
7 declare i32 @llvm.ctlz.i32(i32, i1)
; Test llvm.ctlz.i32 with the i1 argument set to false.
; The base RV32I checks expect a beqz guard that yields 32 for a zero input
; and an inline srli/or/not/mask/add sequence otherwise; the +xtheadbb checks
; expect a single th.ff1.
9 define i32 @ctlz_i32(i32 %a) nounwind {
10 ; RV32I-LABEL: ctlz_i32:
12 ; RV32I-NEXT: beqz a0, .LBB0_2
13 ; RV32I-NEXT: # %bb.1: # %cond.false
14 ; RV32I-NEXT: srli a1, a0, 1
15 ; RV32I-NEXT: lui a2, 349525
16 ; RV32I-NEXT: or a0, a0, a1
17 ; RV32I-NEXT: addi a1, a2, 1365
18 ; RV32I-NEXT: srli a2, a0, 2
19 ; RV32I-NEXT: or a0, a0, a2
20 ; RV32I-NEXT: srli a2, a0, 4
21 ; RV32I-NEXT: or a0, a0, a2
22 ; RV32I-NEXT: srli a2, a0, 8
23 ; RV32I-NEXT: or a0, a0, a2
24 ; RV32I-NEXT: srli a2, a0, 16
25 ; RV32I-NEXT: or a0, a0, a2
26 ; RV32I-NEXT: not a0, a0
27 ; RV32I-NEXT: srli a2, a0, 1
28 ; RV32I-NEXT: and a1, a2, a1
29 ; RV32I-NEXT: lui a2, 209715
30 ; RV32I-NEXT: addi a2, a2, 819
31 ; RV32I-NEXT: sub a0, a0, a1
32 ; RV32I-NEXT: and a1, a0, a2
33 ; RV32I-NEXT: srli a0, a0, 2
34 ; RV32I-NEXT: and a0, a0, a2
35 ; RV32I-NEXT: lui a2, 61681
36 ; RV32I-NEXT: add a0, a1, a0
37 ; RV32I-NEXT: srli a1, a0, 4
38 ; RV32I-NEXT: add a0, a0, a1
39 ; RV32I-NEXT: addi a1, a2, -241
40 ; RV32I-NEXT: and a0, a0, a1
41 ; RV32I-NEXT: slli a1, a0, 8
42 ; RV32I-NEXT: add a0, a0, a1
43 ; RV32I-NEXT: slli a1, a0, 16
44 ; RV32I-NEXT: add a0, a0, a1
45 ; RV32I-NEXT: srli a0, a0, 24
47 ; RV32I-NEXT: .LBB0_2:
48 ; RV32I-NEXT: li a0, 32
51 ; RV32XTHEADBB-LABEL: ctlz_i32:
52 ; RV32XTHEADBB: # %bb.0:
53 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
54 ; RV32XTHEADBB-NEXT: ret
55 %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
59 declare i64 @llvm.ctlz.i64(i64, i1)
; Test llvm.ctlz.i64 with the i1 argument set to false.
; Both check sets branch on a1: when a1 is zero the count is computed from a0
; and 32 is added, otherwise it is computed from a1 alone; in either case a1
; is then set to 0. The base RV32I checks expand each count inline, while the
; +xtheadbb checks use th.ff1.
61 define i64 @ctlz_i64(i64 %a) nounwind {
62 ; RV32I-LABEL: ctlz_i64:
64 ; RV32I-NEXT: lui a2, 349525
65 ; RV32I-NEXT: lui a3, 209715
66 ; RV32I-NEXT: lui a5, 61681
67 ; RV32I-NEXT: addi a4, a2, 1365
68 ; RV32I-NEXT: addi a3, a3, 819
69 ; RV32I-NEXT: addi a2, a5, -241
70 ; RV32I-NEXT: bnez a1, .LBB1_2
71 ; RV32I-NEXT: # %bb.1:
72 ; RV32I-NEXT: srli a1, a0, 1
73 ; RV32I-NEXT: or a0, a0, a1
74 ; RV32I-NEXT: srli a1, a0, 2
75 ; RV32I-NEXT: or a0, a0, a1
76 ; RV32I-NEXT: srli a1, a0, 4
77 ; RV32I-NEXT: or a0, a0, a1
78 ; RV32I-NEXT: srli a1, a0, 8
79 ; RV32I-NEXT: or a0, a0, a1
80 ; RV32I-NEXT: srli a1, a0, 16
81 ; RV32I-NEXT: or a0, a0, a1
82 ; RV32I-NEXT: not a0, a0
83 ; RV32I-NEXT: srli a1, a0, 1
84 ; RV32I-NEXT: and a1, a1, a4
85 ; RV32I-NEXT: sub a0, a0, a1
86 ; RV32I-NEXT: and a1, a0, a3
87 ; RV32I-NEXT: srli a0, a0, 2
88 ; RV32I-NEXT: and a0, a0, a3
89 ; RV32I-NEXT: add a0, a1, a0
90 ; RV32I-NEXT: srli a1, a0, 4
91 ; RV32I-NEXT: add a0, a0, a1
92 ; RV32I-NEXT: and a0, a0, a2
93 ; RV32I-NEXT: slli a1, a0, 8
94 ; RV32I-NEXT: add a0, a0, a1
95 ; RV32I-NEXT: slli a1, a0, 16
96 ; RV32I-NEXT: add a0, a0, a1
97 ; RV32I-NEXT: srli a0, a0, 24
98 ; RV32I-NEXT: addi a0, a0, 32
99 ; RV32I-NEXT: li a1, 0
101 ; RV32I-NEXT: .LBB1_2:
102 ; RV32I-NEXT: srli a0, a1, 1
103 ; RV32I-NEXT: or a0, a1, a0
104 ; RV32I-NEXT: srli a1, a0, 2
105 ; RV32I-NEXT: or a0, a0, a1
106 ; RV32I-NEXT: srli a1, a0, 4
107 ; RV32I-NEXT: or a0, a0, a1
108 ; RV32I-NEXT: srli a1, a0, 8
109 ; RV32I-NEXT: or a0, a0, a1
110 ; RV32I-NEXT: srli a1, a0, 16
111 ; RV32I-NEXT: or a0, a0, a1
112 ; RV32I-NEXT: not a0, a0
113 ; RV32I-NEXT: srli a1, a0, 1
114 ; RV32I-NEXT: and a1, a1, a4
115 ; RV32I-NEXT: sub a0, a0, a1
116 ; RV32I-NEXT: and a1, a0, a3
117 ; RV32I-NEXT: srli a0, a0, 2
118 ; RV32I-NEXT: and a0, a0, a3
119 ; RV32I-NEXT: add a0, a1, a0
120 ; RV32I-NEXT: srli a1, a0, 4
121 ; RV32I-NEXT: add a0, a0, a1
122 ; RV32I-NEXT: and a0, a0, a2
123 ; RV32I-NEXT: slli a1, a0, 8
124 ; RV32I-NEXT: add a0, a0, a1
125 ; RV32I-NEXT: slli a1, a0, 16
126 ; RV32I-NEXT: add a0, a0, a1
127 ; RV32I-NEXT: srli a0, a0, 24
128 ; RV32I-NEXT: li a1, 0
131 ; RV32XTHEADBB-LABEL: ctlz_i64:
132 ; RV32XTHEADBB: # %bb.0:
133 ; RV32XTHEADBB-NEXT: bnez a1, .LBB1_2
134 ; RV32XTHEADBB-NEXT: # %bb.1:
135 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
136 ; RV32XTHEADBB-NEXT: addi a0, a0, 32
137 ; RV32XTHEADBB-NEXT: li a1, 0
138 ; RV32XTHEADBB-NEXT: ret
139 ; RV32XTHEADBB-NEXT: .LBB1_2:
140 ; RV32XTHEADBB-NEXT: th.ff1 a0, a1
141 ; RV32XTHEADBB-NEXT: li a1, 0
142 ; RV32XTHEADBB-NEXT: ret
143 %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
147 declare i32 @llvm.cttz.i32(i32, i1)
; Test llvm.cttz.i32 with the i1 argument set to false.
; Both check sets return 32 through a beqz guard when the input is zero.
; The base RV32I checks isolate the lowest set bit (a0 & -a0), multiply it by
; a constant through __mulsi3, and use the top 5 bits of the product to index
; the byte table .LCPI2_0. The +xtheadbb checks compute
; 32 - th.ff1(~a0 & (a0 - 1)) instead.
149 define i32 @cttz_i32(i32 %a) nounwind {
150 ; RV32I-LABEL: cttz_i32:
152 ; RV32I-NEXT: beqz a0, .LBB2_2
153 ; RV32I-NEXT: # %bb.1: # %cond.false
154 ; RV32I-NEXT: addi sp, sp, -16
155 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
156 ; RV32I-NEXT: neg a1, a0
157 ; RV32I-NEXT: and a0, a0, a1
158 ; RV32I-NEXT: lui a1, 30667
159 ; RV32I-NEXT: addi a1, a1, 1329
160 ; RV32I-NEXT: call __mulsi3
161 ; RV32I-NEXT: srli a0, a0, 27
162 ; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
163 ; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
164 ; RV32I-NEXT: add a0, a1, a0
165 ; RV32I-NEXT: lbu a0, 0(a0)
166 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
167 ; RV32I-NEXT: addi sp, sp, 16
169 ; RV32I-NEXT: .LBB2_2:
170 ; RV32I-NEXT: li a0, 32
173 ; RV32XTHEADBB-LABEL: cttz_i32:
174 ; RV32XTHEADBB: # %bb.0:
175 ; RV32XTHEADBB-NEXT: beqz a0, .LBB2_2
176 ; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false
177 ; RV32XTHEADBB-NEXT: addi a1, a0, -1
178 ; RV32XTHEADBB-NEXT: not a0, a0
179 ; RV32XTHEADBB-NEXT: and a0, a0, a1
180 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
181 ; RV32XTHEADBB-NEXT: li a1, 32
182 ; RV32XTHEADBB-NEXT: sub a0, a1, a0
183 ; RV32XTHEADBB-NEXT: ret
184 ; RV32XTHEADBB-NEXT: .LBB2_2:
185 ; RV32XTHEADBB-NEXT: li a0, 32
186 ; RV32XTHEADBB-NEXT: ret
187 %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
191 declare i64 @llvm.cttz.i64(i64, i1)
; Test llvm.cttz.i64 with the i1 argument set to false.
; The base RV32I checks run the __mulsi3 / .LCPI3_0 table lookup for both
; incoming registers (a0 saved in s0, a1 saved in s2). When s0 is non-zero its
; lookup is returned; otherwise 32 is added to the lookup for s2, or to a
; literal 32 when s2 is zero as well.
; The +xtheadbb checks branch on a0: if a0 is non-zero the result is
; 32 - th.ff1(~a0 & (a0 - 1)), otherwise 64 - th.ff1(~a1 & (a1 - 1)).
; Both check sets finish by setting a1 to 0.
193 define i64 @cttz_i64(i64 %a) nounwind {
194 ; RV32I-LABEL: cttz_i64:
196 ; RV32I-NEXT: addi sp, sp, -32
197 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
198 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
199 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
200 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
201 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
202 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
203 ; RV32I-NEXT: mv s2, a1
204 ; RV32I-NEXT: mv s0, a0
205 ; RV32I-NEXT: neg a0, a0
206 ; RV32I-NEXT: and a0, s0, a0
207 ; RV32I-NEXT: lui a1, 30667
208 ; RV32I-NEXT: addi s3, a1, 1329
209 ; RV32I-NEXT: mv a1, s3
210 ; RV32I-NEXT: call __mulsi3
211 ; RV32I-NEXT: mv s1, a0
212 ; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
213 ; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
214 ; RV32I-NEXT: neg a0, s2
215 ; RV32I-NEXT: and a0, s2, a0
216 ; RV32I-NEXT: mv a1, s3
217 ; RV32I-NEXT: call __mulsi3
218 ; RV32I-NEXT: bnez s2, .LBB3_3
219 ; RV32I-NEXT: # %bb.1:
220 ; RV32I-NEXT: li a0, 32
221 ; RV32I-NEXT: beqz s0, .LBB3_4
222 ; RV32I-NEXT: .LBB3_2:
223 ; RV32I-NEXT: srli s1, s1, 27
224 ; RV32I-NEXT: add s1, s4, s1
225 ; RV32I-NEXT: lbu a0, 0(s1)
226 ; RV32I-NEXT: j .LBB3_5
227 ; RV32I-NEXT: .LBB3_3:
228 ; RV32I-NEXT: srli a0, a0, 27
229 ; RV32I-NEXT: add a0, s4, a0
230 ; RV32I-NEXT: lbu a0, 0(a0)
231 ; RV32I-NEXT: bnez s0, .LBB3_2
232 ; RV32I-NEXT: .LBB3_4:
233 ; RV32I-NEXT: addi a0, a0, 32
234 ; RV32I-NEXT: .LBB3_5:
235 ; RV32I-NEXT: li a1, 0
236 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
237 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
238 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
239 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
240 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
241 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
242 ; RV32I-NEXT: addi sp, sp, 32
245 ; RV32XTHEADBB-LABEL: cttz_i64:
246 ; RV32XTHEADBB: # %bb.0:
247 ; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2
248 ; RV32XTHEADBB-NEXT: # %bb.1:
249 ; RV32XTHEADBB-NEXT: addi a0, a1, -1
250 ; RV32XTHEADBB-NEXT: not a1, a1
251 ; RV32XTHEADBB-NEXT: and a0, a1, a0
252 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
253 ; RV32XTHEADBB-NEXT: li a1, 64
254 ; RV32XTHEADBB-NEXT: j .LBB3_3
255 ; RV32XTHEADBB-NEXT: .LBB3_2:
256 ; RV32XTHEADBB-NEXT: addi a1, a0, -1
257 ; RV32XTHEADBB-NEXT: not a0, a0
258 ; RV32XTHEADBB-NEXT: and a0, a0, a1
259 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0
260 ; RV32XTHEADBB-NEXT: li a1, 32
261 ; RV32XTHEADBB-NEXT: .LBB3_3:
262 ; RV32XTHEADBB-NEXT: sub a0, a1, a0
263 ; RV32XTHEADBB-NEXT: li a1, 0
264 ; RV32XTHEADBB-NEXT: ret
265 %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; Sign-extend bit 0 of an i32, written as shl 31 followed by ashr exact 31.
; The base RV32I checks keep the slli/srai pair; the +xtheadbb checks expect
; th.ext a0, a0, 0, 0.
269 define i32 @sexti1_i32(i32 %a) nounwind {
270 ; RV32I-LABEL: sexti1_i32:
272 ; RV32I-NEXT: slli a0, a0, 31
273 ; RV32I-NEXT: srai a0, a0, 31
276 ; RV32XTHEADBB-LABEL: sexti1_i32:
277 ; RV32XTHEADBB: # %bb.0:
278 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 0, 0
279 ; RV32XTHEADBB-NEXT: ret
280 %shl = shl i32 %a, 31
281 %shr = ashr exact i32 %shl, 31
; Sign-extend an i1 argument to i32 using the sext instruction.
; The base RV32I checks expect slli/srai by 31; the +xtheadbb checks expect
; th.ext a0, a0, 0, 0.
285 define i32 @sexti1_i32_2(i1 %a) nounwind {
286 ; RV32I-LABEL: sexti1_i32_2:
288 ; RV32I-NEXT: slli a0, a0, 31
289 ; RV32I-NEXT: srai a0, a0, 31
292 ; RV32XTHEADBB-LABEL: sexti1_i32_2:
293 ; RV32XTHEADBB: # %bb.0:
294 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 0, 0
295 ; RV32XTHEADBB-NEXT: ret
296 %sext = sext i1 %a to i32
; Sign-extend the low byte of an i32, written as shl 24 / ashr exact 24.
; The base RV32I checks keep the slli/srai pair; the +xtheadbb checks expect
; th.ext a0, a0, 7, 0.
300 define i32 @sextb_i32(i32 %a) nounwind {
301 ; RV32I-LABEL: sextb_i32:
303 ; RV32I-NEXT: slli a0, a0, 24
304 ; RV32I-NEXT: srai a0, a0, 24
307 ; RV32XTHEADBB-LABEL: sextb_i32:
308 ; RV32XTHEADBB: # %bb.0:
309 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 7, 0
310 ; RV32XTHEADBB-NEXT: ret
311 %shl = shl i32 %a, 24
312 %shr = ashr exact i32 %shl, 24
; Sign-extend the low byte of an i64, written as shl 56 / ashr exact 56.
; The base RV32I checks shift a0 left by 24 into a1, then derive a0 with
; srai 24 and a1 with srai 31. The +xtheadbb checks expect
; th.ext a0, a0, 7, 0 followed by srai a1, a0, 31.
316 define i64 @sextb_i64(i64 %a) nounwind {
317 ; RV32I-LABEL: sextb_i64:
319 ; RV32I-NEXT: slli a1, a0, 24
320 ; RV32I-NEXT: srai a0, a1, 24
321 ; RV32I-NEXT: srai a1, a1, 31
324 ; RV32XTHEADBB-LABEL: sextb_i64:
325 ; RV32XTHEADBB: # %bb.0:
326 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 7, 0
327 ; RV32XTHEADBB-NEXT: srai a1, a0, 31
328 ; RV32XTHEADBB-NEXT: ret
329 %shl = shl i64 %a, 56
330 %shr = ashr exact i64 %shl, 56
; Sign-extend the low halfword of an i32, written as shl 16 / ashr exact 16.
; The base RV32I checks keep the slli/srai pair; the +xtheadbb checks expect
; th.ext a0, a0, 15, 0.
334 define i32 @sexth_i32(i32 %a) nounwind {
335 ; RV32I-LABEL: sexth_i32:
337 ; RV32I-NEXT: slli a0, a0, 16
338 ; RV32I-NEXT: srai a0, a0, 16
341 ; RV32XTHEADBB-LABEL: sexth_i32:
342 ; RV32XTHEADBB: # %bb.0:
343 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 15, 0
344 ; RV32XTHEADBB-NEXT: ret
345 %shl = shl i32 %a, 16
346 %shr = ashr exact i32 %shl, 16
; Negative case: the shift amounts differ (shl 17, ashr exact 16), so this is
; not a plain halfword sign-extension. Both check sets expect the same
; slli 17 / srai 16 pair and no th.ext.
350 define i32 @no_sexth_i32(i32 %a) nounwind {
351 ; RV32I-LABEL: no_sexth_i32:
353 ; RV32I-NEXT: slli a0, a0, 17
354 ; RV32I-NEXT: srai a0, a0, 16
357 ; RV32XTHEADBB-LABEL: no_sexth_i32:
358 ; RV32XTHEADBB: # %bb.0:
359 ; RV32XTHEADBB-NEXT: slli a0, a0, 17
360 ; RV32XTHEADBB-NEXT: srai a0, a0, 16
361 ; RV32XTHEADBB-NEXT: ret
362 %shl = shl i32 %a, 17
363 %shr = ashr exact i32 %shl, 16
; Sign-extend the low halfword of an i64, written as shl 48 / ashr exact 48.
; The base RV32I checks shift a0 left by 16 into a1, then derive a0 with
; srai 16 and a1 with srai 31. The +xtheadbb checks expect
; th.ext a0, a0, 15, 0 followed by srai a1, a0, 31.
367 define i64 @sexth_i64(i64 %a) nounwind {
368 ; RV32I-LABEL: sexth_i64:
370 ; RV32I-NEXT: slli a1, a0, 16
371 ; RV32I-NEXT: srai a0, a1, 16
372 ; RV32I-NEXT: srai a1, a1, 31
375 ; RV32XTHEADBB-LABEL: sexth_i64:
376 ; RV32XTHEADBB: # %bb.0:
377 ; RV32XTHEADBB-NEXT: th.ext a0, a0, 15, 0
378 ; RV32XTHEADBB-NEXT: srai a1, a0, 31
379 ; RV32XTHEADBB-NEXT: ret
380 %shl = shl i64 %a, 48
381 %shr = ashr exact i64 %shl, 48
; Negative case on i64: the shift amounts differ (shl 49, ashr exact 48).
; Both check sets expect the same slli 17 / srai 16 / srai 31 sequence and
; no th.ext.
385 define i64 @no_sexth_i64(i64 %a) nounwind {
386 ; RV32I-LABEL: no_sexth_i64:
388 ; RV32I-NEXT: slli a1, a0, 17
389 ; RV32I-NEXT: srai a0, a1, 16
390 ; RV32I-NEXT: srai a1, a1, 31
393 ; RV32XTHEADBB-LABEL: no_sexth_i64:
394 ; RV32XTHEADBB: # %bb.0:
395 ; RV32XTHEADBB-NEXT: slli a1, a0, 17
396 ; RV32XTHEADBB-NEXT: srai a0, a1, 16
397 ; RV32XTHEADBB-NEXT: srai a1, a1, 31
398 ; RV32XTHEADBB-NEXT: ret
399 %shl = shl i64 %a, 49
400 %shr = ashr exact i64 %shl, 48
; Zero-extend the low halfword of an i32, written as an and with 65535.
; The base RV32I checks expect slli 16 / srli 16; the +xtheadbb checks expect
; th.extu a0, a0, 15, 0.
404 define i32 @zexth_i32(i32 %a) nounwind {
405 ; RV32I-LABEL: zexth_i32:
407 ; RV32I-NEXT: slli a0, a0, 16
408 ; RV32I-NEXT: srli a0, a0, 16
411 ; RV32XTHEADBB-LABEL: zexth_i32:
412 ; RV32XTHEADBB: # %bb.0:
413 ; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
414 ; RV32XTHEADBB-NEXT: ret
415 %and = and i32 %a, 65535
; Zero-extend the low halfword of an i64, written as an and with 65535.
; The base RV32I checks expect slli 16 / srli 16 on a0; the +xtheadbb checks
; expect th.extu a0, a0, 15, 0. Both check sets then set a1 to 0.
419 define i64 @zexth_i64(i64 %a) nounwind {
420 ; RV32I-LABEL: zexth_i64:
422 ; RV32I-NEXT: slli a0, a0, 16
423 ; RV32I-NEXT: srli a0, a0, 16
424 ; RV32I-NEXT: li a1, 0
427 ; RV32XTHEADBB-LABEL: zexth_i64:
428 ; RV32XTHEADBB: # %bb.0:
429 ; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
430 ; RV32XTHEADBB-NEXT: li a1, 0
431 ; RV32XTHEADBB-NEXT: ret
432 %and = and i64 %a, 65535
436 declare i32 @llvm.bswap.i32(i32)
; Test llvm.bswap.i32 (invoked here as a tail call).
; The base RV32I checks expect a shift/mask/or sequence using the mask built
; by lui 16 / addi -256; the +xtheadbb checks expect a single th.rev.
438 define i32 @bswap_i32(i32 %a) nounwind {
439 ; RV32I-LABEL: bswap_i32:
441 ; RV32I-NEXT: srli a1, a0, 8
442 ; RV32I-NEXT: lui a2, 16
443 ; RV32I-NEXT: srli a3, a0, 24
444 ; RV32I-NEXT: addi a2, a2, -256
445 ; RV32I-NEXT: and a1, a1, a2
446 ; RV32I-NEXT: and a2, a0, a2
447 ; RV32I-NEXT: or a1, a1, a3
448 ; RV32I-NEXT: slli a2, a2, 8
449 ; RV32I-NEXT: slli a0, a0, 24
450 ; RV32I-NEXT: or a0, a0, a2
451 ; RV32I-NEXT: or a0, a0, a1
454 ; RV32XTHEADBB-LABEL: bswap_i32:
455 ; RV32XTHEADBB: # %bb.0:
456 ; RV32XTHEADBB-NEXT: th.rev a0, a0
457 ; RV32XTHEADBB-NEXT: ret
458 %1 = tail call i32 @llvm.bswap.i32(i32 %a)
462 declare i64 @llvm.bswap.i64(i64)
; Test llvm.bswap.i64.
; Both check sets byte-reverse each incoming register and exchange them: the
; result a0 is derived from the incoming a1 and the result a1 from the
; incoming a0. The base RV32I checks use a shift/mask/or sequence per
; register; the +xtheadbb checks use two th.rev instructions plus a mv.
; NOTE(review): this define carries no nounwind attribute, unlike the other
; functions visible in this test - confirm whether that is intentional.
464 define i64 @bswap_i64(i64 %a) {
465 ; RV32I-LABEL: bswap_i64:
467 ; RV32I-NEXT: srli a2, a1, 8
468 ; RV32I-NEXT: lui a3, 16
469 ; RV32I-NEXT: srli a4, a1, 24
470 ; RV32I-NEXT: srli a5, a0, 8
471 ; RV32I-NEXT: addi a3, a3, -256
472 ; RV32I-NEXT: and a2, a2, a3
473 ; RV32I-NEXT: or a2, a2, a4
474 ; RV32I-NEXT: srli a4, a0, 24
475 ; RV32I-NEXT: and a5, a5, a3
476 ; RV32I-NEXT: or a4, a5, a4
477 ; RV32I-NEXT: slli a5, a1, 24
478 ; RV32I-NEXT: and a1, a1, a3
479 ; RV32I-NEXT: slli a1, a1, 8
480 ; RV32I-NEXT: or a1, a5, a1
481 ; RV32I-NEXT: and a3, a0, a3
482 ; RV32I-NEXT: slli a0, a0, 24
483 ; RV32I-NEXT: slli a3, a3, 8
484 ; RV32I-NEXT: or a3, a0, a3
485 ; RV32I-NEXT: or a0, a1, a2
486 ; RV32I-NEXT: or a1, a3, a4
489 ; RV32XTHEADBB-LABEL: bswap_i64:
490 ; RV32XTHEADBB: # %bb.0:
491 ; RV32XTHEADBB-NEXT: th.rev a2, a1
492 ; RV32XTHEADBB-NEXT: th.rev a1, a0
493 ; RV32XTHEADBB-NEXT: mv a0, a2
494 ; RV32XTHEADBB-NEXT: ret
495 %1 = call i64 @llvm.bswap.i64(i64 %a)