; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64I
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+m,+zba -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBANOZBB
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+m,+zba,+zbb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB
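
; Quick reference for the Zba instructions exercised below (semantics per the
; RISC-V Zba spec; zext32(x) denotes x & 0xffffffff):
;   add.uw    rd, rs1, rs2    rd = rs2 + zext32(rs1)
;   zext.w    rd, rs          alias for add.uw rd, rs, x0
;   slli.uw   rd, rs1, shamt  rd = zext32(rs1) << shamt
;   shNadd    rd, rs1, rs2    rd = rs2 + (rs1 << N), N in {1,2,3}
;   shNadd.uw rd, rs1, rs2    rd = rs2 + (zext32(rs1) << N)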
define i64 @slliuw(i64 %a) nounwind {
; RV64I-LABEL: slliuw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    slli a1, a1, 33
; RV64I-NEXT:    addi a1, a1, -2
; RV64I-NEXT:    slli a0, a0, 1
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: slliuw:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a0, a0, 1
; RV64ZBA-NEXT:    srli a0, a0, 1
; RV64ZBA-NEXT:    slli.uw a0, a0, 1
; RV64ZBA-NEXT:    ret
  %conv1 = shl i64 %a, 1
  %shl = and i64 %conv1, 8589934590
  ret i64 %shl
}
define i128 @slliuw_2(i32 signext %0, ptr %1) {
; RV64I-LABEL: slliuw_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 28
; RV64I-NEXT:    add a1, a1, a0
; RV64I-NEXT:    ld a0, 0(a1)
; RV64I-NEXT:    ld a1, 8(a1)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: slliuw_2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli.uw a0, a0, 4
; RV64ZBA-NEXT:    add a1, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a1)
; RV64ZBA-NEXT:    ld a1, 8(a1)
; RV64ZBA-NEXT:    ret
  %3 = zext i32 %0 to i64
  %4 = getelementptr inbounds i128, ptr %1, i64 %3
  %5 = load i128, ptr %4
  ret i128 %5
}
define i64 @adduw(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: adduw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    srli a1, a1, 32
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: adduw:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    add.uw a0, a1, a0
; RV64ZBA-NEXT:    ret
  %and = and i64 %b, 4294967295
  %add = add i64 %and, %a
  ret i64 %add
}
define signext i8 @adduw_2(i32 signext %0, ptr %1) {
; RV64I-LABEL: adduw_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lb a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: adduw_2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    add.uw a0, a0, a1
; RV64ZBA-NEXT:    lb a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = zext i32 %0 to i64
  %4 = getelementptr inbounds i8, ptr %1, i64 %3
  %5 = load i8, ptr %4
  ret i8 %5
}
define i64 @zextw_i64(i64 %a) nounwind {
; RV64I-LABEL: zextw_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: zextw_i64:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    zext.w a0, a0
; RV64ZBA-NEXT:    ret
  %and = and i64 %a, 4294967295
  ret i64 %and
}
; This makes sure targetShrinkDemandedConstant changes the and immediate to
; allow zext.w or slli+srli. The or below sets bit 0 unconditionally, so that
; bit of the mask is not demanded and 0xFFFFFFFE may be widened to 0xFFFFFFFF.
define i64 @zextw_demandedbits_i64(i64 %0) {
; CHECK-LABEL: zextw_demandedbits_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    srliw a0, a0, 1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    ori a0, a0, 1
; CHECK-NEXT:    ret
  %2 = and i64 %0, 4294967294
  %3 = or i64 %2, 1
  ret i64 %3
}
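
; The shNadd tests below cover the common GEP lowering: an index scaled by
; the element size should fold into a single shNadd (rd = rs2 + (rs1 << N))
; instead of a separate slli+add pair.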
define signext i16 @sh1add(i64 %0, ptr %1) {
; RV64I-LABEL: sh1add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 1
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh1add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh1add a0, a0, a1
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = getelementptr inbounds i16, ptr %1, i64 %0
  %4 = load i16, ptr %3
  ret i16 %4
}

define signext i32 @sh2add(i64 %0, ptr %1) {
; RV64I-LABEL: sh2add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh2add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh2add a0, a0, a1
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = getelementptr inbounds i32, ptr %1, i64 %0
  %4 = load i32, ptr %3
  ret i32 %4
}

define i64 @sh3add(i64 %0, ptr %1) {
; RV64I-LABEL: sh3add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 3
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh3add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh3add a0, a0, a1
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = getelementptr inbounds i64, ptr %1, i64 %0
  %4 = load i64, ptr %3
  ret i64 %4
}
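
; The sh[1-3]adduw tests do the same with a 32-bit index: the zext i32 -> i64
; of the index (written either as an explicit zext or as a shl+and pair)
; should combine with the scaling into shNadd.uw
; (rd = rs2 + (zext32(rs1) << N)).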
define signext i16 @sh1adduw(i32 signext %0, ptr %1) {
; RV64I-LABEL: sh1adduw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 31
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh1adduw:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = zext i32 %0 to i64
  %4 = getelementptr inbounds i16, ptr %1, i64 %3
  %5 = load i16, ptr %4
  ret i16 %5
}

define i64 @sh1adduw_2(i64 %0, i64 %1) {
; RV64I-LABEL: sh1adduw_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    slli a2, a2, 33
; RV64I-NEXT:    addi a2, a2, -2
; RV64I-NEXT:    slli a0, a0, 1
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh1adduw_2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a0, a0, 1
; RV64ZBA-NEXT:    srli a0, a0, 1
; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
; RV64ZBA-NEXT:    ret
  %3 = shl i64 %0, 1
  %4 = and i64 %3, 8589934590
  %5 = add i64 %4, %1
  ret i64 %5
}

define i64 @sh1adduw_3(i64 %0, i64 %1) {
; RV64I-LABEL: sh1adduw_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    slli a2, a2, 33
; RV64I-NEXT:    addi a2, a2, -2
; RV64I-NEXT:    slli a0, a0, 1
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh1adduw_3:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a0, a0, 1
; RV64ZBA-NEXT:    srli a0, a0, 1
; RV64ZBA-NEXT:    slli.uw a0, a0, 1
; RV64ZBA-NEXT:    or a0, a0, a1
; RV64ZBA-NEXT:    ret
  %3 = shl i64 %0, 1
  %4 = and i64 %3, 8589934590
  %5 = or disjoint i64 %4, %1
  ret i64 %5
}
define signext i32 @sh2adduw(i32 signext %0, ptr %1) {
; RV64I-LABEL: sh2adduw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 30
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh2adduw:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = zext i32 %0 to i64
  %4 = getelementptr inbounds i32, ptr %1, i64 %3
  %5 = load i32, ptr %4
  ret i32 %5
}

define i64 @sh2adduw_2(i64 %0, i64 %1) {
; RV64I-LABEL: sh2adduw_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    slli a2, a2, 34
; RV64I-NEXT:    addi a2, a2, -4
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh2adduw_2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a0, a0, 2
; RV64ZBA-NEXT:    srli a0, a0, 2
; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
; RV64ZBA-NEXT:    ret
  %3 = shl i64 %0, 2
  %4 = and i64 %3, 17179869180
  %5 = add i64 %4, %1
  ret i64 %5
}

define i64 @sh2adduw_3(i64 %0, i64 %1) {
; RV64I-LABEL: sh2adduw_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    slli a2, a2, 34
; RV64I-NEXT:    addi a2, a2, -4
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh2adduw_3:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a0, a0, 2
; RV64ZBA-NEXT:    srli a0, a0, 2
; RV64ZBA-NEXT:    slli.uw a0, a0, 2
; RV64ZBA-NEXT:    or a0, a0, a1
; RV64ZBA-NEXT:    ret
  %3 = shl i64 %0, 2
  %4 = and i64 %3, 17179869180
  %5 = or disjoint i64 %4, %1
  ret i64 %5
}
define i64 @sh3adduw(i32 signext %0, ptr %1) {
; RV64I-LABEL: sh3adduw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 29
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh3adduw:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = zext i32 %0 to i64
  %4 = getelementptr inbounds i64, ptr %1, i64 %3
  %5 = load i64, ptr %4
  ret i64 %5
}

define i64 @sh3adduw_2(i64 %0, i64 %1) {
; RV64I-LABEL: sh3adduw_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    slli a2, a2, 35
; RV64I-NEXT:    addi a2, a2, -8
; RV64I-NEXT:    slli a0, a0, 3
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh3adduw_2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a0, a0, 3
; RV64ZBA-NEXT:    srli a0, a0, 3
; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
; RV64ZBA-NEXT:    ret
  %3 = shl i64 %0, 3
  %4 = and i64 %3, 34359738360
  %5 = add i64 %4, %1
  ret i64 %5
}

define i64 @sh3adduw_3(i64 %0, i64 %1) {
; RV64I-LABEL: sh3adduw_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    slli a2, a2, 35
; RV64I-NEXT:    addi a2, a2, -8
; RV64I-NEXT:    slli a0, a0, 3
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh3adduw_3:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a0, a0, 3
; RV64ZBA-NEXT:    srli a0, a0, 3
; RV64ZBA-NEXT:    slli.uw a0, a0, 3
; RV64ZBA-NEXT:    or a0, a0, a1
; RV64ZBA-NEXT:    ret
  %3 = shl i64 %0, 3
  %4 = and i64 %3, 34359738360
  %5 = or disjoint i64 %4, %1
  ret i64 %5
}
; Make sure we use sext.b+srai+zext.w for Zba+Zbb.
; FIXME: The RV64I and Zba only cases can be done with only 3 shifts.
define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i8:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 56
; RV64I-NEXT:    srai a0, a0, 63
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8:
; RV64ZBANOZBB:       # %bb.0:
; RV64ZBANOZBB-NEXT:    slli a0, a0, 56
; RV64ZBANOZBB-NEXT:    srai a0, a0, 63
; RV64ZBANOZBB-NEXT:    zext.w a0, a0
; RV64ZBANOZBB-NEXT:    ret
;
; RV64ZBAZBB-LABEL: sext_ashr_zext_i8:
; RV64ZBAZBB:       # %bb.0:
; RV64ZBAZBB-NEXT:    sext.b a0, a0
; RV64ZBAZBB-NEXT:    srai a0, a0, 9
; RV64ZBAZBB-NEXT:    zext.w a0, a0
; RV64ZBAZBB-NEXT:    ret
  %ext = sext i8 %a to i32
  %1 = ashr i32 %ext, 9
  ret i32 %1
}
define i64 @sh6_sh3_add1(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
; RV64I-LABEL: sh6_sh3_add1:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    slli a1, a1, 6
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh6_sh3_add1:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    slli a1, a1, 6
; RV64ZBA-NEXT:    sh3add a1, a2, a1
; RV64ZBA-NEXT:    add a0, a1, a0
; RV64ZBA-NEXT:    ret
entry:
  %shl = shl i64 %z, 3
  %shl1 = shl i64 %y, 6
  %add = add nsw i64 %shl1, %shl
  %add2 = add nsw i64 %add, %x
  ret i64 %add2
}

define i64 @sh6_sh3_add2(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
; RV64I-LABEL: sh6_sh3_add2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    slli a1, a1, 6
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh6_sh3_add2:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    sh3add a1, a1, a2
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ret
entry:
  %shl = shl i64 %z, 3
  %shl1 = shl i64 %y, 6
  %add = add nsw i64 %shl1, %x
  %add2 = add nsw i64 %add, %shl
  ret i64 %add2
}

define i64 @sh6_sh3_add3(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
; RV64I-LABEL: sh6_sh3_add3:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    slli a1, a1, 6
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh6_sh3_add3:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    slli a1, a1, 6
; RV64ZBA-NEXT:    sh3add a1, a2, a1
; RV64ZBA-NEXT:    add a0, a0, a1
; RV64ZBA-NEXT:    ret
entry:
  %shl = shl i64 %z, 3
  %shl1 = shl i64 %y, 6
  %add = add nsw i64 %shl1, %shl
  %add2 = add nsw i64 %x, %add
  ret i64 %add2
}

define i64 @sh6_sh3_add4(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
; RV64I-LABEL: sh6_sh3_add4:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    slli a1, a1, 6
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh6_sh3_add4:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    slli a1, a1, 6
; RV64ZBA-NEXT:    sh3add a0, a2, a0
; RV64ZBA-NEXT:    add a0, a0, a1
; RV64ZBA-NEXT:    ret
entry:
  %shl = shl i64 %z, 3
  %shl1 = shl i64 %y, 6
  %add = add nsw i64 %x, %shl
  %add2 = add nsw i64 %add, %shl1
  ret i64 %add2
}
; Make sure we use sext.h+srai+zext.w for Zba+Zbb.
; FIXME: The RV64I and Zba only cases can be done with only 3 shifts.
define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srai a0, a0, 57
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16:
; RV64ZBANOZBB:       # %bb.0:
; RV64ZBANOZBB-NEXT:    slli a0, a0, 48
; RV64ZBANOZBB-NEXT:    srai a0, a0, 57
; RV64ZBANOZBB-NEXT:    zext.w a0, a0
; RV64ZBANOZBB-NEXT:    ret
;
; RV64ZBAZBB-LABEL: sext_ashr_zext_i16:
; RV64ZBAZBB:       # %bb.0:
; RV64ZBAZBB-NEXT:    sext.h a0, a0
; RV64ZBAZBB-NEXT:    srai a0, a0, 9
; RV64ZBAZBB-NEXT:    zext.w a0, a0
; RV64ZBAZBB-NEXT:    ret
  %ext = sext i16 %a to i32
  %1 = ashr i32 %ext, 9
  ret i32 %1
}
; This is the IR you get from InstCombine if you take the difference of 2
; pointers and cast it to unsigned before using it as an index.
define signext i16 @sh1adduw_ptrdiff(i64 %diff, ptr %baseptr) {
; RV64I-LABEL: sh1adduw_ptrdiff:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 31
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh1adduw_ptrdiff:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a0, a0, 1
; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %ptrdiff = lshr exact i64 %diff, 1
  %cast = and i64 %ptrdiff, 4294967295
  %ptr = getelementptr inbounds i16, ptr %baseptr, i64 %cast
  %res = load i16, ptr %ptr
  ret i16 %res
}
define signext i32 @sh2adduw_ptrdiff(i64 %diff, ptr %baseptr) {
; RV64I-LABEL: sh2adduw_ptrdiff:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 30
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh2adduw_ptrdiff:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a0, a0, 2
; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %ptrdiff = lshr exact i64 %diff, 2
  %cast = and i64 %ptrdiff, 4294967295
  %ptr = getelementptr inbounds i32, ptr %baseptr, i64 %cast
  %res = load i32, ptr %ptr
  ret i32 %res
}

define i64 @sh3adduw_ptrdiff(i64 %diff, ptr %baseptr) {
; RV64I-LABEL: sh3adduw_ptrdiff:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a0, a0, 3
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 29
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: sh3adduw_ptrdiff:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a0, a0, 3
; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %ptrdiff = lshr exact i64 %diff, 3
  %cast = and i64 %ptrdiff, 4294967295
  %ptr = getelementptr inbounds i64, ptr %baseptr, i64 %cast
  %res = load i64, ptr %ptr
  ret i64 %res
}
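
; In the srliw_*_sh*add tests, the index comes from a 32-bit logical shift
; right. srliw with a nonzero shift amount leaves bit 31 (and the upper 32
; bits) zero, so the zext of its result is free and only srliw + shNadd
; should remain.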
define signext i16 @srliw_1_sh1add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_1_sh1add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 1
; RV64I-NEXT:    slli a1, a1, 1
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_1_sh1add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 1
; RV64ZBA-NEXT:    sh1add a0, a1, a0
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 1
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i16, ptr %0, i64 %4
  %6 = load i16, ptr %5, align 2
  ret i16 %6
}
define i128 @slliuw_ptrdiff(i64 %diff, ptr %baseptr) {
; RV64I-LABEL: slliuw_ptrdiff:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a0, a0, 4
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 28
; RV64I-NEXT:    add a1, a1, a0
; RV64I-NEXT:    ld a0, 0(a1)
; RV64I-NEXT:    ld a1, 8(a1)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: slliuw_ptrdiff:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a0, a0, 4
; RV64ZBA-NEXT:    slli.uw a0, a0, 4
; RV64ZBA-NEXT:    add a1, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a1)
; RV64ZBA-NEXT:    ld a1, 8(a1)
; RV64ZBA-NEXT:    ret
  %ptrdiff = lshr exact i64 %diff, 4
  %cast = and i64 %ptrdiff, 4294967295
  %ptr = getelementptr inbounds i128, ptr %baseptr, i64 %cast
  %res = load i128, ptr %ptr
  ret i128 %res
}
define signext i32 @srliw_2_sh2add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_2_sh2add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 2
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_2_sh2add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 2
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 2
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i32, ptr %0, i64 %4
  %6 = load i32, ptr %5, align 4
  ret i32 %6
}

define i64 @srliw_3_sh3add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_3_sh3add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 3
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_3_sh3add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 3
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 3
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i64, ptr %0, i64 %4
  %6 = load i64, ptr %5, align 8
  ret i64 %6
}
define signext i32 @srliw_1_sh2add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_1_sh2add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 1
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_1_sh2add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 1
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 1
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i32, ptr %0, i64 %4
  %6 = load i32, ptr %5, align 4
  ret i32 %6
}

define i64 @srliw_1_sh3add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_1_sh3add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 1
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_1_sh3add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 1
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 1
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i64, ptr %0, i64 %4
  %6 = load i64, ptr %5, align 8
  ret i64 %6
}

define i64 @srliw_2_sh3add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_2_sh3add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 2
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_2_sh3add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 2
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 2
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i64, ptr %0, i64 %4
  %6 = load i64, ptr %5, align 8
  ret i64 %6
}
define signext i16 @srliw_2_sh1add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_2_sh1add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 2
; RV64I-NEXT:    slli a1, a1, 1
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_2_sh1add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 2
; RV64ZBA-NEXT:    sh1add a0, a1, a0
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 2
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i16, ptr %0, i64 %4
  %6 = load i16, ptr %5, align 2
  ret i16 %6
}

define signext i32 @srliw_3_sh2add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_3_sh2add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 3
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_3_sh2add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 3
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 3
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i32, ptr %0, i64 %4
  %6 = load i32, ptr %5, align 4
  ret i32 %6
}

define i64 @srliw_4_sh3add(ptr %0, i32 signext %1) {
; RV64I-LABEL: srliw_4_sh3add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a1, 4
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srliw_4_sh3add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srliw a1, a1, 4
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i32 %1, 4
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i64, ptr %0, i64 %4
  %6 = load i64, ptr %5, align 8
  ret i64 %6
}
define signext i32 @srli_1_sh2add(ptr %0, i64 %1) {
; RV64I-LABEL: srli_1_sh2add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a1, 1
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_1_sh2add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a1, a1, 1
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i64 %1, 1
  %4 = getelementptr inbounds i32, ptr %0, i64 %3
  %5 = load i32, ptr %4, align 4
  ret i32 %5
}

define i64 @srli_2_sh3add(ptr %0, i64 %1) {
; RV64I-LABEL: srli_2_sh3add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a1, 2
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_2_sh3add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a1, a1, 2
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i64 %1, 2
  %4 = getelementptr inbounds i64, ptr %0, i64 %3
  %5 = load i64, ptr %4, align 8
  ret i64 %5
}
define signext i16 @srli_2_sh1add(ptr %0, i64 %1) {
; RV64I-LABEL: srli_2_sh1add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a1, 2
; RV64I-NEXT:    slli a1, a1, 1
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_2_sh1add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a1, a1, 2
; RV64ZBA-NEXT:    sh1add a0, a1, a0
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i64 %1, 2
  %4 = getelementptr inbounds i16, ptr %0, i64 %3
  %5 = load i16, ptr %4, align 2
  ret i16 %5
}

define signext i32 @srli_3_sh2add(ptr %0, i64 %1) {
; RV64I-LABEL: srli_3_sh2add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a1, 3
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_3_sh2add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a1, a1, 3
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i64 %1, 3
  %4 = getelementptr inbounds i32, ptr %0, i64 %3
  %5 = load i32, ptr %4, align 4
  ret i32 %5
}

define i64 @srli_4_sh3add(ptr %0, i64 %1) {
; RV64I-LABEL: srli_4_sh3add:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a1, 4
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_4_sh3add:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a1, a1, 4
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = lshr i64 %1, 4
  %4 = getelementptr inbounds i64, ptr %0, i64 %3
  %5 = load i64, ptr %4, align 8
  ret i64 %5
}
define signext i16 @shl_2_sh1adduw(ptr %0, i32 signext %1) {
; RV64I-LABEL: shl_2_sh1adduw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    srli a1, a1, 31
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: shl_2_sh1adduw:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a1, a1, 2
; RV64ZBA-NEXT:    sh1add.uw a0, a1, a0
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = shl i32 %1, 2
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i16, ptr %0, i64 %4
  %6 = load i16, ptr %5, align 2
  ret i16 %6
}

define signext i32 @shl_16_sh2adduw(ptr %0, i32 signext %1) {
; RV64I-LABEL: shl_16_sh2adduw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 16
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    srli a1, a1, 30
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: shl_16_sh2adduw:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a1, a1, 16
; RV64ZBA-NEXT:    sh2add.uw a0, a1, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = shl i32 %1, 16
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i32, ptr %0, i64 %4
  %6 = load i32, ptr %5, align 4
  ret i32 %6
}

define i64 @shl_31_sh3adduw(ptr %0, i32 signext %1) {
; RV64I-LABEL: shl_31_sh3adduw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 31
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    srli a1, a1, 29
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: shl_31_sh3adduw:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a1, a1, 31
; RV64ZBA-NEXT:    sh3add.uw a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %3 = shl i32 %1, 31
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i64, ptr %0, i64 %4
  %6 = load i64, ptr %5, align 8
  ret i64 %6
}
define i64 @pack_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: pack_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: pack_i64:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    zext.w a0, a0
; RV64ZBA-NEXT:    slli a1, a1, 32
; RV64ZBA-NEXT:    or a0, a1, a0
; RV64ZBA-NEXT:    ret
  %shl = and i64 %a, 4294967295
  %shl1 = shl i64 %b, 32
  %or = or i64 %shl1, %shl
  ret i64 %or
}

define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: pack_i64_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    srli a1, a1, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: pack_i64_2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    zext.w a0, a0
; RV64ZBA-NEXT:    zext.w a1, a1
; RV64ZBA-NEXT:    slli a1, a1, 32
; RV64ZBA-NEXT:    or a0, a1, a0
; RV64ZBA-NEXT:    ret
  %zexta = zext i32 %a to i64
  %zextb = zext i32 %b to i64
  %shl1 = shl i64 %zextb, 32
  %or = or i64 %shl1, %zexta
  ret i64 %or
}

define i64 @pack_i64_disjoint(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: pack_i64_disjoint:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: pack_i64_disjoint:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    zext.w a0, a0
; RV64ZBA-NEXT:    or a0, a1, a0
; RV64ZBA-NEXT:    ret
  %shl = and i64 %a, 4294967295
  %or = or disjoint i64 %b, %shl
  ret i64 %or
}

define i64 @pack_i64_disjoint_2(i32 signext %a, i64 %b) nounwind {
; RV64I-LABEL: pack_i64_disjoint_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: pack_i64_disjoint_2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    zext.w a0, a0
; RV64ZBA-NEXT:    or a0, a1, a0
; RV64ZBA-NEXT:    ret
  %zexta = zext i32 %a to i64
  %or = or disjoint i64 %b, %zexta
  ret i64 %or
}
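
; The array_index tests index a two-dimensional array, i.e. a GEP with two
; variable indices. Where the row and element scales fit, the address math
; should fold into chained shNadd instructions rather than slli+add
; sequences.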
define i8 @array_index_sh1_sh0(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh1_sh0:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 1
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh1_sh0:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh1add a0, a1, a0
; RV64ZBA-NEXT:    add a0, a0, a2
; RV64ZBA-NEXT:    lbu a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [2 x i8], ptr %p, i64 %idx1, i64 %idx2
  %b = load i8, ptr %a, align 1
  ret i8 %b
}

define i16 @array_index_sh1_sh1(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh1_sh1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 1
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh1_sh1:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    sh1add a0, a2, a0
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [2 x i16], ptr %p, i64 %idx1, i64 %idx2
  %b = load i16, ptr %a, align 2
  ret i16 %b
}

define i32 @array_index_sh1_sh2(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh1_sh2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh1_sh2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    sh2add a0, a2, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [2 x i32], ptr %p, i64 %idx1, i64 %idx2
  %b = load i32, ptr %a, align 4
  ret i32 %b
}

define i64 @array_index_sh1_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh1_sh3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh1_sh3:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh1add a1, a1, a2
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [2 x i64], ptr %p, i64 %idx1, i64 %idx2
  %b = load i64, ptr %a, align 8
  ret i64 %b
}
define i8 @array_index_sh2_sh0(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh2_sh0:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh2_sh0:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    add a0, a0, a2
; RV64ZBA-NEXT:    lbu a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [4 x i8], ptr %p, i64 %idx1, i64 %idx2
  %b = load i8, ptr %a, align 1
  ret i8 %b
}

define i16 @array_index_sh2_sh1(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh2_sh1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 1
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh2_sh1:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    sh1add a0, a2, a0
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [4 x i16], ptr %p, i64 %idx1, i64 %idx2
  %b = load i16, ptr %a, align 2
  ret i16 %b
}

define i32 @array_index_sh2_sh2(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh2_sh2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh2_sh2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh2add a1, a1, a2
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [4 x i32], ptr %p, i64 %idx1, i64 %idx2
  %b = load i32, ptr %a, align 4
  ret i32 %b
}

define i64 @array_index_sh2_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh2_sh3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 5
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh2_sh3:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh2add a1, a1, a2
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [4 x i64], ptr %p, i64 %idx1, i64 %idx2
  %b = load i64, ptr %a, align 8
  ret i64 %b
}
define i8 @array_index_sh3_sh0(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh3_sh0:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh3_sh0:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    add a0, a0, a2
; RV64ZBA-NEXT:    lbu a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [8 x i8], ptr %p, i64 %idx1, i64 %idx2
  %b = load i8, ptr %a, align 1
  ret i8 %b
}

define i16 @array_index_sh3_sh1(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh3_sh1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 1
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh3_sh1:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh3add a1, a1, a2
; RV64ZBA-NEXT:    sh1add a0, a1, a0
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [8 x i16], ptr %p, i64 %idx1, i64 %idx2
  %b = load i16, ptr %a, align 2
  ret i16 %b
}

define i32 @array_index_sh3_sh2(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh3_sh2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 5
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh3_sh2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh3add a1, a1, a2
; RV64ZBA-NEXT:    sh2add a0, a1, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [8 x i32], ptr %p, i64 %idx1, i64 %idx2
  %b = load i32, ptr %a, align 4
  ret i32 %b
}

define i64 @array_index_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh3_sh3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 6
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh3_sh3:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    sh3add a1, a1, a2
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [8 x i64], ptr %p, i64 %idx1, i64 %idx2
  %b = load i64, ptr %a, align 8
  ret i64 %b
}
; Similar to the above, but with an lshr on one of the indices. This requires
; special handling during isel to form a shift pair.
define i64 @array_index_lshr_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_lshr_sh3_sh3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a1, 58
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    slli a1, a1, 6
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_lshr_sh3_sh3:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    srli a1, a1, 58
; RV64ZBA-NEXT:    sh3add a1, a1, a2
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %shr = lshr i64 %idx1, 58
  %a = getelementptr inbounds [8 x i64], ptr %p, i64 %shr, i64 %idx2
  %b = load i64, ptr %a, align 8
  ret i64 %b
}
define i8 @array_index_sh4_sh0(ptr %p, i64 %idx1, i64 %idx2) {
; CHECK-LABEL: array_index_sh4_sh0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    lbu a0, 0(a0)
; CHECK-NEXT:    ret
  %a = getelementptr inbounds [16 x i8], ptr %p, i64 %idx1, i64 %idx2
  %b = load i8, ptr %a, align 1
  ret i8 %b
}

define i16 @array_index_sh4_sh1(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh4_sh1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 5
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 1
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lh a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh4_sh1:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a1, a1, 5
; RV64ZBA-NEXT:    add a0, a0, a1
; RV64ZBA-NEXT:    sh1add a0, a2, a0
; RV64ZBA-NEXT:    lh a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [16 x i16], ptr %p, i64 %idx1, i64 %idx2
  %b = load i16, ptr %a, align 2
  ret i16 %b
}

define i32 @array_index_sh4_sh2(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh4_sh2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 6
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh4_sh2:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a1, a1, 6
; RV64ZBA-NEXT:    add a0, a0, a1
; RV64ZBA-NEXT:    sh2add a0, a2, a0
; RV64ZBA-NEXT:    lw a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [16 x i32], ptr %p, i64 %idx1, i64 %idx2
  %b = load i32, ptr %a, align 4
  ret i32 %b
}

define i64 @array_index_sh4_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64I-LABEL: array_index_sh4_sh3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 7
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a2, a2, 3
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: array_index_sh4_sh3:
; RV64ZBA:       # %bb.0:
; RV64ZBA-NEXT:    slli a1, a1, 7
; RV64ZBA-NEXT:    add a0, a0, a1
; RV64ZBA-NEXT:    sh3add a0, a2, a0
; RV64ZBA-NEXT:    ld a0, 0(a0)
; RV64ZBA-NEXT:    ret
  %a = getelementptr inbounds [16 x i64], ptr %p, i64 %idx1, i64 %idx2
  %b = load i64, ptr %a, align 8
  ret i64 %b
}
define ptr @gep_lshr_i32(ptr %0, i64 %1) {
; RV64I-LABEL: gep_lshr_i32:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    srli a1, a1, 2
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    srli a1, a1, 32
; RV64I-NEXT:    li a2, 80
; RV64I-NEXT:    mul a1, a1, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: gep_lshr_i32:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    srli a1, a1, 2
; RV64ZBA-NEXT:    zext.w a1, a1
; RV64ZBA-NEXT:    li a2, 80
; RV64ZBA-NEXT:    mul a1, a1, a2
; RV64ZBA-NEXT:    add a0, a0, a1
; RV64ZBA-NEXT:    ret
entry:
  %2 = lshr exact i64 %1, 2
  %3 = and i64 %2, 4294967295
  %5 = getelementptr [80 x i8], ptr %0, i64 %3
  ret ptr %5
}
define i64 @srli_slliuw(i64 %1) {
; RV64I-LABEL: srli_slliuw:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 28
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_slliuw:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    srli a0, a0, 2
; RV64ZBA-NEXT:    slli.uw a0, a0, 4
; RV64ZBA-NEXT:    ret
entry:
  %2 = lshr exact i64 %1, 2
  %3 = and i64 %2, 4294967295
  %4 = shl i64 %3, 4
  ret i64 %4
}
define i64 @srli_slliuw_canonical(i64 %0) {
; RV64I-LABEL: srli_slliuw_canonical:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    slli a1, a1, 36
; RV64I-NEXT:    addi a1, a1, -16
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_slliuw_canonical:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    slli a0, a0, 2
; RV64ZBA-NEXT:    srli a0, a0, 4
; RV64ZBA-NEXT:    slli.uw a0, a0, 4
; RV64ZBA-NEXT:    ret
entry:
  %1 = shl i64 %0, 2
  %2 = and i64 %1, 68719476720
  ret i64 %2
}
; Make sure we don't accidentally use slli.uw with a shift of 32.
define i64 @srli_slliuw_negative_test(i64 %0) {
; CHECK-LABEL: srli_slliuw_negative_test:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    srli a0, a0, 6
; CHECK-NEXT:    slli a0, a0, 32
; CHECK-NEXT:    ret
entry:
  %1 = lshr i64 %0, 6
  %2 = and i64 %1, 4294967295
  %3 = shl i64 %2, 32
  ret i64 %3
}
define i64 @srli_slli_i16(i64 %1) {
; RV64I-LABEL: srli_slli_i16:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    slli a0, a0, 4
; RV64I-NEXT:    ret
;
; RV64ZBANOZBB-LABEL: srli_slli_i16:
; RV64ZBANOZBB:       # %bb.0: # %entry
; RV64ZBANOZBB-NEXT:    srli a0, a0, 2
; RV64ZBANOZBB-NEXT:    slli a0, a0, 48
; RV64ZBANOZBB-NEXT:    srli a0, a0, 48
; RV64ZBANOZBB-NEXT:    slli a0, a0, 4
; RV64ZBANOZBB-NEXT:    ret
;
; RV64ZBAZBB-LABEL: srli_slli_i16:
; RV64ZBAZBB:       # %bb.0: # %entry
; RV64ZBAZBB-NEXT:    srli a0, a0, 2
; RV64ZBAZBB-NEXT:    zext.h a0, a0
; RV64ZBAZBB-NEXT:    slli a0, a0, 4
; RV64ZBAZBB-NEXT:    ret
entry:
  %2 = lshr exact i64 %1, 2
  %3 = and i64 %2, 65535
  %4 = shl i64 %3, 4
  ret i64 %4
}
define i64 @srli_slliuw_2(i64 %1) {
; RV64I-LABEL: srli_slliuw_2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    srli a0, a0, 18
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 29
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_slliuw_2:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    srli a0, a0, 18
; RV64ZBA-NEXT:    slli.uw a0, a0, 3
; RV64ZBA-NEXT:    ret
entry:
  %2 = lshr i64 %1, 18
  %3 = and i64 %2, 4294967295
  %4 = shl i64 %3, 3
  ret i64 %4
}
define i64 @srli_slliuw_canonical_2(i64 %0) {
; RV64I-LABEL: srli_slliuw_canonical_2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    slli a1, a1, 35
; RV64I-NEXT:    addi a1, a1, -8
; RV64I-NEXT:    srli a0, a0, 15
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srli_slliuw_canonical_2:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    srli a0, a0, 15
; RV64ZBA-NEXT:    srli a0, a0, 3
; RV64ZBA-NEXT:    slli.uw a0, a0, 3
; RV64ZBA-NEXT:    ret
entry:
  %1 = lshr i64 %0, 15
  %2 = and i64 %1, 34359738360
  ret i64 %2
}
define ptr @srai_srli_sh3add(ptr %0, i64 %1) nounwind {
; RV64I-LABEL: srai_srli_sh3add:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    srai a1, a1, 32
; RV64I-NEXT:    srli a1, a1, 6
; RV64I-NEXT:    slli a1, a1, 3
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: srai_srli_sh3add:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    srai a1, a1, 32
; RV64ZBA-NEXT:    srli a1, a1, 6
; RV64ZBA-NEXT:    sh3add a0, a1, a0
; RV64ZBA-NEXT:    ret
entry:
  %2 = ashr i64 %1, 32
  %3 = lshr i64 %2, 6
  %4 = getelementptr i64, ptr %0, i64 %3
  ret ptr %4
}
define ptr @srai_srli_slli(ptr %0, i64 %1) nounwind {
; CHECK-LABEL: srai_srli_slli:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    srai a1, a1, 32
; CHECK-NEXT:    srli a1, a1, 6
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %2 = ashr i64 %1, 32
  %3 = lshr i64 %2, 6
  %4 = getelementptr i128, ptr %0, i64 %3
  ret ptr %4
}
; Negative test to make sure the peephole added for srai_srli_slli and
; srai_srli_sh3add doesn't break this.
define i64 @srai_andi(i64 %x) nounwind {
; CHECK-LABEL: srai_andi:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    srai a0, a0, 8
; CHECK-NEXT:    andi a0, a0, -8
; CHECK-NEXT:    ret
entry:
  %y = ashr i64 %x, 8
  %z = and i64 %y, -8
  ret i64 %z
}
; Negative test to make sure the peephole added for srai_srli_slli and
; srai_srli_sh3add doesn't break this.
define i64 @srai_lui_and(i64 %x) nounwind {
; CHECK-LABEL: srai_lui_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, 1048574
; CHECK-NEXT:    srai a0, a0, 8
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %y = ashr i64 %x, 8
  %z = and i64 %y, -8192
  ret i64 %z
}
define i64 @add_u32simm32_zextw(i64 %x) nounwind {
; RV64I-LABEL: add_u32simm32_zextw:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    addi a1, a1, -2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a1, a1, 1
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBA-LABEL: add_u32simm32_zextw:
; RV64ZBA:       # %bb.0: # %entry
; RV64ZBA-NEXT:    li a1, -2
; RV64ZBA-NEXT:    zext.w a1, a1
; RV64ZBA-NEXT:    add a0, a0, a1
; RV64ZBA-NEXT:    zext.w a0, a0
; RV64ZBA-NEXT:    ret
entry:
  %add = add i64 %x, 4294967294
  %and = and i64 %add, 4294967295
  ret i64 %and
}