1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -riscv-enable-sink-fold < %s \
3 ; RUN: | FileCheck %s -check-prefix=RV32I
4 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -riscv-enable-sink-fold < %s \
5 ; RUN: | FileCheck %s -check-prefix=RV64I
7 ; Check that memory accesses to array elements with large offsets have those
8 ; offsets split into a base offset, plus a smaller offset that is folded into
9 ; the memory operation. We should also only compute that base offset once,
10 ; since it can be shared for all memory operations in this test.
11 define void @test1(ptr %sp, ptr %t, i32 %n) {
13 ; RV32I: # %bb.0: # %entry
14 ; RV32I-NEXT: lw a0, 0(a0)
15 ; RV32I-NEXT: lui a2, 20
16 ; RV32I-NEXT: addi a2, a2, -1920
17 ; RV32I-NEXT: add a1, a1, a2
18 ; RV32I-NEXT: add a0, a0, a2
19 ; RV32I-NEXT: li a2, 2
20 ; RV32I-NEXT: sw a2, 0(a0)
21 ; RV32I-NEXT: li a3, 1
22 ; RV32I-NEXT: sw a3, 4(a0)
23 ; RV32I-NEXT: sw a3, 0(a1)
24 ; RV32I-NEXT: sw a2, 4(a1)
28 ; RV64I: # %bb.0: # %entry
29 ; RV64I-NEXT: ld a0, 0(a0)
30 ; RV64I-NEXT: lui a2, 20
31 ; RV64I-NEXT: addiw a2, a2, -1920
32 ; RV64I-NEXT: add a1, a1, a2
33 ; RV64I-NEXT: add a0, a0, a2
34 ; RV64I-NEXT: li a2, 2
35 ; RV64I-NEXT: sw a2, 0(a0)
36 ; RV64I-NEXT: li a3, 1
37 ; RV64I-NEXT: sw a3, 4(a0)
38 ; RV64I-NEXT: sw a3, 0(a1)
39 ; RV64I-NEXT: sw a2, 4(a1)
; %s is the pointer loaded from %sp; %t is used as passed in.
42 %s = load ptr, ptr %sp
; Elements 20000 and 20001 of an i32 array are at byte offsets
; 20000 * 4 = 80000 and 80004. The expected assembly above builds 80000 once
; (lui 20 -> 20 * 4096 = 81920, then add -1920), adds it to both base pointers,
; and leaves only the small remainders 0 and 4 as store immediates.
43 %gep0 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20000
44 %gep1 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20001
45 %gep2 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20000
46 %gep3 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20001
; Four stores, two through each base pointer, so the shared 80000 offset is
; used by every memory operation in this function.
47 store i32 2, ptr %gep0
48 store i32 1, ptr %gep1
49 store i32 1, ptr %gep2
50 store i32 2, ptr %gep3
54 ; Same check as test1, but with the GEPs outside the entry block.
55 define void @test2(ptr %sp, ptr %t, i32 %n) {
57 ; RV32I: # %bb.0: # %entry
58 ; RV32I-NEXT: li a3, 0
59 ; RV32I-NEXT: lw a0, 0(a0)
60 ; RV32I-NEXT: lui a4, 20
61 ; RV32I-NEXT: addi a4, a4, -1920
62 ; RV32I-NEXT: add a1, a1, a4
63 ; RV32I-NEXT: add a0, a0, a4
64 ; RV32I-NEXT: blez a2, .LBB1_2
65 ; RV32I-NEXT: .LBB1_1: # %while_body
66 ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
67 ; RV32I-NEXT: addi a4, a3, 1
68 ; RV32I-NEXT: sw a4, 0(a0)
69 ; RV32I-NEXT: sw a3, 4(a0)
70 ; RV32I-NEXT: sw a4, 0(a1)
71 ; RV32I-NEXT: sw a3, 4(a1)
72 ; RV32I-NEXT: mv a3, a4
73 ; RV32I-NEXT: blt a4, a2, .LBB1_1
74 ; RV32I-NEXT: .LBB1_2: # %while_end
78 ; RV64I: # %bb.0: # %entry
79 ; RV64I-NEXT: li a3, 0
80 ; RV64I-NEXT: ld a0, 0(a0)
81 ; RV64I-NEXT: lui a4, 20
82 ; RV64I-NEXT: addiw a4, a4, -1920
83 ; RV64I-NEXT: add a1, a1, a4
84 ; RV64I-NEXT: add a0, a0, a4
85 ; RV64I-NEXT: sext.w a2, a2
86 ; RV64I-NEXT: blez a2, .LBB1_2
87 ; RV64I-NEXT: .LBB1_1: # %while_body
88 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
89 ; RV64I-NEXT: addiw a4, a3, 1
90 ; RV64I-NEXT: sw a4, 0(a0)
91 ; RV64I-NEXT: sw a3, 4(a0)
92 ; RV64I-NEXT: sw a4, 0(a1)
93 ; RV64I-NEXT: sw a3, 4(a1)
94 ; RV64I-NEXT: mv a3, a4
95 ; RV64I-NEXT: blt a4, a2, .LBB1_1
96 ; RV64I-NEXT: .LBB1_2: # %while_end
; %s is the pointer loaded from %sp; %t is used as passed in.
99 %s = load ptr, ptr %sp
; Loop counter: 0 when arriving from %entry, %i when arriving from %while_body.
102 %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
; Same byte offsets as in test1 (20000 * 4 = 80000 and 80004), but these GEPs
; follow the phi and so are not in the entry block. The expected assembly above
; still adds 80000 to both base pointers before the .LBB1_1 loop label, so the
; loop body contains only stores with immediates 0 and 4.
103 %gep0 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20000
104 %gep1 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20001
105 %gep2 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20000
106 %gep3 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20001
; Continue looping while the counter is (signed) less than %n.
107 %cmp = icmp slt i32 %phi, %n
108 br i1 %cmp, label %while_body, label %while_end
; NOTE(review): the definition of %i is not visible in this excerpt; the
; expected assembly computes the stored value as the counter plus 1 -- confirm.
112 store i32 %i, ptr %gep0
113 store i32 %phi, ptr %gep1
114 store i32 %i, ptr %gep2
115 store i32 %phi, ptr %gep3
121 ; GEPs have been manually split so the base GEP does not get used by any memory
122 ; instructions. Make sure we use an offset and common base for each of the stores.
124 define void @test3(ptr %t) {
125 ; RV32I-LABEL: test3:
126 ; RV32I: # %bb.0: # %entry
127 ; RV32I-NEXT: lui a1, 20
128 ; RV32I-NEXT: addi a1, a1, -1920
129 ; RV32I-NEXT: add a0, a0, a1
130 ; RV32I-NEXT: li a1, 2
131 ; RV32I-NEXT: sw a1, 4(a0)
132 ; RV32I-NEXT: li a1, 3
133 ; RV32I-NEXT: sw a1, 8(a0)
136 ; RV64I-LABEL: test3:
137 ; RV64I: # %bb.0: # %entry
138 ; RV64I-NEXT: lui a1, 20
139 ; RV64I-NEXT: addiw a1, a1, -1920
140 ; RV64I-NEXT: add a0, a0, a1
141 ; RV64I-NEXT: li a1, 2
142 ; RV64I-NEXT: sw a1, 4(a0)
143 ; RV64I-NEXT: li a1, 3
144 ; RV64I-NEXT: sw a1, 8(a0)
; %splitgep is %t + 80000 bytes. No store uses it directly; it only serves as
; the base for the two derived pointers below.
147 %splitgep = getelementptr i8, ptr %t, i64 80000
; Derived pointers at +4 and +8 from the shared base. The expected assembly
; above adds 80000 to the incoming pointer once (lui 20 -> 81920, then -1920)
; and keeps 4 and 8 as the store immediates.
148 %0 = getelementptr i8, ptr %splitgep, i64 4
149 %1 = getelementptr i8, ptr %splitgep, i64 8
150 store i32 2, ptr %0, align 4
151 store i32 3, ptr %1, align 4
156 define void @test4(ptr %dest) {
157 ; RV32I-LABEL: test4:
159 ; RV32I-NEXT: addi a0, a0, 2047
160 ; RV32I-NEXT: li a1, 1
161 ; RV32I-NEXT: sb a1, 1(a0)
162 ; RV32I-NEXT: sb a1, 2(a0)
163 ; RV32I-NEXT: sb a1, 3(a0)
164 ; RV32I-NEXT: sb a1, 4(a0)
167 ; RV64I-LABEL: test4:
169 ; RV64I-NEXT: addi a0, a0, 2047
170 ; RV64I-NEXT: li a1, 1
171 ; RV64I-NEXT: sb a1, 1(a0)
172 ; RV64I-NEXT: sb a1, 2(a0)
173 ; RV64I-NEXT: sb a1, 3(a0)
174 ; RV64I-NEXT: sb a1, 4(a0)
176 %p1 = getelementptr i8, ptr %dest, i32 2048
178 %p2 = getelementptr i8, ptr %dest, i32 2049
180 %p3 = getelementptr i8, ptr %dest, i32 2050
182 %p4 = getelementptr i8, ptr %dest, i32 2051