1 ; Test whether several consecutive loads/stores can be clustered (fused) by the scheduler. The
2 ; scheduler prints "Cluster ld/st SU(x) - SU(y)" when SU(x) and SU(y) are fused.
5 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \
6 ; RUN: -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \
7 ; RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s
; store_i64: stores %v into four i64 slots of %P at element indices 3, 2, 1
; and 4, in that order (byte displacements 24, 16, 8 and 32 in the matched STD
; instructions).
; Two scheduling dumps are matched for the same block. In each one, two
; clusters are expected: the stores at displacements 16 and 8, and the stores
; at displacements 24 and 32.
; The first dump is matched with virtual-register operands (%N:g8rc) and the
; second with physical registers ($xN) -- presumably the pre-RA and post-RA
; scheduler runs; confirm against the llc pipeline for this target.
9 define i64 @store_i64(ptr nocapture %P, i64 %v) {
11 ; CHECK: ********** MI Scheduling **********
12 ; CHECK-LABEL: store_i64:%bb.0
13 ; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
14 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
15 ; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24
16 ; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16
17 ; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8
18 ; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32
19 ; CHECK: ********** MI Scheduling **********
20 ; CHECK-LABEL: store_i64:%bb.0
21 ; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
22 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
23 ; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16
24 ; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8
25 ; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24
26 ; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32
27 %arrayidx = getelementptr inbounds i64, ptr %P, i64 3
28 store i64 %v, ptr %arrayidx
29 %arrayidx1 = getelementptr inbounds i64, ptr %P, i64 2
30 store i64 %v, ptr %arrayidx1
31 %arrayidx2 = getelementptr inbounds i64, ptr %P, i64 1
32 store i64 %v, ptr %arrayidx2
33 %arrayidx3 = getelementptr inbounds i64, ptr %P, i64 4
34 store i64 %v, ptr %arrayidx3
; store_i32: stores %v into four i32 slots of %P at element indices 13, 12, 11
; and 14, in that order (byte displacements 52, 48, 44 and 56 in the matched
; STW instructions).
; Both scheduling dumps are expected to report two clusters: the stores at
; displacements 48 and 44, and the stores at displacements 52 and 56.
; The first dump matches the value through the sub_32 subregister of a g8rc
; virtual register; the second matches a physical $rN register.
38 define i32 @store_i32(ptr nocapture %P, i32 %v) {
40 ; CHECK: ********** MI Scheduling **********
41 ; CHECK-LABEL: store_i32:%bb.0
42 ; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
43 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
44 ; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52
45 ; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48
46 ; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44
47 ; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56
48 ; CHECK: ********** MI Scheduling **********
49 ; CHECK-LABEL: store_i32:%bb.0
50 ; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
51 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
52 ; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48
53 ; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44
54 ; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52
55 ; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56
56 %arrayidx = getelementptr inbounds i32, ptr %P, i32 13
57 store i32 %v, ptr %arrayidx
58 %arrayidx1 = getelementptr inbounds i32, ptr %P, i32 12
59 store i32 %v, ptr %arrayidx1
60 %arrayidx2 = getelementptr inbounds i32, ptr %P, i32 11
61 store i32 %v, ptr %arrayidx2
62 %arrayidx3 = getelementptr inbounds i32, ptr %P, i32 14
63 store i32 %v, ptr %arrayidx3
; store_i64_neg: same shape as the positive-offset i64 case, but with negative
; element indices -3, -1, -2 and -4 (byte displacements -24, -8, -16 and -32
; in the matched STD instructions).
; Both scheduling dumps are expected to report two clusters: the stores at
; displacements -24 and -32, and the stores at displacements -8 and -16.
; NOTE(review): the function references attribute group #0, whose definition
; is not visible in this part of the file.
67 define void @store_i64_neg(ptr nocapture %P, i64 %v) #0 {
69 ; CHECK: ********** MI Scheduling **********
70 ; CHECK-LABEL: store_i64_neg:%bb.0
71 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
72 ; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
73 ; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24
74 ; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8
75 ; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16
76 ; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32
77 ; CHECK: ********** MI Scheduling **********
78 ; CHECK-LABEL: store_i64_neg:%bb.0
79 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
80 ; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
81 ; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8
82 ; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16
83 ; CHECK: SU([[SU2]]): STD renamable $x[[REG]], -24
84 ; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32
85 %arrayidx = getelementptr inbounds i64, ptr %P, i64 -3
86 store i64 %v, ptr %arrayidx
87 %arrayidx1 = getelementptr inbounds i64, ptr %P, i64 -1
88 store i64 %v, ptr %arrayidx1
89 %arrayidx2 = getelementptr inbounds i64, ptr %P, i64 -2
90 store i64 %v, ptr %arrayidx2
91 %arrayidx3 = getelementptr inbounds i64, ptr %P, i64 -4
92 store i64 %v, ptr %arrayidx3
; store_i32_neg: stores %v into four i32 slots of %P at negative element
; indices -3, -1, -2 and -4 (byte displacements -12, -4, -8 and -16 in the
; matched STW instructions).
; Both scheduling dumps are expected to report two clusters: the stores at
; displacements -12 and -16, and the stores at displacements -4 and -8.
; The directives of the second dump are spelled with a space after the colon,
; like every other directive in this file.
; NOTE(review): the function references attribute group #0, whose definition
; is not visible in this part of the file.
96 define void @store_i32_neg(ptr nocapture %P, i32 %v) #0 {
98 ; CHECK: ********** MI Scheduling **********
99 ; CHECK-LABEL: store_i32_neg:%bb.0
100 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
101 ; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
102 ; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12
103 ; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4
104 ; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8
105 ; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16
106 ; CHECK: ********** MI Scheduling **********
107 ; CHECK-LABEL: store_i32_neg:%bb.0
108 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
109 ; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
110 ; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4
111 ; CHECK: SU([[SU1]]): STW renamable $r[[REG]], -8
112 ; CHECK: SU([[SU2]]): STW renamable $r[[REG]], -12
113 ; CHECK: SU([[SU3]]): STW renamable $r[[REG]], -16
114 %arrayidx = getelementptr inbounds i32, ptr %P, i32 -3
115 store i32 %v, ptr %arrayidx
116 %arrayidx1 = getelementptr inbounds i32, ptr %P, i32 -1
117 store i32 %v, ptr %arrayidx1
118 %arrayidx2 = getelementptr inbounds i32, ptr %P, i32 -2
119 store i32 %v, ptr %arrayidx2
120 %arrayidx3 = getelementptr inbounds i32, ptr %P, i32 -4
121 store i32 %v, ptr %arrayidx3
; store_double: stores %v into four double slots of %P at element indices 3,
; 1, 2 and 4, in that order (byte displacements 24, 8, 16 and 32).
; Both scheduling dumps are expected to report two clusters: the stores at
; displacements 8 and 16, and the stores at displacements 24 and 32.
; The first dump matches DFSTOREf64 on a vsfrc virtual register; the second
; matches STFD on a physical $fN register.
125 define void @store_double(ptr nocapture %P, double %v) {
127 ; CHECK: ********** MI Scheduling **********
128 ; CHECK-LABEL: store_double:%bb.0
129 ; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
130 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
131 ; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24
132 ; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8
133 ; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16
134 ; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32
135 ; CHECK: ********** MI Scheduling **********
136 ; CHECK-LABEL: store_double:%bb.0
137 ; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
138 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
139 ; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8
140 ; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16
141 ; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24
142 ; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32
143 %arrayidx = getelementptr inbounds double, ptr %P, i64 3
144 store double %v, ptr %arrayidx
145 %arrayidx1 = getelementptr inbounds double, ptr %P, i64 1
146 store double %v, ptr %arrayidx1
147 %arrayidx2 = getelementptr inbounds double, ptr %P, i64 2
148 store double %v, ptr %arrayidx2
149 %arrayidx3 = getelementptr inbounds double, ptr %P, i64 4
150 store double %v, ptr %arrayidx3
; store_float: stores %v into four float slots of %P at element indices 3, 1,
; 2 and 4, in that order (byte displacements 12, 4, 8 and 16).
; This is a negative test: no "Cluster ld/st" message may appear between the
; block label and the first matched store in either scheduling dump.
; NOTE(review): the reason single-precision stores are not clustered is not
; stated in this file; confirm against the PowerPC clustering hook.
; The scheduling-unit numbers are matched with an anonymous regex here because
; nothing in this function refers back to them. Naming a capture that is only
; defined in an earlier function would tie this test to that function's
; numbering and would be an undefined use under scoped variables.
154 define void @store_float(ptr nocapture %P, float %v) {
156 ; CHECK: ********** MI Scheduling **********
157 ; CHECK-LABEL: store_float:%bb.0
158 ; CHECK-NOT: Cluster ld/st
160 ; CHECK: SU({{[0-9]+}}): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12
161 ; CHECK: SU({{[0-9]+}}): DFSTOREf32 %[[REG]]:vssrc, 4
162 ; CHECK: SU({{[0-9]+}}): DFSTOREf32 %[[REG]]:vssrc, 8
163 ; CHECK: SU({{[0-9]+}}): DFSTOREf32 %[[REG]]:vssrc, 16
164 ; CHECK: ********** MI Scheduling **********
165 ; CHECK-LABEL: store_float:%bb.0
166 ; CHECK-NOT: Cluster ld/st
168 ; CHECK: SU({{[0-9]+}}): STFS renamable $f[[REG:[0-9]+]], 12
169 ; CHECK: SU({{[0-9]+}}): STFS renamable $f[[REG]], 4
170 ; CHECK: SU({{[0-9]+}}): STFS renamable $f[[REG]], 8
171 ; CHECK: SU({{[0-9]+}}): STFS renamable $f[[REG]], 16
172 %arrayidx = getelementptr inbounds float, ptr %P, i64 3
173 store float %v, ptr %arrayidx
174 %arrayidx1 = getelementptr inbounds float, ptr %P, i64 1
175 store float %v, ptr %arrayidx1
176 %arrayidx2 = getelementptr inbounds float, ptr %P, i64 2
177 store float %v, ptr %arrayidx2
178 %arrayidx3 = getelementptr inbounds float, ptr %P, i64 4
179 store float %v, ptr %arrayidx3
183 ; Volatile stores must not be clustered (fused) with one another.
; store_volatile: performs four volatile i64 stores of %v to %P at element
; indices 3, 2, 1 and 4 (byte displacements 24, 16, 8 and 32) -- the same
; addresses as the non-volatile i64 case, which does expect clustering.
; This is a negative test: no "Cluster ld/st" message may appear between the
; block label and the first matched store in either scheduling dump.
; The scheduling-unit numbers are matched with an anonymous regex because
; nothing in this function refers back to them; reusing a capture defined only
; in an earlier function would couple this test to that function's numbering.
184 define i64 @store_volatile(ptr nocapture %P, i64 %v) {
186 ; CHECK: ********** MI Scheduling **********
187 ; CHECK-LABEL: store_volatile:%bb.0
188 ; CHECK-NOT: Cluster ld/st
189 ; CHECK: SU({{[0-9]+}}): STD %[[REG:[0-9]+]]:g8rc, 24
190 ; CHECK: SU({{[0-9]+}}): STD %[[REG]]:g8rc, 16
191 ; CHECK: SU({{[0-9]+}}): STD %[[REG]]:g8rc, 8
192 ; CHECK: SU({{[0-9]+}}): STD %[[REG]]:g8rc, 32
193 ; CHECK: ********** MI Scheduling **********
194 ; CHECK-LABEL: store_volatile:%bb.0
195 ; CHECK-NOT: Cluster ld/st
196 ; CHECK: SU({{[0-9]+}}): STD renamable $x[[REG:[0-9]+]], 24
197 ; CHECK: SU({{[0-9]+}}): STD renamable $x[[REG]], 16
198 ; CHECK: SU({{[0-9]+}}): STD renamable $x[[REG]], 8
199 ; CHECK: SU({{[0-9]+}}): STD renamable $x[[REG]], 32
200 %arrayidx = getelementptr inbounds i64, ptr %P, i64 3
201 store volatile i64 %v, ptr %arrayidx
202 %arrayidx1 = getelementptr inbounds i64, ptr %P, i64 2
203 store volatile i64 %v, ptr %arrayidx1
204 %arrayidx2 = getelementptr inbounds i64, ptr %P, i64 1
205 store volatile i64 %v, ptr %arrayidx2
206 %arrayidx3 = getelementptr inbounds i64, ptr %P, i64 4
207 store volatile i64 %v, ptr %arrayidx3
; Zero-initialized global array of 100 i32 elements (4-byte aligned); the
; STW/STW8 tests below store into elements 5, 6 and 7 of it.
211 @p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4
; store_i32_stw_stw8: stores the constant 9 to @p[6], %n to @p[7] and %n + %m
; to @p[5].
; Both scheduling dumps are expected to report a cluster that pairs an STW8 at
; displacement 24 with an STW at displacement 20, i.e. two different store
; opcodes (64-bit and 32-bit register operands) fused together.
; Only that one pair is matched; the store to @p[7] is not checked here.
213 define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n) {
215 ; CHECK: ********** MI Scheduling **********
216 ; CHECK-LABEL: store_i32_stw_stw8:%bb.0
217 ; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]])
218 ; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24
219 ; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20
220 ; CHECK: ********** MI Scheduling **********
221 ; CHECK-LABEL: store_i32_stw_stw8:%bb.0
222 ; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]])
223 ; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24
224 ; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20
225 store i32 9, ptr getelementptr inbounds ([100 x i32], ptr @p, i64 0, i64 6), align 4
226 store i32 %n, ptr getelementptr inbounds ([100 x i32], ptr @p, i64 0, i64 7), align 4
227 %add = add nsw i32 %n, %m
228 store i32 %add, ptr getelementptr inbounds ([100 x i32], ptr @p, i64 0, i64 5), align 4
; store_i32_stw8: stores the constant 9 to @p[6] and %n to @p[7].
; Both scheduling dumps are expected to report a single cluster made of two
; STW8 instructions, at displacements 24 and 28.
; NOTE(review): %m is not used by the visible body.
232 define void @store_i32_stw8(i32 signext %m, i32 signext %n) {
234 ; CHECK: ********** MI Scheduling **********
235 ; CHECK-LABEL: store_i32_stw8:%bb.0
236 ; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]])
237 ; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24
238 ; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28
239 ; CHECK: ********** MI Scheduling **********
240 ; CHECK-LABEL: store_i32_stw8:%bb.0
241 ; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
242 ; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24
243 ; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28
244 store i32 9, ptr getelementptr inbounds ([100 x i32], ptr @p, i64 0, i64 6), align 4
245 store i32 %n, ptr getelementptr inbounds ([100 x i32], ptr @p, i64 0, i64 7), align 4
; External function taking a pointer; the frame-index test passes its stack
; buffer to it.
249 declare void @bar(ptr)
; store_frame_index: allocates an 8-element i64 buffer on the stack, stores
; the zero-extended %a to element 0 and the zero-extended %b to element 1,
; then passes the buffer to @bar.
; The two stores are addressed through the frame index (%stack.0.buf) at
; displacements 0 and 8 and are expected to be clustered.
; Only one scheduling dump, the one with virtual-register operands, is matched
; for this function.
251 define void @store_frame_index(i32 %a, i32 %b) {
253 ; CHECK: ********** MI Scheduling **********
254 ; CHECK-LABEL: store_frame_index:%bb.0
255 ; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
256 ; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf
257 ; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf
258 %buf = alloca [8 x i64], align 8
259 %conv = zext i32 %a to i64
260 store i64 %conv, ptr %buf, align 8
261 %conv1 = zext i32 %b to i64
262 %arrayidx2 = getelementptr inbounds [8 x i64], ptr %buf, i64 0, i64 1
263 store i64 %conv1, ptr %arrayidx2, align 8
264 call void @bar(ptr nonnull %buf)