2 ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -aarch64-enable-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s
3 ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -force-fast-cluster -verify-misched -debug-only=machine-scheduler -aarch64-enable-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK-FAST
; stp_i64_scale: four i64 stores of %v through %P at element indices 3, 2, 1
; and 4 (in that program order). The expectations below require the scheduler
; debug output to report the stores with neighbouring immediates as clustered
; (SU(3) with SU(4), i.e. immediates 2 and 1; SU(2) with SU(5), i.e. immediates
; 3 and 4), followed by the STRXui dumps in the order of immediates 1, 2, 3, 4.
5 ; CHECK: ********** MI Scheduling **********
6 ; CHECK-LABEL: stp_i64_scale:%bb.0
7 ; CHECK:Cluster ld/st SU(3) - SU(4)
8 ; CHECK:Cluster ld/st SU(2) - SU(5)
9 ; CHECK:SU(4): STRXui %1:gpr64, %0:gpr64common, 1
10 ; CHECK:SU(3): STRXui %1:gpr64, %0:gpr64common, 2
11 ; CHECK:SU(2): STRXui %1:gpr64, %0:gpr64common, 3
12 ; CHECK:SU(5): STRXui %1:gpr64, %0:gpr64common, 4
13 define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) {
15 %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
16 store i64 %v, i64* %arrayidx
17 %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
18 store i64 %v, i64* %arrayidx1
19 %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
20 store i64 %v, i64* %arrayidx2
21 %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
22 store i64 %v, i64* %arrayidx3
; stp_i32_scale: the i32 counterpart of the i64 scaled-store test. Four i32
; stores of %v through %P at element indices 3, 2, 1 and 4. The expectations
; require the same pairing (SU(3) with SU(4), SU(2) with SU(5)) and the STRWui
; dumps in the order of immediates 1, 2, 3, 4.
26 ; CHECK: ********** MI Scheduling **********
27 ; CHECK-LABEL: stp_i32_scale:%bb.0
28 ; CHECK:Cluster ld/st SU(3) - SU(4)
29 ; CHECK:Cluster ld/st SU(2) - SU(5)
30 ; CHECK:SU(4): STRWui %1:gpr32, %0:gpr64common, 1
31 ; CHECK:SU(3): STRWui %1:gpr32, %0:gpr64common, 2
32 ; CHECK:SU(2): STRWui %1:gpr32, %0:gpr64common, 3
33 ; CHECK:SU(5): STRWui %1:gpr32, %0:gpr64common, 4
34 define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) {
36 %arrayidx = getelementptr inbounds i32, i32* %P, i32 3
37 store i32 %v, i32* %arrayidx
38 %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 2
39 store i32 %v, i32* %arrayidx1
40 %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 1
41 store i32 %v, i32* %arrayidx2
42 %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 4
43 store i32 %v, i32* %arrayidx3
; stp_i64_unscale: four i64 stores of %v through %P at negative element
; indices -3, -1, -2 and -4. The expected instructions are STURXi with the
; immediates -24, -8, -16 and -32 (element index times 8). The expectations
; pair SU(2) with SU(5) (-24 and -32) and SU(3) with SU(4) (-8 and -16), then
; list the stores in SU order.
; The function references attribute group #0, which is not defined in the
; part of the file shown here.
47 ; CHECK:********** MI Scheduling **********
48 ; CHECK-LABEL:stp_i64_unscale:%bb.0 entry
49 ; CHECK:Cluster ld/st SU(2) - SU(5)
50 ; CHECK:Cluster ld/st SU(3) - SU(4)
51 ; CHECK:SU(2): STURXi %1:gpr64, %0:gpr64common, -24
52 ; CHECK:SU(3): STURXi %1:gpr64, %0:gpr64common, -8
53 ; CHECK:SU(4): STURXi %1:gpr64, %0:gpr64common, -16
54 ; CHECK:SU(5): STURXi %1:gpr64, %0:gpr64common, -32
55 define void @stp_i64_unscale(i64* nocapture %P, i64 %v) #0 {
57 %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
58 store i64 %v, i64* %arrayidx
59 %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
60 store i64 %v, i64* %arrayidx1
61 %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
62 store i64 %v, i64* %arrayidx2
63 %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
64 store i64 %v, i64* %arrayidx3
; stp_i32_unscale: the i32 counterpart of the i64 unscaled-store test. Four
; i32 stores of %v through %P at element indices -3, -1, -2 and -4. The
; expected instructions are STURWi with the immediates -12, -4, -8 and -16
; (element index times 4). The expectations pair SU(2) with SU(5) (-12 and
; -16) and SU(3) with SU(4) (-4 and -8), then list the stores in SU order.
; The function references attribute group #0, which is not defined in the
; part of the file shown here.
68 ; CHECK:********** MI Scheduling **********
69 ; CHECK-LABEL:stp_i32_unscale:%bb.0 entry
70 ; CHECK:Cluster ld/st SU(2) - SU(5)
71 ; CHECK:Cluster ld/st SU(3) - SU(4)
72 ; CHECK:SU(2): STURWi %1:gpr32, %0:gpr64common, -12
73 ; CHECK:SU(3): STURWi %1:gpr32, %0:gpr64common, -4
74 ; CHECK:SU(4): STURWi %1:gpr32, %0:gpr64common, -8
75 ; CHECK:SU(5): STURWi %1:gpr32, %0:gpr64common, -16
76 define void @stp_i32_unscale(i32* nocapture %P, i32 %v) #0 {
78 %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
79 store i32 %v, i32* %arrayidx
80 %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
81 store i32 %v, i32* %arrayidx1
82 %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
83 store i32 %v, i32* %arrayidx2
84 %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
85 store i32 %v, i32* %arrayidx3
; stp_double: four double stores of %v through %P at element indices 3, 1, 2
; and 4 (in that program order). The expectations pair SU(3) with SU(4)
; (immediates 1 and 2) and SU(2) with SU(5) (immediates 3 and 4), then list
; the STRDui dumps in the order of immediates 1, 2, 3, 4.
89 ; CHECK:********** MI Scheduling **********
90 ; CHECK-LABEL:stp_double:%bb.0
91 ; CHECK:Cluster ld/st SU(3) - SU(4)
92 ; CHECK:Cluster ld/st SU(2) - SU(5)
93 ; CHECK:SU(3): STRDui %1:fpr64, %0:gpr64common, 1
94 ; CHECK:SU(4): STRDui %1:fpr64, %0:gpr64common, 2
95 ; CHECK:SU(2): STRDui %1:fpr64, %0:gpr64common, 3
96 ; CHECK:SU(5): STRDui %1:fpr64, %0:gpr64common, 4
97 define void @stp_double(double* nocapture %P, double %v) {
99 %arrayidx = getelementptr inbounds double, double* %P, i64 3
100 store double %v, double* %arrayidx
101 %arrayidx1 = getelementptr inbounds double, double* %P, i64 1
102 store double %v, double* %arrayidx1
103 %arrayidx2 = getelementptr inbounds double, double* %P, i64 2
104 store double %v, double* %arrayidx2
105 %arrayidx3 = getelementptr inbounds double, double* %P, i64 4
106 store double %v, double* %arrayidx3
; stp_float: the float counterpart of the double store test. Four float
; stores of %v through %P at element indices 3, 1, 2 and 4. The expectations
; pair SU(3) with SU(4) (immediates 1 and 2) and SU(2) with SU(5) (immediates
; 3 and 4), then list the STRSui dumps in the order of immediates 1, 2, 3, 4.
110 ; CHECK:********** MI Scheduling **********
111 ; CHECK-LABEL:stp_float:%bb.0
112 ; CHECK:Cluster ld/st SU(3) - SU(4)
113 ; CHECK:Cluster ld/st SU(2) - SU(5)
114 ; CHECK:SU(3): STRSui %1:fpr32, %0:gpr64common, 1
115 ; CHECK:SU(4): STRSui %1:fpr32, %0:gpr64common, 2
116 ; CHECK:SU(2): STRSui %1:fpr32, %0:gpr64common, 3
117 ; CHECK:SU(5): STRSui %1:fpr32, %0:gpr64common, 4
118 define void @stp_float(float* nocapture %P, float %v) {
120 %arrayidx = getelementptr inbounds float, float* %P, i64 3
121 store float %v, float* %arrayidx
122 %arrayidx1 = getelementptr inbounds float, float* %P, i64 1
123 store float %v, float* %arrayidx1
124 %arrayidx2 = getelementptr inbounds float, float* %P, i64 2
125 store float %v, float* %arrayidx2
126 %arrayidx3 = getelementptr inbounds float, float* %P, i64 4
127 store float %v, float* %arrayidx3
; stp_volatile: the same four i64 stores as the scaled i64 test (element
; indices 3, 2, 1, 4), but every store is volatile. This is the negative
; case: no "Cluster ld/st" line may appear between the function header and
; the first store dump, and the stores are expected in SU order, each
; carrying a volatile memory operand.
131 ; CHECK: ********** MI Scheduling **********
132 ; CHECK-LABEL: stp_volatile:%bb.0
133 ; CHECK-NOT: Cluster ld/st
134 ; CHECK:SU(2): STRXui %1:gpr64, %0:gpr64common, 3 :: (volatile
135 ; CHECK:SU(3): STRXui %1:gpr64, %0:gpr64common, 2 :: (volatile
136 ; CHECK:SU(4): STRXui %1:gpr64, %0:gpr64common, 1 :: (volatile
137 ; CHECK:SU(5): STRXui %1:gpr64, %0:gpr64common, 4 :: (volatile
138 define i64 @stp_volatile(i64* nocapture %P, i64 %v) {
140 %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
141 store volatile i64 %v, i64* %arrayidx
142 %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
143 store volatile i64 %v, i64* %arrayidx1
144 %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
145 store volatile i64 %v, i64* %arrayidx2
146 %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
147 store volatile i64 %v, i64* %arrayidx3
; stp_i64_with_ld: stores interleaved with loads. The body is four
; straight-line copies of a[i] = a[i] + a[16+i] * a[8+i] for i = 0..3, so
; each store to a[i] is separated from the next by three loads, a mul and an
; add. The expectations require the stores to be clustered pairwise anyway:
; SU(5) with SU(10) (immediates 0 and 1) and SU(15) with SU(20) (immediates 2
; and 3).
; %b and %c are declared but not used by the body shown here.
151 ; CHECK: ********** MI Scheduling **********
152 ; CHECK-LABEL: stp_i64_with_ld:%bb.0
153 ; CHECK:Cluster ld/st SU(5) - SU(10)
154 ; CHECK:Cluster ld/st SU(15) - SU(20)
155 ; CHECK:SU(5): STRXui %7:gpr64, %0:gpr64common, 0 ::
156 ; CHECK:SU(10): STRXui %12:gpr64, %0:gpr64common, 1 ::
157 ; CHECK:SU(15): STRXui %17:gpr64, %0:gpr64common, 2 ::
158 ; CHECK:SU(20): STRXui %22:gpr64, %0:gpr64common, 3 ::
159 define void @stp_i64_with_ld(i64* noalias nocapture %a, i64* noalias nocapture readnone %b, i64* noalias nocapture readnone %c) {
; i = 0: a[0] += a[16] * a[8]
161 %arrayidx = getelementptr inbounds i64, i64* %a, i64 8
162 %0 = load i64, i64* %arrayidx, align 8
163 %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 16
164 %1 = load i64, i64* %arrayidx3, align 8
165 %mul = mul nsw i64 %1, %0
166 %2 = load i64, i64* %a, align 8
167 %add6 = add nsw i64 %2, %mul
168 store i64 %add6, i64* %a, align 8
; i = 1: a[1] += a[17] * a[9]
169 %arrayidx.1 = getelementptr inbounds i64, i64* %a, i64 9
170 %3 = load i64, i64* %arrayidx.1, align 8
171 %arrayidx3.1 = getelementptr inbounds i64, i64* %a, i64 17
172 %4 = load i64, i64* %arrayidx3.1, align 8
173 %mul.1 = mul nsw i64 %4, %3
174 %arrayidx5.1 = getelementptr inbounds i64, i64* %a, i64 1
175 %5 = load i64, i64* %arrayidx5.1, align 8
176 %add6.1 = add nsw i64 %5, %mul.1
177 store i64 %add6.1, i64* %arrayidx5.1, align 8
; i = 2: a[2] += a[18] * a[10]
178 %arrayidx.2 = getelementptr inbounds i64, i64* %a, i64 10
179 %6 = load i64, i64* %arrayidx.2, align 8
180 %arrayidx3.2 = getelementptr inbounds i64, i64* %a, i64 18
181 %7 = load i64, i64* %arrayidx3.2, align 8
182 %mul.2 = mul nsw i64 %7, %6
183 %arrayidx5.2 = getelementptr inbounds i64, i64* %a, i64 2
184 %8 = load i64, i64* %arrayidx5.2, align 8
185 %add6.2 = add nsw i64 %8, %mul.2
186 store i64 %add6.2, i64* %arrayidx5.2, align 8
; i = 3: a[3] += a[19] * a[11]
187 %arrayidx.3 = getelementptr inbounds i64, i64* %a, i64 11
188 %9 = load i64, i64* %arrayidx.3, align 8
189 %arrayidx3.3 = getelementptr inbounds i64, i64* %a, i64 19
190 %10 = load i64, i64* %arrayidx3.3, align 8
191 %mul.3 = mul nsw i64 %10, %9
192 %arrayidx5.3 = getelementptr inbounds i64, i64* %a, i64 3
193 %11 = load i64, i64* %arrayidx5.3, align 8
194 %add6.3 = add nsw i64 %11, %mul.3
195 store i64 %add6.3, i64* %arrayidx5.3, align 8
199 ; Verify that SU(2) and SU(4) are the preds of SU(3).
; The function stores %m to p[0] and %n + 5 to p[1]. Per the expectations
; below, SU(2) is the COPY that defines the base pointer, SU(3) is the first
; store, SU(4) is the add that feeds the second store, and SU(5) is the
; second store. After SU(3) and SU(5) are reported as clustered, the debug
; output must contain "Copy Pred SU(4)" followed by "Copy Pred SU(2)".
200 ; CHECK: ********** MI Scheduling **********
201 ; CHECK-LABEL: stp_missing_preds_edges:%bb.0
202 ; CHECK:Cluster ld/st SU(3) - SU(5)
203 ; CHECK: Copy Pred SU(4)
204 ; CHECK: Copy Pred SU(2)
205 ; CHECK:SU(2): %0:gpr64common = COPY $x0
206 ; CHECK:SU(3): STRWui %1:gpr32, %0:gpr64common, 0
207 ; CHECK:SU(4): %3:gpr32common = nsw ADDWri %2:gpr32common, 5, 0
208 ; CHECK:SU(5): STRWui %3:gpr32common, %0:gpr64common, 1
209 define void @stp_missing_preds_edges(i32* %p, i32 %m, i32 %n) {
211 store i32 %m, i32* %p, align 4
212 %add = add nsw i32 %n, 5
213 %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
214 store i32 %add, i32* %arrayidx1, align 4
218 ; Verify that the SU(4) and SU(7) can be clustered even with different preds.
; SU(4) and SU(7) are the two i32 loads from %q (immediates 0 and 1). Each
; has a different store as its memory-order predecessor: SU(3), the i32 store
; through %p, and SU(6), the i8 store at byte offset 4 from %q.
; The default run expects the two loads to be clustered. The run that passes
; -force-fast-cluster expects no "Cluster ld/st" line between the function
; header and the SU(3) dump.
220 ; CHECK: ********** MI Scheduling **********
221 ; CHECK-LABEL: cluster_with_different_preds:%bb.0
222 ; CHECK:Cluster ld/st SU(4) - SU(7)
223 ; CHECK:SU(3): STRWui %2:gpr32, %0:gpr64common, 0 ::
224 ; CHECK:SU(4): %3:gpr32 = LDRWui %1:gpr64common, 0 ::
225 ; CHECK:Predecessors:
226 ; CHECK: SU(3): Ord Latency=1 Memory
227 ; CHECK:SU(6): STRBBui %4:gpr32, %1:gpr64common, 4 ::
228 ; CHECK:SU(7): %5:gpr32 = LDRWui %1:gpr64common, 1 ::
229 ; CHECK:Predecessors:
230 ; CHECK:SU(6): Ord Latency=1 Memory
231 ; CHECK-FAST: cluster_with_different_preds:%bb.0
232 ; CHECK-FAST-NOT: Cluster ld/st
233 ; CHECK-FAST:SU(3): STRWui %2:gpr32, %0:gpr64common, 0 ::
234 ; CHECK-FAST:SU(4): %3:gpr32 = LDRWui %1:gpr64common, 0 ::
235 define i32 @cluster_with_different_preds(i32* %p, i32* %q) {
237 store i32 3, i32* %p, align 4
238 %0 = load i32, i32* %q, align 4
239 %add.ptr = getelementptr inbounds i32, i32* %q, i64 1
240 %1 = bitcast i32* %add.ptr to i8*
241 store i8 5, i8* %1, align 1
242 %2 = load i32, i32* %add.ptr, align 4
243 %add = add nsw i32 %2, %0