; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s
;
; Verify that misched resource/latency balance heuristics are sane.

define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body

; imull folded loads should be in order and interleaved with addl, never
; adjacent. Also check that we have no spilling.
;
; Since the mmult1 IR is already in good order, this effectively ensures
; the scheduler maintains source order.
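;
; For reference, each iteration of the loop below computes roughly the
; following C (a reconstruction from the IR, not the original source; the
; %pre103 and %pre104 arguments are unused):
;
;   for (long i = 0; i < 10; ++i)
;     tmp55[i] = tmp56[0]*pre[i]   + tmp56[1]*pre94[i]  + tmp56[2]*pre95[i]
;              + tmp56[3]*pre96[i] + tmp56[4]*pre97[i]  + tmp56[5]*pre98[i]
;              + tmp56[6]*pre99[i] + tmp56[7]*pre100[i] + tmp56[8]*pre101[i]
;              + tmp56[9]*pre102[i];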
;
; CHECK-LABEL: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK-LABEL: %end
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32, i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
  %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
  %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
  %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
  %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
  %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
  %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
  %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
  %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
  %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}

; Unlike the above loop, this IR starts out in a bad order and the scheduler
; must reschedule it to recover the interleaved imull/addl pattern. It
; computes the same values as @unrolled_mmult1, but most of the loads have
; been hoisted above the multiply/add chain.
;
; CHECK-LABEL: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK-LABEL: %end
define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body

for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32, i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
  %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
  %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
  %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
  %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
  %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
  %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
  %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
  %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
  %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
  %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
  %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
  %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
  %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
  %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
  %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
  %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
  %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}

; A mildly interesting little block extracted from a cipher. The
; balance heuristics are interesting here because we have resource,
; latency, and register limits all at once. For now, simply check that
; we don't use any callee-saves.
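;
; For reference, the block does roughly the following (a reconstruction from
; the IR, treating the i32 globals as unsigned; @d is a scalar global that the
; IR indexes as if it were an array):
;
;   unsigned x = a;
;   b = (x << 8) | (x >> 8);
;   unsigned sum = c + a;
;   unsigned t = (&d)[c];
;   c = 346;
;   d = 20021;
;   unsigned olda = a;
;   a = t;  b = olda;  c = sum;  d = olda;
;   return sum == 0 ? (x >> 8) + (x >> 16) + (x >> 24) + (x >> 30) : 0;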
;
; CHECK-LABEL: @encpc1
; CHECK-LABEL: %entry
; CHECK-NOT: push
; CHECK-NOT: pop
; CHECK: ret
@a = external global i32, align 4
@b = external global i32, align 4
@c = external global i32, align 4
@d = external global i32, align 4

define i32 @encpc1() nounwind {
entry:
  %l1 = load i32, i32* @a, align 16
  %conv = shl i32 %l1, 8
  %s5 = lshr i32 %l1, 8
  %add = or i32 %conv, %s5
  store i32 %add, i32* @b
  %l6 = load i32, i32* @a
  %l7 = load i32, i32* @c
  %add.i = add i32 %l7, %l6
  %idxprom.i = zext i32 %l7 to i64
  %arrayidx.i = getelementptr inbounds i32, i32* @d, i64 %idxprom.i
  %l8 = load i32, i32* %arrayidx.i
  store i32 346, i32* @c
  store i32 20021, i32* @d
  %l9 = load i32, i32* @a
  store i32 %l8, i32* @a
  store i32 %l9, i32* @b
  store i32 %add.i, i32* @c
  store i32 %l9, i32* @d
  %cmp.i = icmp eq i32 %add.i, 0
  %s10 = lshr i32 %l1, 16
  %s12 = lshr i32 %l1, 24
  %s14 = lshr i32 %l1, 30
  br i1 %cmp.i, label %if, label %return

if:
  %sa = add i32 %s5, %s10
  %sb = add i32 %sa, %s12
  %sc = add i32 %sb, %s14
  br label %return

return:
  %result = phi i32 [0, %entry], [%sc, %if]
  ret i32 %result
}