1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s
; External callees used by the tests below; only their attributes matter.
; nounwind + readonly, but not willreturn: a call may never return.
4 declare i64 @may_inf_loop_ro() nounwind readonly
; nounwind only: no memory restriction and not willreturn.
5 declare i64 @may_inf_loop_rw() nounwind
; willreturn, but not nounwind: a call may unwind.
6 declare i64 @may_throw() willreturn
8 ; Base case with no interesting control dependencies
; Two adjacent i64 loads from %a and from %c are added lane-wise and stored to
; adjacent slots of %b. The CHECK lines expect every pair (loads, add, stores)
; to be turned into a single <2 x i64> operation.
9 define void @test_no_control(ptr %a, ptr %b, ptr %c) {
10 ; CHECK-LABEL: @test_no_control(
11 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
12 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
13 ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
14 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[B:%.*]], align 8
15 ; CHECK-NEXT: ret void
17 %v1 = load i64, ptr %a
18 %a2 = getelementptr i64, ptr %a, i32 1
19 %v2 = load i64, ptr %a2
21 %c1 = load i64, ptr %c
22 %ca2 = getelementptr i64, ptr %c, i32 1
23 %c2 = load i64, ptr %ca2
24 %add1 = add i64 %v1, %c1
25 %add2 = add i64 %v2, %c2
27 store i64 %add1, ptr %b
28 %b2 = getelementptr i64, ptr %b, i32 1
29 store i64 %add2, ptr %b2
; The second add operand of lane 1 is the result of a call to
; @may_inf_loop_ro instead of a load, and the call follows both loads from %a.
; The CHECK lines expect the %a loads, the adds and the stores to be
; vectorized, with %c1 and the call result combined via insertelement.
33 define void @test1(ptr %a, ptr %b, ptr %c) {
34 ; CHECK-LABEL: @test1(
35 ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
36 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
37 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
38 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
39 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
40 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
41 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
42 ; CHECK-NEXT: ret void
44 %v1 = load i64, ptr %a
45 %a2 = getelementptr i64, ptr %a, i32 1
46 %v2 = load i64, ptr %a2
48 %c1 = load i64, ptr %c
49 %c2 = call i64 @may_inf_loop_ro()
50 %add1 = add i64 %v1, %c1
51 %add2 = add i64 %v2, %c2
53 store i64 %add1, ptr %b
54 %b2 = getelementptr i64, ptr %b, i32 1
55 store i64 %add2, ptr %b2
; Same computation as test1, but the load of %c and the call come before the
; two loads from %a. The CHECK lines expect the same output shape as test1:
; scalar %c load and call first, then the vectorized %a load, add and store.
59 define void @test2(ptr %a, ptr %b, ptr %c) {
60 ; CHECK-LABEL: @test2(
61 ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
62 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
63 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
64 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
65 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
66 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
67 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
68 ; CHECK-NEXT: ret void
70 %c1 = load i64, ptr %c
71 %c2 = call i64 @may_inf_loop_ro()
73 %v1 = load i64, ptr %a
74 %a2 = getelementptr i64, ptr %a, i32 1
75 %v2 = load i64, ptr %a2
77 %add1 = add i64 %v1, %c1
78 %add2 = add i64 %v2, %c2
80 store i64 %add1, ptr %b
81 %b2 = getelementptr i64, ptr %b, i32 1
82 store i64 %add2, ptr %b2
; Same computation as test1, but lane 0 (both loads and the add) is computed
; in full before lane 1, and in lane 1 the load from %a2 precedes the call.
; The CHECK lines expect the vectorized load of %a to be emitted after the
; call, followed by the vectorized add and store.
86 define void @test3(ptr %a, ptr %b, ptr %c) {
87 ; CHECK-LABEL: @test3(
88 ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
89 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
90 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
91 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
92 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
93 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
94 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
95 ; CHECK-NEXT: ret void
97 %v1 = load i64, ptr %a
98 %c1 = load i64, ptr %c
99 %add1 = add i64 %v1, %c1
101 %a2 = getelementptr i64, ptr %a, i32 1
102 %v2 = load i64, ptr %a2
103 %c2 = call i64 @may_inf_loop_ro()
104 %add2 = add i64 %v2, %c2
106 store i64 %add1, ptr %b
107 %b2 = getelementptr i64, ptr %b, i32 1
108 store i64 %add2, ptr %b2
; Like test3, except that in lane 1 the call comes before the load from %a2,
; so the call separates the two loads from %a. The CHECK lines expect the
; same output shape: scalar %c load and call, then vectorized %a load, add
; and store.
112 define void @test4(ptr %a, ptr %b, ptr %c) {
113 ; CHECK-LABEL: @test4(
114 ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
115 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
116 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
117 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
118 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
119 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
120 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
121 ; CHECK-NEXT: ret void
123 %v1 = load i64, ptr %a
124 %c1 = load i64, ptr %c
125 %add1 = add i64 %v1, %c1
127 %c2 = call i64 @may_inf_loop_ro()
128 %a2 = getelementptr i64, ptr %a, i32 1
129 %v2 = load i64, ptr %a2
130 %add2 = add i64 %v2, %c2
132 store i64 %add1, ptr %b
133 %b2 = getelementptr i64, ptr %b, i32 1
134 store i64 %add2, ptr %b2
; Lane 1 (load from %a2, call, add) is computed before lane 0. The CHECK
; lines expect the call to be emitted first, then the scalar load of %c,
; then the vectorized %a load, add and store.
138 define void @test5(ptr %a, ptr %b, ptr %c) {
139 ; CHECK-LABEL: @test5(
140 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
141 ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
142 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
143 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
144 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
145 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
146 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
147 ; CHECK-NEXT: ret void
149 %a2 = getelementptr i64, ptr %a, i32 1
150 %v2 = load i64, ptr %a2
151 %c2 = call i64 @may_inf_loop_ro()
152 %add2 = add i64 %v2, %c2
154 %v1 = load i64, ptr %a
155 %c1 = load i64, ptr %c
156 %add1 = add i64 %v1, %c1
158 store i64 %add1, ptr %b
159 %b2 = getelementptr i64, ptr %b, i32 1
160 store i64 %add2, ptr %b2
; Both add operands are loads, as in test_no_control, but a call to
; @may_inf_loop_ro whose result is unused sits between the two loads from %a.
; The CHECK lines expect the call first, followed by fully vectorized loads,
; add and store.
164 define void @test6(ptr %a, ptr %b, ptr %c) {
165 ; CHECK-LABEL: @test6(
166 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
167 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
168 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
169 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
170 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
171 ; CHECK-NEXT: ret void
173 %v1 = load i64, ptr %a
174 call i64 @may_inf_loop_ro()
175 %a2 = getelementptr i64, ptr %a, i32 1
176 %v2 = load i64, ptr %a2
178 %c1 = load i64, ptr %c
179 %ca2 = getelementptr i64, ptr %c, i32 1
180 %c2 = load i64, ptr %ca2
181 %add1 = add i64 %v1, %c1
182 %add2 = add i64 %v2, %c2
184 store i64 %add1, ptr %b
185 %b2 = getelementptr i64, ptr %b, i32 1
186 store i64 %add2, ptr %b2
190 ; In this case, we can't vectorize the load pair because there's no valid
191 ; scheduling point which respects both memory and control dependence. If
192 ; we scheduled the second load before the store holding the first one in place,
193 ; we'd have hoisted a potentially faulting load above a potentially infinite
194 ; call and thus have introduced a possible fault into a program which didn't
195 ; have one.
; Expected output per the CHECK lines: the two loads from %a stay scalar, one
; on each side of the call, and are gathered with insertelement; the loads
; from %c, the add and the store to %b are still vectorized.
196 define void @test7(ptr %a, ptr %b, ptr %c) {
197 ; CHECK-LABEL: @test7(
198 ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
199 ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8
200 ; CHECK-NEXT: store i64 0, ptr [[A]], align 8
201 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
202 ; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
203 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
204 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
205 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
206 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
207 ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
208 ; CHECK-NEXT: ret void
210 %v1 = load i64, ptr %a
212 call i64 @may_inf_loop_ro()
213 %a2 = getelementptr i64, ptr %a, i32 1
214 %v2 = load i64, ptr %a2
216 %c1 = load i64, ptr %c
217 %ca2 = getelementptr i64, ptr %c, i32 1
218 %c2 = load i64, ptr %ca2
219 %add1 = add i64 %v1, %c1
220 %add2 = add i64 %v2, %c2
222 store i64 %add1, ptr %b
223 %b2 = getelementptr i64, ptr %b, i32 1
224 store i64 %add2, ptr %b2
228 ; Same as test7, but with a throwing call
; The call targets @may_throw (declared willreturn, without nounwind) and is
; marked readonly at the call site. The CHECK lines expect the loads from %a
; to stay scalar on either side of the call, while the loads from %c, the add
; and the store to %b are vectorized.
229 define void @test8(ptr %a, ptr %b, ptr %c) {
230 ; CHECK-LABEL: @test8(
231 ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
232 ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8
233 ; CHECK-NEXT: store i64 0, ptr [[A]], align 8
234 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_throw() #[[ATTR4:[0-9]+]]
235 ; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
236 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
237 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
238 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
239 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
240 ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
241 ; CHECK-NEXT: ret void
243 %v1 = load i64, ptr %a
245 call i64 @may_throw() readonly
246 %a2 = getelementptr i64, ptr %a, i32 1
247 %v2 = load i64, ptr %a2
249 %c1 = load i64, ptr %c
250 %ca2 = getelementptr i64, ptr %c, i32 1
251 %c2 = load i64, ptr %ca2
252 %add1 = add i64 %v1, %c1
253 %add2 = add i64 %v2, %c2
255 store i64 %add1, ptr %b
256 %b2 = getelementptr i64, ptr %b, i32 1
257 store i64 %add2, ptr %b2
261 ; Same as test8, but with a readwrite maythrow call
; The call to @may_throw carries no call-site memory attribute here. The
; CHECK lines expect the loads from %a to stay scalar on either side of the
; call, while the loads from %c, the add and the store to %b are vectorized.
262 define void @test9(ptr %a, ptr %b, ptr %c) {
263 ; CHECK-LABEL: @test9(
264 ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
265 ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8
266 ; CHECK-NEXT: store i64 0, ptr [[A]], align 8
267 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_throw()
268 ; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
269 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
270 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
271 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
272 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
273 ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
274 ; CHECK-NEXT: ret void
276 %v1 = load i64, ptr %a
278 call i64 @may_throw()
279 %a2 = getelementptr i64, ptr %a, i32 1
280 %v2 = load i64, ptr %a2
282 %c1 = load i64, ptr %c
283 %ca2 = getelementptr i64, ptr %c, i32 1
284 %c2 = load i64, ptr %ca2
285 %add1 = add i64 %v1, %c1
286 %add2 = add i64 %v2, %c2
288 store i64 %add1, ptr %b
289 %b2 = getelementptr i64, ptr %b, i32 1
290 store i64 %add2, ptr %b2
294 ; A variant of test7 which shows the same problem with a non-load instruction
; The two udivs sit on opposite sides of the store to %a and the call. The
; CHECK lines expect the loads from %a and both udivs to stay scalar, with
; the second udiv emitted after the call; the loads from %c, the add and the
; store to %b are vectorized.
295 define void @test10(ptr %a, ptr %b, ptr %c) {
296 ; CHECK-LABEL: @test10(
297 ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A:%.*]], align 8
298 ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A]], i32 1
299 ; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
300 ; CHECK-NEXT: [[U1:%.*]] = udiv i64 200, [[V1]]
301 ; CHECK-NEXT: store i64 [[U1]], ptr [[A]], align 8
302 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
303 ; CHECK-NEXT: [[U2:%.*]] = udiv i64 200, [[V2]]
304 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
305 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
306 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[U2]], i32 1
307 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
308 ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
309 ; CHECK-NEXT: ret void
311 %v1 = load i64, ptr %a
312 %a2 = getelementptr i64, ptr %a, i32 1
313 %v2 = load i64, ptr %a2
315 %u1 = udiv i64 200, %v1
316 store i64 %u1, ptr %a
317 call i64 @may_inf_loop_ro()
318 %u2 = udiv i64 200, %v2
320 %c1 = load i64, ptr %c
321 %ca2 = getelementptr i64, ptr %c, i32 1
322 %c2 = load i64, ptr %ca2
323 %add1 = add i64 %u1, %c1
324 %add2 = add i64 %u2, %c2
326 store i64 %add1, ptr %b
327 %b2 = getelementptr i64, ptr %b, i32 1
328 store i64 %add2, ptr %b2
332 ; Variant of test10 with block-invariant operands to the udivs
333 ; The udiv by %y can fault, so it must not be hoisted above the potentially
; infinite-looping call; the CHECK lines below keep it after the call.
; The udiv divisors are the function arguments %x and %y rather than loaded
; values. The CHECK lines expect both udivs to stay scalar, with the udiv by
; %y emitted after the call; the loads from %c, the add and the final store
; to %b are vectorized.
334 define void @test11(i64 %x, i64 %y, ptr %b, ptr %c) {
335 ; CHECK-LABEL: @test11(
336 ; CHECK-NEXT: [[U1:%.*]] = udiv i64 200, [[X:%.*]]
337 ; CHECK-NEXT: store i64 [[U1]], ptr [[B:%.*]], align 8
338 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
339 ; CHECK-NEXT: [[U2:%.*]] = udiv i64 200, [[Y:%.*]]
340 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
341 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
342 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[U2]], i32 1
343 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
344 ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B]], align 8
345 ; CHECK-NEXT: ret void
347 %u1 = udiv i64 200, %x
348 store i64 %u1, ptr %b
349 call i64 @may_inf_loop_ro()
350 %u2 = udiv i64 200, %y
352 %c1 = load i64, ptr %c
353 %ca2 = getelementptr i64, ptr %c, i32 1
354 %c2 = load i64, ptr %ca2
355 %add1 = add i64 %u1, %c1
356 %add2 = add i64 %u2, %c2
358 store i64 %add1, ptr %b
359 %b2 = getelementptr i64, ptr %b, i32 1
360 store i64 %add2, ptr %b2