1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s
; External callee declared readonly and nounwind but NOT willreturn, so a call
; to it may never return. @allocas_speculation places a call to it between its
; two groups of scalar instructions.
4 declare i64 @may_inf_loop_ro() nounwind readonly
6 ; Base case without allocas or stacksave
; Loads two adjacent pointers from %a, offsets each by one byte, and stores the
; results into two adjacent slots of %b. The expected output combines the two
; loads, the two GEPs and the two stores into one <2 x ptr> operation each.
; %c is not referenced by the visible instructions.
7 define void @basecase(ptr %a, ptr %b, ptr %c) {
8 ; CHECK-LABEL: @basecase(
9 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
10 ; CHECK-NEXT: store ptr null, ptr [[A]], align 8
11 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
12 ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
13 ; CHECK-NEXT: ret void
16 %v1 = load ptr, ptr %a
; This store to %a sits between the two scalar loads; in the expected output
; the combined vector load is placed before it.
17 store ptr zeroinitializer, ptr %a
18 %a2 = getelementptr ptr, ptr %a, i32 1
19 %v2 = load ptr, ptr %a2
21 %add1 = getelementptr i8, ptr %v1, i32 1
22 %add2 = getelementptr i8, ptr %v2, i32 1
24 store ptr %add1, ptr %b
25 %b2 = getelementptr ptr, ptr %b, i32 1
26 store ptr %add2, ptr %b2
30 ; Using two allocas and a buildvector
; %add1 is stored both to %a and to the first slot of %b; %add2 goes to the
; second slot of %b. In the expected output the two allocas are packed into a
; <2 x ptr> with an insertelement pair, a vector GEP feeds the vector store to
; %b, and a separate scalar GEP is kept for the store to %a.
31 define void @allocas(ptr %a, ptr %b, ptr %c) {
32 ; CHECK-LABEL: @allocas(
33 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1
34 ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1
35 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
36 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
37 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
38 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
39 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8
40 ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
41 ; CHECK-NEXT: ret void
45 %add1 = getelementptr i8, ptr %v1, i32 1
; Extra scalar use of %add1 outside the pair of stores to %b.
46 store ptr %add1, ptr %a
49 %add2 = getelementptr i8, ptr %v2, i32 1
51 store ptr %add1, ptr %b
52 %b2 = getelementptr ptr, ptr %b, i32 1
53 store ptr %add2, ptr %b2
57 ; Allocas cannot be speculated above a potentially non-returning call
; Same shape as @allocas, but with a call to @may_inf_loop_ro between the
; %add1 group and the %add2 group. The expected output contains no vector
; operations and keeps the second alloca below the call.
58 define void @allocas_speculation(ptr %a, ptr %b, ptr %c) {
59 ; CHECK-LABEL: @allocas_speculation(
60 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1
61 ; CHECK-NEXT: [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
62 ; CHECK-NEXT: store ptr [[ADD1]], ptr [[A:%.*]], align 8
63 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
64 ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1
65 ; CHECK-NEXT: [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
66 ; CHECK-NEXT: store ptr [[ADD1]], ptr [[B:%.*]], align 8
67 ; CHECK-NEXT: [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
68 ; CHECK-NEXT: store ptr [[ADD2]], ptr [[B2]], align 8
69 ; CHECK-NEXT: ret void
73 %add1 = getelementptr i8, ptr %v1, i32 1
74 store ptr %add1, ptr %a
; The callee is declared without willreturn, so this call may not return.
75 call i64 @may_inf_loop_ro()
78 %add2 = getelementptr i8, ptr %v2, i32 1
80 store ptr %add1, ptr %b
81 %b2 = getelementptr ptr, ptr %b, i32 1
82 store ptr %add2, ptr %b2
86 ; We must be careful not to lift the inalloca alloc above the stacksave here.
87 ; We used to miscompile this example before adding explicit dependency handling
;
; The expected output for this function is unvectorized: the inalloca alloca
; stays between the stacksave and stackrestore calls, and both stores to %b
; remain scalar.
89 define void @stacksave(ptr %a, ptr %b, ptr %c) {
90 ; CHECK-LABEL: @stacksave(
91 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1
92 ; CHECK-NEXT: [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
93 ; CHECK-NEXT: store ptr [[ADD1]], ptr [[A:%.*]], align 8
94 ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
95 ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1
96 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4:[0-9]+]]
97 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]])
98 ; CHECK-NEXT: [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
99 ; CHECK-NEXT: store ptr [[ADD1]], ptr [[B:%.*]], align 8
100 ; CHECK-NEXT: [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
101 ; CHECK-NEXT: store ptr [[ADD2]], ptr [[B2]], align 8
102 ; CHECK-NEXT: ret void
106 %add1 = getelementptr i8, ptr %v1, i32 1
107 store ptr %add1, ptr %a
; stacksave / inalloca alloca / call / stackrestore region.
109 %stack = call ptr @llvm.stacksave()
110 %v2 = alloca inalloca i8
111 call void @use(ptr inalloca(i8) %v2) readnone
112 call void @llvm.stackrestore(ptr %stack)
; %add2 is computed from %v2 only after the stackrestore.
114 %add2 = getelementptr i8, ptr %v2, i32 1
116 store ptr %add1, ptr %b
117 %b2 = getelementptr ptr, ptr %b, i32 1
118 store ptr %add2, ptr %b2
; Variant of @stacksave: the store to %a comes after the stacksave call rather
; than before it, and @use is called with readonly instead of readnone. The
; expected output is vectorized: the inalloca alloca still follows the
; stacksave, the two allocas are packed with an insertelement pair, and the
; <2 x ptr> store to %b is emitted after the stackrestore.
122 define void @stacksave2(ptr %a, ptr %b, ptr %c) {
123 ; CHECK-LABEL: @stacksave2(
124 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1
125 ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
126 ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1
127 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
128 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
129 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
130 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
131 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8
132 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
133 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]])
134 ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
135 ; CHECK-NEXT: ret void
139 %add1 = getelementptr i8, ptr %v1, i32 1
141 %stack = call ptr @llvm.stacksave()
; Scalar store placed inside the stacksave..stackrestore region.
142 store ptr %add1, ptr %a
143 %v2 = alloca inalloca i8
; readonly here (readnone in @stacksave).
144 call void @use(ptr inalloca(i8) %v2) readonly
145 call void @llvm.stackrestore(ptr %stack)
147 %add2 = getelementptr i8, ptr %v2, i32 1
149 store ptr %add1, ptr %b
150 %b2 = getelementptr ptr, ptr %b, i32 1
151 store ptr %add2, ptr %b2
; The stacksave call is the first visible instruction, and both GEPs appear
; only after the stackrestore. In the expected output both allocas stay after
; the stacksave, and the insertelement pair, vector GEP and <2 x ptr> store are
; all emitted after the stackrestore.
155 define void @stacksave3(ptr %a, ptr %b, ptr %c) {
156 ; CHECK-LABEL: @stacksave3(
157 ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
158 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1
159 ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1
160 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4]]
161 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]])
162 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
163 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
164 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
165 ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
166 ; CHECK-NEXT: ret void
169 %stack = call ptr @llvm.stacksave()
172 %v2 = alloca inalloca i8
173 call void @use(ptr inalloca(i8) %v2) readnone
174 call void @llvm.stackrestore(ptr %stack)
; Both GEPs and both stores follow the stackrestore.
176 %add1 = getelementptr i8, ptr %v1, i32 1
177 %add2 = getelementptr i8, ptr %v2, i32 1
179 store ptr %add1, ptr %b
180 %b2 = getelementptr ptr, ptr %b, i32 1
181 store ptr %add2, ptr %b2
185 ; Here we have an alloca which needs to stay under the stacksave, but is not
186 ; directly part of the vectorization tree. Instead, the stacksave is
187 ; encountered during dependency scanning via the memory chain.
;
; The expected output vectorizes the load / GEP / store chain and leaves the
; stacksave..stackrestore region (including the inalloca alloca %x) intact
; between the vector GEP and the vector store.
188 define void @stacksave4(ptr %a, ptr %b, ptr %c) {
189 ; CHECK-LABEL: @stacksave4(
190 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
191 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
192 ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
193 ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1
194 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
195 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]])
196 ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
197 ; CHECK-NEXT: ret void
200 %v1 = load ptr, ptr %a
201 %a2 = getelementptr ptr, ptr %a, i32 1
202 %v2 = load ptr, ptr %a2
204 %add1 = getelementptr i8, ptr %v1, i32 1
205 %add2 = getelementptr i8, ptr %v2, i32 1
; %x is not an operand of any load, GEP or store in the vectorized chain.
207 %stack = call ptr @llvm.stacksave()
208 %x = alloca inalloca i8
209 call void @use(ptr inalloca(i8) %x) readnone
210 call void @llvm.stackrestore(ptr %stack)
212 store ptr %add1, ptr %b
213 %b2 = getelementptr ptr, ptr %b, i32 1
214 store ptr %add2, ptr %b2
; Load / GEP / store chain with a stacksave..stackrestore region between the
; GEPs and the stores; the expected output vectorizes the chain and keeps the
; region in place.
; NOTE(review): the lines of this function visible here are identical to those
; of @stacksave4 -- confirm whether the two are meant to differ.
218 define void @stacksave5(ptr %a, ptr %b, ptr %c) {
219 ; CHECK-LABEL: @stacksave5(
220 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
221 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
222 ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
223 ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1
224 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
225 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]])
226 ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
227 ; CHECK-NEXT: ret void
230 %v1 = load ptr, ptr %a
231 %a2 = getelementptr ptr, ptr %a, i32 1
232 %v2 = load ptr, ptr %a2
234 %add1 = getelementptr i8, ptr %v1, i32 1
235 %add2 = getelementptr i8, ptr %v2, i32 1
237 %stack = call ptr @llvm.stacksave()
238 %x = alloca inalloca i8
239 call void @use(ptr inalloca(i8) %x) readnone
240 call void @llvm.stackrestore(ptr %stack)
242 store ptr %add1, ptr %b
243 %b2 = getelementptr ptr, ptr %b, i32 1
244 store ptr %add2, ptr %b2
248 ; Reordering the second alloca above the stackrestore while
249 ; leaving the write to it below would introduce a write-after-free
;
; In the expected output the first alloca and its store sit between the
; stacksave and the stackrestore, the second alloca and its store stay below
; the stackrestore, and the insertelement pair, vector GEP and <2 x ptr> store
; are emitted after both.
251 define void @stackrestore1(ptr %a, ptr %b, ptr %c) {
252 ; CHECK-LABEL: @stackrestore1(
253 ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
254 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1
255 ; CHECK-NEXT: store i8 0, ptr [[V1]], align 1
256 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]])
257 ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1
258 ; CHECK-NEXT: store i8 0, ptr [[V2]], align 1
259 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
260 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
261 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
262 ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
263 ; CHECK-NEXT: ret void
266 %stack = call ptr @llvm.stacksave()
269 call void @llvm.stackrestore(ptr %stack)
; Both GEPs and both stores to %b come after the stackrestore.
273 %add1 = getelementptr i8, ptr %v1, i32 1
274 %add2 = getelementptr i8, ptr %v2, i32 1
276 store ptr %add1, ptr %b
277 %b2 = getelementptr ptr, ptr %b, i32 1
278 store ptr %add2, ptr %b2
; External callee taking an inalloca(i8) pointer; the tests above call it
; between a stacksave and the matching stackrestore.
282 declare void @use(ptr inalloca(i8))
; Stack save/restore intrinsics; the expected output prints them with a .p0
; suffix.
283 declare ptr @llvm.stacksave()
284 declare void @llvm.stackrestore(ptr)
286 ; The next set are reduced from previous regressions.
; External callees used by @ham: @wibble takes and returns a pointer, @quux
; takes an inalloca(i32) pointer.
288 declare ptr @wibble(ptr)
289 declare void @quux(ptr inalloca(i32))
; Eight pointer stores into elements 0-7 of %var12 (five of %var4, three of
; %var5) follow three @wibble calls and a stacksave / inalloca alloca / @quux /
; stackrestore sequence. The expected output emits two <4 x ptr> stores, both
; after the stackrestore: a splat of %var4 to elements 0-3, and
; <%var4, %var5, %var5, %var5> to elements 4-7.
; NOTE(review): unlike the other functions, no label check line for @ham is
; visible directly after the define line here -- confirm it is present.
291 define void @ham() #1 {
293 ; CHECK-NEXT: [[VAR2:%.*]] = alloca i8, align 1
294 ; CHECK-NEXT: [[VAR3:%.*]] = alloca i8, align 1
295 ; CHECK-NEXT: [[VAR4:%.*]] = alloca i8, align 1
296 ; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1
297 ; CHECK-NEXT: [[VAR12:%.*]] = alloca [12 x ptr], align 8
298 ; CHECK-NEXT: [[VAR15:%.*]] = call ptr @wibble(ptr [[VAR2]])
299 ; CHECK-NEXT: [[VAR16:%.*]] = call ptr @wibble(ptr [[VAR3]])
300 ; CHECK-NEXT: [[VAR17:%.*]] = call ptr @wibble(ptr [[VAR4]])
301 ; CHECK-NEXT: [[VAR23:%.*]] = call ptr @llvm.stacksave.p0()
302 ; CHECK-NEXT: [[VAR24:%.*]] = alloca inalloca i32, align 4
303 ; CHECK-NEXT: call void @quux(ptr inalloca(i32) [[VAR24]])
304 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[VAR23]])
305 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
306 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
307 ; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[VAR12]], align 8
308 ; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
309 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
310 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
311 ; CHECK-NEXT: store <4 x ptr> [[TMP4]], ptr [[VAR36]], align 8
312 ; CHECK-NEXT: ret void
318 %var12 = alloca [12 x ptr]
319 %var15 = call ptr @wibble(ptr %var2)
320 %var16 = call ptr @wibble(ptr %var3)
321 %var17 = call ptr @wibble(ptr %var4)
; stacksave / inalloca alloca / call / stackrestore region preceding the stores.
322 %var23 = call ptr @llvm.stacksave()
323 %var24 = alloca inalloca i32
324 call void @quux(ptr inalloca(i32) %var24)
325 call void @llvm.stackrestore(ptr %var23)
; Elements 0-4 receive %var4.
326 store ptr %var4, ptr %var12
327 %var33 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 1
328 store ptr %var4, ptr %var33
329 %var34 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 2
330 store ptr %var4, ptr %var34
331 %var35 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 3
332 store ptr %var4, ptr %var35
333 %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
334 store ptr %var4, ptr %var36
; Elements 5-7 receive %var5.
335 %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
336 store ptr %var5, ptr %var37
337 %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
338 store ptr %var5, ptr %var38
339 %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
340 store ptr %var5, ptr %var39
; Four pointer stores into elements 4-7 of %var12: %var4 to element 4 and
; %var5 to elements 5-7. The expected output packs the two allocas with an
; insertelement pair, expands them with a <0, 1, 1, 1> shuffle, and emits a
; single <4 x ptr> store at element 4.
344 define void @spam() #1 {
345 ; CHECK-LABEL: @spam(
346 ; CHECK-NEXT: [[VAR4:%.*]] = alloca i8, align 1
347 ; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1
348 ; CHECK-NEXT: [[VAR12:%.*]] = alloca [12 x ptr], align 8
349 ; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
350 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
351 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
352 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
353 ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[VAR36]], align 8
354 ; CHECK-NEXT: ret void
358 %var12 = alloca [12 x ptr]
359 %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
360 store ptr %var4, ptr %var36
361 %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
362 store ptr %var5, ptr %var37
363 %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
364 store ptr %var5, ptr %var38
365 %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
366 store ptr %var5, ptr %var39
; NOTE(review): #0 is not referenced by any function visible in this view --
; confirm whether it is still needed.
370 attributes #0 = { nofree nosync nounwind willreturn }
; #1 is attached to @ham and @spam and sets their target-features string.
371 attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" }