; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -verify | FileCheck %s -check-prefix=ENABLED

; Without supernode operand reordering, this does not get fully vectorized.
; S[0] = (A[0] + B[0]) + C[0]
; S[1] = (B[1] + C[1]) + A[1]
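;
; With supernode operand reordering the two lanes can share the vector load
; of B[0:1]: the checks below gather (A0, C1) and (C0, A1) into <2 x double>
; vectors and add both to the wide load of B.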
define void @test_supernode_add(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
; ENABLED-LABEL: @test_supernode_add(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  %A0 = load double, double *%idxA0, align 8
  %A1 = load double, double *%idxA1, align 8

  %B0 = load double, double *%idxB0, align 8
  %B1 = load double, double *%idxB1, align 8

  %C0 = load double, double *%idxC0, align 8
  %C1 = load double, double *%idxC1, align 8

  %addA0B0 = fadd fast double %A0, %B0
  %addB1C1 = fadd fast double %B1, %C1
  %add0 = fadd fast double %addA0B0, %C0
  %add1 = fadd fast double %addB1C1, %A1
  store double %add0, double *%idxS0, align 8
  store double %add1, double *%idxS1, align 8
  ret void
}

; Without supernode operand reordering, this does not get fully vectorized.
; S[0] = (A[0] - B[0]) + C[0]
; S[1] = (C[1] - B[1]) + A[1]
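;
; Here B[0:1] is the shared vector operand of the fsub: the checks below
; gather (A0, C1) as the minuend vector, subtract the wide load of B, and
; then add the gathered (C0, A1) vector.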
define void @test_supernode_addsub(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
; ENABLED-LABEL: @test_supernode_addsub(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
; ENABLED-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  %A0 = load double, double *%idxA0, align 8
  %A1 = load double, double *%idxA1, align 8

  %B0 = load double, double *%idxB0, align 8
  %B1 = load double, double *%idxB1, align 8

  %C0 = load double, double *%idxC0, align 8
  %C1 = load double, double *%idxC1, align 8

  %subA0B0 = fsub fast double %A0, %B0
  %subC1B1 = fsub fast double %C1, %B1
  %add0 = fadd fast double %subA0B0, %C0
  %add1 = fadd fast double %subC1B1, %A1
  store double %add0, double *%idxS0, align 8
  store double %add1, double *%idxS1, align 8
  ret void
}

; Without supernode operand reordering, this does not get fully vectorized.
; This checks that the supernode works with alternate sequences.
;
; S[0] = (A[0] - B[0]) - C[0]
; S[1] = (B[1] + C[1]) + A[1]
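;
; Lane 0 is an fsub chain and lane 1 an fadd chain, so a vector body would
; need alternating fsub/fadd opcodes. The checks below show that this case
; currently stays scalar.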
define void @test_supernode_addsub_alt(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
; ENABLED-LABEL: @test_supernode_addsub_alt(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
; ENABLED-NEXT:    [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
; ENABLED-NEXT:    [[ADDB1C1:%.*]] = fadd fast double [[B1]], [[C1]]
; ENABLED-NEXT:    [[SUB0:%.*]] = fsub fast double [[SUBA0B0]], [[C0]]
; ENABLED-NEXT:    [[ADD1:%.*]] = fadd fast double [[ADDB1C1]], [[A1]]
; ENABLED-NEXT:    store double [[SUB0]], double* [[IDXS0]], align 8
; ENABLED-NEXT:    store double [[ADD1]], double* [[IDXS1]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  %A0 = load double, double *%idxA0, align 8
  %A1 = load double, double *%idxA1, align 8

  %B0 = load double, double *%idxB0, align 8
  %B1 = load double, double *%idxB1, align 8

  %C0 = load double, double *%idxC0, align 8
  %C1 = load double, double *%idxC1, align 8

  %subA0B0 = fsub fast double %A0, %B0
  %addB1C1 = fadd fast double %B1, %C1
  %sub0 = fsub fast double %subA0B0, %C0
  %add1 = fadd fast double %addB1C1, %A1
  store double %sub0, double *%idxS0, align 8
  store double %add1, double *%idxS1, align 8
  ret void
}

; This checks that vectorizeTree() works correctly with the supernode
; and does not generate uses before defs.
; If all of the operands of the supernode are vectorizable, then the scheduler
; will fix their position in the program. If not, then the scheduler may not
; touch them, leading to uses before defs.
;
;   A0 C   A1 B1           A0 C   A1 D            A0:1  C,D
;    \ /    \ /   Reorder   \ /    \ /   Bundles    \   /
;  t1 + B0 t3 + D  ----->  t1 + B0 t3 + B1  ----->  t1:3 + B0:1
;      \ /     \ /             \ /     \ /              \   /
;   t2 +    t4 +            t2 +    t4 +             t2:4 +
;
; After reordering, 'D' conceptually becomes an operand of t3:
;   t3 = A1 + D
; But D is defined *after* its use.
define void @supernode_scheduling(double* %Aarray, double* %Barray, double *%Carray, double *%Darray, double *%Sarray) {
; ENABLED-LABEL: @supernode_scheduling(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXC:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXD:%.*]] = getelementptr inbounds double, double* [[DARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[C:%.*]] = load double, double* [[IDXC]], align 8
; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[C]], i32 0
; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B1]], i32 1
; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
; ENABLED-NEXT:    [[D:%.*]] = load double, double* [[IDXD]], align 8
; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[D]], i32 1
; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxC = getelementptr inbounds double, double* %Carray, i64 0
  %idxD = getelementptr inbounds double, double* %Darray, i64 0
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  %A0 = load double, double *%idxA0, align 8
  %C = load double, double *%idxC, align 8
  %t1 = fadd fast double %A0, %C
  %B0 = load double, double *%idxB0, align 8
  %t2 = fadd fast double %t1, %B0
  %A1 = load double, double *%idxA1, align 8
  %B1 = load double, double *%idxB1, align 8
  %t3 = fadd fast double %A1, %B1
  %D = load double, double *%idxD, align 8
  %t4 = fadd fast double %t3, %D

  store double %t2, double *%idxS0, align 8
  store double %t4, double *%idxS1, align 8
  ret void
}

; The SLP scheduler has trouble moving instructions across blocks.
; Even though we can build a SuperNode for this example, we should not because the scheduler
; cannot handle the cross-block instruction motion that is required once the operands of the
; SuperNode are reordered.
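;
; The computation is split across two blocks (sketch of the IR below):
;
;   entry:                    bb:
;     A0 = load A[0]            A1 = load A[1]
;     B1 = load B[1]            B0 = load B[0]
;     Tmp0 = A0 + 2.0           Sum0 = Tmp0 + B0
;     Tmp1 = B1 + 2.0           Sum1 = Tmp1 + A1
;                               S[0] = Sum0
;                               S[1] = Sum1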
define void @supernode_scheduling_cross_block(double* %Aarray, double* %Barray, double *%Sarray) {
; ENABLED-LABEL: @supernode_scheduling_cross_block(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; ENABLED-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
; ENABLED-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
; ENABLED-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], <double 2.000000e+00, double 2.000000e+00>
; ENABLED-NEXT:    br label [[BB:%.*]]
; ENABLED:       bb:
; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
; ENABLED-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[A1]], i32 1
; ENABLED-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
; ENABLED-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; ENABLED-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  %A0 = load double, double *%idxA0, align 8
  %B1 = load double, double *%idxB1, align 8
  %Tmp0 = fadd fast double %A0, 2.0
  %Tmp1 = fadd fast double %B1, 2.0
  br label %bb

bb:
  %A1 = load double, double *%idxA1, align 8
  %B0 = load double, double *%idxB0, align 8

  %Sum0 = fadd fast double %Tmp0, %B0
  %Sum1 = fadd fast double %Tmp1, %A1

  store double %Sum0, double *%idxS0, align 8
  store double %Sum1, double *%idxS1, align 8
  ret void
}