1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=slp-vectorizer,verify -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s -check-prefix=ENABLED
4 ; Without supernode operand reordering, this does not get fully vectorized.
5 ; S[0] = (A[0] + B[0]) + C[0]
6 ; S[1] = (B[1] + C[1]) + A[1]
7 define void @test_supernode_add(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
8 ; ENABLED-LABEL: @test_supernode_add(
10 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
11 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
12 ; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
13 ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
14 ; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
15 ; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
16 ; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
17 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
18 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
19 ; ENABLED-NEXT: [[TMP3:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP2]]
20 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
21 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A1]], i32 1
22 ; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
23 ; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
24 ; ENABLED-NEXT: ret void
; Summary of the expected output above: both fadd levels and the store are
; <2 x double>. Only B is read with a vector load; the A and C elements are
; loaded as scalars and packed with insertelement as <A0, C1> and <C0, A1>.
;
; Input IR. Pointers to element 1 of each array (element 0 is the base pointer).
27 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
28 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
29 %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
30 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; Scalar loads of A[0..1], B[0..1] and C[0..1].
32 %A0 = load double, ptr %Aarray, align 8
33 %A1 = load double, ptr %idxA1, align 8
35 %B0 = load double, ptr %Barray, align 8
36 %B1 = load double, ptr %idxB1, align 8
38 %C0 = load double, ptr %Carray, align 8
39 %C1 = load double, ptr %idxC1, align 8
; Lane 0 computes (A0 + B0) + C0 and lane 1 computes (B1 + C1) + A1, so A and C
; appear at different levels of the two-deep fadd chain in each lane.
41 %addA0B0 = fadd fast double %A0, %B0
42 %addB1C1 = fadd fast double %B1, %C1
43 %add0 = fadd fast double %addA0B0, %C0
44 %add1 = fadd fast double %addB1C1, %A1
; The two results are written to the adjacent elements S[0] and S[1].
45 store double %add0, ptr %Sarray, align 8
46 store double %add1, ptr %idxS1, align 8
51 ; Without supernode operand reordering, this does not get fully vectorized.
52 ; S[0] = (A[0] - B[0]) + C[0]
53 ; S[1] = (C[1] - B[1]) + A[1]
54 define void @test_supernode_addsub(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
55 ; ENABLED-LABEL: @test_supernode_addsub(
56 ; ENABLED-NEXT: entry:
57 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
58 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
59 ; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
60 ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
61 ; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
62 ; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
63 ; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
64 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
65 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
66 ; ENABLED-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP0]]
67 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
68 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A1]], i32 1
69 ; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
70 ; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
71 ; ENABLED-NEXT: ret void
; Summary of the expected output above: a <2 x double> fsub of <A0, C1> minus
; the vector-loaded B, then a <2 x double> fadd with <C0, A1>, then one vector
; store. The A and C elements are loaded as scalars and packed with
; insertelement; only B is read with a vector load.
;
; Input IR. Pointers to element 1 of each array (element 0 is the base pointer).
74 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
75 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
76 %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
77 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; Scalar loads of A[0..1], B[0..1] and C[0..1].
79 %A0 = load double, ptr %Aarray, align 8
80 %A1 = load double, ptr %idxA1, align 8
82 %B0 = load double, ptr %Barray, align 8
83 %B1 = load double, ptr %idxB1, align 8
85 %C0 = load double, ptr %Carray, align 8
86 %C1 = load double, ptr %idxC1, align 8
; Lane 0 computes (A0 - B0) + C0 and lane 1 computes (C1 - B1) + A1: B is the
; subtrahend in both lanes, while A and C swap roles between the lanes.
88 %subA0B0 = fsub fast double %A0, %B0
89 %subC1B1 = fsub fast double %C1, %B1
90 %add0 = fadd fast double %subA0B0, %C0
91 %add1 = fadd fast double %subC1B1, %A1
; The two results are written to the adjacent elements S[0] and S[1].
92 store double %add0, ptr %Sarray, align 8
93 store double %add1, ptr %idxS1, align 8
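97 ; Without supernode operand reordering, this does not get fully vectorized.
98 ; This checks that the super-node works with alternate sequences: here the two
; lanes use different opcodes (lane 0 is fsub/fsub, lane 1 is fadd/fadd).
; Note that the checks below expect this function to be left entirely scalar.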
100 ; S[0] = (A[0] - B[0]) - C[0]
101 ; S[1] = (B[1] + C[1]) + A[1]
102 define void @test_supernode_addsub_alt(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
103 ; ENABLED-LABEL: @test_supernode_addsub_alt(
104 ; ENABLED-NEXT: entry:
105 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
106 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
107 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
108 ; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, ptr [[SARRAY:%.*]], i64 1
109 ; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
110 ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
111 ; ENABLED-NEXT: [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
112 ; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
113 ; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
114 ; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
115 ; ENABLED-NEXT: [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
116 ; ENABLED-NEXT: [[ADDB1C1:%.*]] = fadd fast double [[B1]], [[C1]]
117 ; ENABLED-NEXT: [[SUB0:%.*]] = fsub fast double [[SUBA0B0]], [[C0]]
118 ; ENABLED-NEXT: [[ADD1:%.*]] = fadd fast double [[ADDB1C1]], [[A1]]
119 ; ENABLED-NEXT: store double [[SUB0]], ptr [[SARRAY]], align 8
120 ; ENABLED-NEXT: store double [[ADD1]], ptr [[IDXS1]], align 8
121 ; ENABLED-NEXT: ret void
; Summary of the expected output above: it mirrors the input IR below
; instruction for instruction, i.e. no vector instructions are expected.
;
; Input IR. Pointers to element 1 of each array (element 0 is the base pointer).
124 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
125 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
126 %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
127 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; Scalar loads of A[0..1], B[0..1] and C[0..1].
129 %A0 = load double, ptr %Aarray, align 8
130 %A1 = load double, ptr %idxA1, align 8
132 %B0 = load double, ptr %Barray, align 8
133 %B1 = load double, ptr %idxB1, align 8
135 %C0 = load double, ptr %Carray, align 8
136 %C1 = load double, ptr %idxC1, align 8
; Lane 0 computes (A0 - B0) - C0 using two fsubs, while lane 1 computes
; (B1 + C1) + A1 using two fadds, so the opcodes differ between the lanes at
; both levels of the chain.
138 %subA0B0 = fsub fast double %A0, %B0
139 %addB1C1 = fadd fast double %B1, %C1
140 %sub0 = fsub fast double %subA0B0, %C0
141 %add1 = fadd fast double %addB1C1, %A1
; The two results are written to the adjacent elements S[0] and S[1].
142 store double %sub0, ptr %Sarray, align 8
143 store double %add1, ptr %idxS1, align 8
147 ; This checks that vectorizeTree() works correctly with the supernode
148 ; and does not generate uses before defs.
149 ; If all of the operands of the supernode are vectorizable, then the scheduler
150 ; will fix their position in the program. If not, then the scheduler may not
151 ; touch them, leading to uses before defs.
165 ;  A0  C    A1  B1              A0  C    A1  D               A0:1 C,D
166 ;   \ /      \ /      Reorder    \ /      \ /      Bundles      \ /
167 ; t1 + B0  t3 + D    ------->  t1 + B0  t3 + B1   ------>   t1:3 + B0:1
169 ;   t2 +     t4 +                t2 +     t4 +                t2:4 +
171 ; After reordering, 'D' conceptually becomes an operand of t3 (t3 = A1 + D).
173 ; But D is defined *after* its use: in the input IR the load of %D comes after %t3.
175 define void @supernode_scheduling(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Darray, ptr %Sarray) {
176 ; ENABLED-LABEL: @supernode_scheduling(
177 ; ENABLED-NEXT: entry:
178 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
179 ; ENABLED-NEXT: [[C:%.*]] = load double, ptr [[CARRAY:%.*]], align 8
180 ; ENABLED-NEXT: [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
181 ; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
182 ; ENABLED-NEXT: [[D:%.*]] = load double, ptr [[DARRAY:%.*]], align 8
183 ; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[AARRAY:%.*]], align 8
184 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
185 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B1]], i32 1
186 ; ENABLED-NEXT: [[TMP3:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP2]]
187 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
188 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[D]], i32 1
189 ; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
190 ; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
191 ; ENABLED-NEXT: ret void
; Summary of the expected output above: A is read with one vector load and
; added to <C, B1>, then <B0, D> is added, and the result is stored as one
; vector. All scalar loads, including the one of D, precede the insertelement
; instructions that use them, so every value is defined before it is used.
;
; Input IR. Pointers to element 1 of A, B and S (element 0 is the base pointer).
194 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
195 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
196 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; Lane 0: t2 = (A0 + C) + B0. The loads are interleaved with the arithmetic.
199 %A0 = load double, ptr %Aarray, align 8
200 %C = load double, ptr %Carray, align 8
201 %t1 = fadd fast double %A0, %C
202 %B0 = load double, ptr %Barray, align 8
203 %t2 = fadd fast double %t1, %B0
; Lane 1: t4 = (A1 + B1) + D. %D is loaded only after %t3 has been computed,
; which is the ordering the use-before-def concern described above is about.
204 %A1 = load double, ptr %idxA1, align 8
205 %B1 = load double, ptr %idxB1, align 8
206 %t3 = fadd fast double %A1, %B1
207 %D = load double, ptr %Darray, align 8
208 %t4 = fadd fast double %t3, %D
; The two results are written to the adjacent elements S[0] and S[1].
210 store double %t2, ptr %Sarray, align 8
211 store double %t4, ptr %idxS1, align 8
216 ; The SLP scheduler has trouble moving instructions across blocks.
217 ; Even though we can build a SuperNode for this example, we should not because the scheduler
218 ; cannot handle the cross-block instruction motion that is required once the operands of the
219 ; SuperNode are reordered.
232 define void @supernode_scheduling_cross_block(ptr %Aarray, ptr %Barray, ptr %Sarray) {
233 ; ENABLED-LABEL: @supernode_scheduling_cross_block(
234 ; ENABLED-NEXT: entry:
235 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
236 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
237 ; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
238 ; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
239 ; ENABLED-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
240 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
241 ; ENABLED-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], <double 2.000000e+00, double 2.000000e+00>
242 ; ENABLED-NEXT: br label [[BB:%.*]]
244 ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
245 ; ENABLED-NEXT: [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
246 ; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
247 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[A1]], i32 1
248 ; ENABLED-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
249 ; ENABLED-NEXT: store <2 x double> [[TMP5]], ptr [[SARRAY:%.*]], align 8
250 ; ENABLED-NEXT: ret void
; Summary of the expected output above: two <2 x double> fadds. The first,
; <A0, B1> + <2.0, 2.0>, is expected before the branch; the second adds
; <B0, A1> and is expected after the branch, following the loads of A1 and B0.
; The operands of each lane stay in their input order, and the result is
; written with a single vector store.
;
; Input IR. Pointers to element 1 of A, B and S (element 0 is the base pointer).
253 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
254 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
255 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; First level: lane 0 starts from A[0] and lane 1 from B[1]; both add 2.0.
257 %A0 = load double, ptr %Aarray, align 8
258 %B1 = load double, ptr %idxB1, align 8
259 %Tmp0 = fadd fast double %A0, 2.0
260 %Tmp1 = fadd fast double %B1, 2.0
; Second level: each lane adds the element of the other array, B[0] for lane 0
; and A[1] for lane 1.
264 %A1 = load double, ptr %idxA1, align 8
265 %B0 = load double, ptr %Barray, align 8
267 %Sum0 = fadd fast double %Tmp0, %B0
268 %Sum1 = fadd fast double %Tmp1, %A1
; The two results are written to the adjacent elements S[0] and S[1].
270 store double %Sum0, ptr %Sarray, align 8
271 store double %Sum1, ptr %idxS1, align 8