1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=slp-vectorizer,verify -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s -check-prefix=ENABLED
4 ; Without supernode operand reordering, this does not get fully vectorized.
5 ; S[0] = (A[0] + B[0]) + C[0]
6 ; S[1] = (B[1] + C[1]) + A[1]
; Both lanes add one element from each of A, B and C; only the order in which
; the operands enter the two-level fadd chain differs between the lanes.
; The autogenerated checks expect a single <2 x double> load of B, the pairs
; <A0, C1> and <C0, A1> assembled with insertelement, two vector fadds and one
; <2 x double> store to S.
7 define void @test_supernode_add(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
8 ; ENABLED-LABEL: @test_supernode_add(
10 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
11 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
12 ; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
13 ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
14 ; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
15 ; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
16 ; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
17 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
18 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
19 ; ENABLED-NEXT: [[TMP3:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP2]]
20 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
21 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A1]], i32 1
22 ; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
23 ; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
24 ; ENABLED-NEXT: ret void
; Addresses of element 1 of each array; element 0 is accessed through the base
; pointer itself.
27 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
28 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
29 %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
30 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; Scalar loads of elements 0 and 1 of A, B and C.
32 %A0 = load double, ptr %Aarray, align 8
33 %A1 = load double, ptr %idxA1, align 8
35 %B0 = load double, ptr %Barray, align 8
36 %B1 = load double, ptr %idxB1, align 8
38 %C0 = load double, ptr %Carray, align 8
39 %C1 = load double, ptr %idxC1, align 8
; Lane 0 computes (A0 + B0) + C0; lane 1 computes (B1 + C1) + A1, so the
; operands of the two lanes do not line up position by position.
41 %addA0B0 = fadd fast double %A0, %B0
42 %addB1C1 = fadd fast double %B1, %C1
43 %add0 = fadd fast double %addA0B0, %C0
44 %add1 = fadd fast double %addB1C1, %A1
; Adjacent scalar stores to S[0] and S[1].
45 store double %add0, ptr %Sarray, align 8
46 store double %add1, ptr %idxS1, align 8
51 ; Without supernode operand reordering, this does not get fully vectorized.
52 ; S[0] = (A[0] - B[0]) + C[0]
53 ; S[1] = (C[1] - B[1]) + A[1]
; B is subtracted in both lanes, from A in lane 0 and from C in lane 1; the
; remaining operand (C in lane 0, A in lane 1) is then added.
; The autogenerated checks expect a <2 x double> load of B subtracted from the
; pair <A0, C1>, followed by a vector fadd with the pair <C0, A1> and one
; <2 x double> store to S.
54 define void @test_supernode_addsub(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
55 ; ENABLED-LABEL: @test_supernode_addsub(
56 ; ENABLED-NEXT: entry:
57 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
58 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
59 ; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
60 ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
61 ; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
62 ; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
63 ; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
64 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
65 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
66 ; ENABLED-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP0]]
67 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
68 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A1]], i32 1
69 ; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
70 ; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
71 ; ENABLED-NEXT: ret void
; Addresses of element 1 of each array; element 0 is accessed through the base
; pointer itself.
74 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
75 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
76 %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
77 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; Scalar loads of elements 0 and 1 of A, B and C.
79 %A0 = load double, ptr %Aarray, align 8
80 %A1 = load double, ptr %idxA1, align 8
82 %B0 = load double, ptr %Barray, align 8
83 %B1 = load double, ptr %idxB1, align 8
85 %C0 = load double, ptr %Carray, align 8
86 %C1 = load double, ptr %idxC1, align 8
; Lane 0 computes (A0 - B0) + C0; lane 1 computes (C1 - B1) + A1. The roles of
; A and C are swapped between the lanes while B stays the subtrahend.
88 %subA0B0 = fsub fast double %A0, %B0
89 %subC1B1 = fsub fast double %C1, %B1
90 %add0 = fadd fast double %subA0B0, %C0
91 %add1 = fadd fast double %subC1B1, %A1
; Adjacent scalar stores to S[0] and S[1].
92 store double %add0, ptr %Sarray, align 8
93 store double %add1, ptr %idxS1, align 8
97 ; Without supernode operand reordering, this does not get fully vectorized.
98 ; This checks that the super-node works with alternate sequences.
100 ; S[0] = (A[0] - B[0]) - C[0]
101 ; S[1] = (B[1] + C[1]) + A[1]
; Lane 0 uses fsub at both levels of the chain while lane 1 uses fadd at both
; levels, so each level mixes two different opcodes across the lanes.
; The autogenerated checks expect each level to be computed as both a vector
; fsub and a vector fadd on the same operands, blended by a shufflevector with
; mask <0, 3>, i.e. element 0 of the fsub result and element 1 of the fadd
; result.
102 define void @test_supernode_addsub_alt(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
103 ; ENABLED-LABEL: @test_supernode_addsub_alt(
104 ; ENABLED-NEXT: entry:
105 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
106 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
107 ; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
108 ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
109 ; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
110 ; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
111 ; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
112 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
113 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
114 ; ENABLED-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP0]]
115 ; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP0]]
116 ; ENABLED-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <2 x i32> <i32 0, i32 3>
117 ; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
118 ; ENABLED-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A1]], i32 1
119 ; ENABLED-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
120 ; ENABLED-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]]
121 ; ENABLED-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
122 ; ENABLED-NEXT: store <2 x double> [[TMP10]], ptr [[SARRAY:%.*]], align 8
123 ; ENABLED-NEXT: ret void
; Addresses of element 1 of each array; element 0 is accessed through the base
; pointer itself.
126 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
127 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
128 %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
129 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; Scalar loads of elements 0 and 1 of A, B and C.
131 %A0 = load double, ptr %Aarray, align 8
132 %A1 = load double, ptr %idxA1, align 8
134 %B0 = load double, ptr %Barray, align 8
135 %B1 = load double, ptr %idxB1, align 8
137 %C0 = load double, ptr %Carray, align 8
138 %C1 = load double, ptr %idxC1, align 8
; Lane 0 computes (A0 - B0) - C0; lane 1 computes (B1 + C1) + A1.
140 %subA0B0 = fsub fast double %A0, %B0
141 %addB1C1 = fadd fast double %B1, %C1
142 %sub0 = fsub fast double %subA0B0, %C0
143 %add1 = fadd fast double %addB1C1, %A1
; Adjacent scalar stores to S[0] and S[1].
144 store double %sub0, ptr %Sarray, align 8
145 store double %add1, ptr %idxS1, align 8
149 ; This checks that vectorizeTree() works correctly with the supernode
150 ; and does not generate uses before defs.
151 ; If all of the operands of the supernode are vectorizable, then the scheduler
152 ; will fix their position in the program. If not, then the scheduler may not
153 ; touch them, leading to uses before defs.
167 ; A0   C   A1   B1             A0   C   A1   D            A0:1   C,D
168 ;   \ /      \ /    Reorder      \ /      \ /    Bundles      \ /
169 ; t1 + B0  t3 + D   ------->   t1 + B0  t3 + B1  ------>  t1:3 + B0:1
171 ; t2 +     t4 +                t2 +     t4 +              t2:4 +
173 ; After reordering, 'D' conceptually becomes an operand of t3 (that is, t3 = A1 + D).
175 ; But D is defined *after* its use.
; The scalar code interleaves the loads with the fadds that consume them; in
; particular %D is loaded only after %t3 has been computed.
; The autogenerated checks expect the scalar loads of C, B0, B1 and D and the
; <2 x double> load of A to all precede the first vector fadd, so that every
; operand is defined before the vector instruction that uses it.
177 define void @supernode_scheduling(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Darray, ptr %Sarray) {
178 ; ENABLED-LABEL: @supernode_scheduling(
179 ; ENABLED-NEXT: entry:
180 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
181 ; ENABLED-NEXT: [[C:%.*]] = load double, ptr [[CARRAY:%.*]], align 8
182 ; ENABLED-NEXT: [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
183 ; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
184 ; ENABLED-NEXT: [[D:%.*]] = load double, ptr [[DARRAY:%.*]], align 8
185 ; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[AARRAY:%.*]], align 8
186 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
187 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B1]], i32 1
188 ; ENABLED-NEXT: [[TMP3:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP2]]
189 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
190 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[D]], i32 1
191 ; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
192 ; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
193 ; ENABLED-NEXT: ret void
; Addresses of element 1 of A, B and S. C and D are only accessed at element 0.
196 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
197 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
198 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; Lane 0: t1 = A0 + C, then t2 = t1 + B0. Each load sits directly before its
; first use.
201 %A0 = load double, ptr %Aarray, align 8
202 %C = load double, ptr %Carray, align 8
203 %t1 = fadd fast double %A0, %C
204 %B0 = load double, ptr %Barray, align 8
205 %t2 = fadd fast double %t1, %B0
; Lane 1: t3 = A1 + B1, then t4 = t3 + D. The load of %D comes after %t3.
206 %A1 = load double, ptr %idxA1, align 8
207 %B1 = load double, ptr %idxB1, align 8
208 %t3 = fadd fast double %A1, %B1
209 %D = load double, ptr %Darray, align 8
210 %t4 = fadd fast double %t3, %D
; Adjacent scalar stores to S[0] and S[1].
212 store double %t2, ptr %Sarray, align 8
213 store double %t4, ptr %idxS1, align 8
218 ; The SLP scheduler has trouble moving instructions across blocks.
219 ; Even though we can build a SuperNode for this example, we should not because the scheduler
220 ; cannot handle the cross-block instruction motion that is required once the operands of the
221 ; SuperNode are reordered.
; Each lane first adds the constant 2.0 to a loaded value (A0 in lane 0, B1 in
; lane 1) and then adds a second loaded value (B0 in lane 0, A1 in lane 1).
; The autogenerated checks expect the vector fadd of <A0, B1> with <2.0, 2.0>
; to be emitted before the branch, and the pair <B0, A1>, the second vector
; fadd and the <2 x double> store to S after it.
234 define void @supernode_scheduling_cross_block(ptr %Aarray, ptr %Barray, ptr %Sarray) {
235 ; ENABLED-LABEL: @supernode_scheduling_cross_block(
236 ; ENABLED-NEXT: entry:
237 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
238 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
239 ; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
240 ; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
241 ; ENABLED-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
242 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
243 ; ENABLED-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], <double 2.000000e+00, double 2.000000e+00>
244 ; ENABLED-NEXT: br label [[BB:%.*]]
246 ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
247 ; ENABLED-NEXT: [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
248 ; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
249 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[A1]], i32 1
250 ; ENABLED-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
251 ; ENABLED-NEXT: store <2 x double> [[TMP5]], ptr [[SARRAY:%.*]], align 8
252 ; ENABLED-NEXT: ret void
; Addresses of element 1 of A, B and S; element 0 is accessed through the base
; pointer itself.
255 %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
256 %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
257 %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
; First level: lane 0 starts from A0 and lane 1 from B1; both add 2.0.
259 %A0 = load double, ptr %Aarray, align 8
260 %B1 = load double, ptr %idxB1, align 8
261 %Tmp0 = fadd fast double %A0, 2.0
262 %Tmp1 = fadd fast double %B1, 2.0
; Second operand of each lane, loaded after the first-level adds: A1 feeds
; lane 1 and B0 feeds lane 0.
266 %A1 = load double, ptr %idxA1, align 8
267 %B0 = load double, ptr %Barray, align 8
; Second level: Sum0 = Tmp0 + B0 (lane 0), Sum1 = Tmp1 + A1 (lane 1).
269 %Sum0 = fadd fast double %Tmp0, %B0
270 %Sum1 = fadd fast double %Tmp1, %A1
; Adjacent scalar stores to S[0] and S[1].
272 store double %Sum0, ptr %Sarray, align 8
273 store double %Sum1, ptr %idxS1, align 8