; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s

; This file tests the look-ahead operand reordering heuristic.
;
; This checks that operand reordering will reorder the operands of the adds
; by taking into consideration the instructions beyond the immediate
; predecessors.
;
; A[0] B[0] C[0] D[0] C[1] D[1] A[1] B[1]
;  \  /      \  /      \  /      \  /
;   -         -          -         -
;    \       /            \       /
;        +                    +
;        |                    |
;       S[0]                 S[1]
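;
; Roughly, in scalar C-like form (an illustrative sketch derived from the IR
; below; A, B, C, D stand for &array[0], &array[2], &array[4], &array[6],
; and S for array itself):
;
;   S[0] = (A[0] - B[0]) + (C[0] - D[0]);
;   S[1] = (C[1] - D[1]) + (A[1] - B[1]);
;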
define void @lookahead_basic(double* %array) {
; CHECK-LABEL: @lookahead_basic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX4]] to <2 x double>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX6]] to <2 x double>*
; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP8]], [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %idx0 = getelementptr inbounds double, double* %array, i64 0
  %idx1 = getelementptr inbounds double, double* %array, i64 1
  %idx2 = getelementptr inbounds double, double* %array, i64 2
  %idx3 = getelementptr inbounds double, double* %array, i64 3
  %idx4 = getelementptr inbounds double, double* %array, i64 4
  %idx5 = getelementptr inbounds double, double* %array, i64 5
  %idx6 = getelementptr inbounds double, double* %array, i64 6
  %idx7 = getelementptr inbounds double, double* %array, i64 7

  %A_0 = load double, double *%idx0, align 8
  %A_1 = load double, double *%idx1, align 8
  %B_0 = load double, double *%idx2, align 8
  %B_1 = load double, double *%idx3, align 8
  %C_0 = load double, double *%idx4, align 8
  %C_1 = load double, double *%idx5, align 8
  %D_0 = load double, double *%idx6, align 8
  %D_1 = load double, double *%idx7, align 8

  %subAB_0 = fsub fast double %A_0, %B_0
  %subCD_0 = fsub fast double %C_0, %D_0

  %subAB_1 = fsub fast double %A_1, %B_1
  %subCD_1 = fsub fast double %C_1, %D_1

  %addABCD_0 = fadd fast double %subAB_0, %subCD_0
  %addCDAB_1 = fadd fast double %subCD_1, %subAB_1

  store double %addABCD_0, double *%idx0, align 8
  store double %addCDAB_1, double *%idx1, align 8
  ret void
}

; Check whether the look-ahead operand reordering heuristic will avoid
; bundling the alt opcodes. The vectorized code should have no shuffles.
;
; A[0] B[0] A[0] B[0] A[1] B[1] A[1] B[1]
;  \  /      \  /      \  /      \  /
;   +         -          -         +
;    \       /            \       /
;        +                    +
;        |                    |
;       S[0]                 S[1]
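;
; Roughly, in scalar C-like form (an illustrative sketch derived from the IR
; below; A and B stand for &array[0] and &array[2], S for array itself):
;
;   S[0] = (A[0] + B[0]) + (A[0] - B[0]);
;   S[1] = (A[1] - B[1]) + (A[1] + B[1]);
;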
define void @lookahead_alt1(double* %array) {
; CHECK-LABEL: @lookahead_alt1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %idx0 = getelementptr inbounds double, double* %array, i64 0
  %idx1 = getelementptr inbounds double, double* %array, i64 1
  %idx2 = getelementptr inbounds double, double* %array, i64 2
  %idx3 = getelementptr inbounds double, double* %array, i64 3
  %idx4 = getelementptr inbounds double, double* %array, i64 4
  %idx5 = getelementptr inbounds double, double* %array, i64 5
  %idx6 = getelementptr inbounds double, double* %array, i64 6
  %idx7 = getelementptr inbounds double, double* %array, i64 7

  %A_0 = load double, double *%idx0, align 8
  %A_1 = load double, double *%idx1, align 8
  %B_0 = load double, double *%idx2, align 8
  %B_1 = load double, double *%idx3, align 8

  %addAB_0_L = fadd fast double %A_0, %B_0
  %subAB_0_R = fsub fast double %A_0, %B_0

  %subAB_1_L = fsub fast double %A_1, %B_1
  %addAB_1_R = fadd fast double %A_1, %B_1

  %addABCD_0 = fadd fast double %addAB_0_L, %subAB_0_R
  %addCDAB_1 = fadd fast double %subAB_1_L, %addAB_1_R

  store double %addABCD_0, double *%idx0, align 8
  store double %addCDAB_1, double *%idx1, align 8
  ret void
}

; This code should get vectorized all the way to the loads with shuffles for
; the alt opcodes.
;
; A[0] B[0] C[0] D[0] C[1] D[1] A[1] B[1]
;  \  /      \  /      \  /      \  /
;   +         -          +         -
;    \       /            \       /
;        +                    +
;        |                    |
;       S[0]                 S[1]
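;
; Roughly, in scalar C-like form (an illustrative sketch derived from the IR
; below; A, B, C, D stand for &array[0], &array[2], &array[4], &array[6],
; and S for array itself):
;
;   S[0] = (A[0] + B[0]) + (C[0] - D[0]);
;   S[1] = (C[1] + D[1]) + (A[1] - B[1]);
;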
define void @lookahead_alt2(double* %array) {
; CHECK-LABEL: @lookahead_alt2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX4]] to <2 x double>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX6]] to <2 x double>*
; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]]
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP12:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    [[TMP14:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP10]]
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP14]], <2 x double>* [[TMP15]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %idx0 = getelementptr inbounds double, double* %array, i64 0
  %idx1 = getelementptr inbounds double, double* %array, i64 1
  %idx2 = getelementptr inbounds double, double* %array, i64 2
  %idx3 = getelementptr inbounds double, double* %array, i64 3
  %idx4 = getelementptr inbounds double, double* %array, i64 4
  %idx5 = getelementptr inbounds double, double* %array, i64 5
  %idx6 = getelementptr inbounds double, double* %array, i64 6
  %idx7 = getelementptr inbounds double, double* %array, i64 7

  %A_0 = load double, double *%idx0, align 8
  %A_1 = load double, double *%idx1, align 8
  %B_0 = load double, double *%idx2, align 8
  %B_1 = load double, double *%idx3, align 8
  %C_0 = load double, double *%idx4, align 8
  %C_1 = load double, double *%idx5, align 8
  %D_0 = load double, double *%idx6, align 8
  %D_1 = load double, double *%idx7, align 8

  %addAB_0 = fadd fast double %A_0, %B_0
  %subCD_0 = fsub fast double %C_0, %D_0

  %addCD_1 = fadd fast double %C_1, %D_1
  %subAB_1 = fsub fast double %A_1, %B_1

  %addABCD_0 = fadd fast double %addAB_0, %subCD_0
  %addCDAB_1 = fadd fast double %addCD_1, %subAB_1

  store double %addABCD_0, double *%idx0, align 8
  store double %addCDAB_1, double *%idx1, align 8
  ret void
}

; A[0] B[0] C[0] D[0] A[1] B[2] A[2] B[1]
;  \  /      \  /      \  /      \  /
;   -         -          -         -
;    \       /            \       /
;        +                    +
;        |                    |
;       S[0]                 S[1]
;
; SLP should reorder the operands of the RHS add, taking into consideration the cost of external uses.
; It is more profitable to reorder the operands of the RHS add, because A[1] has an external use.
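;
; Roughly, in scalar C-like form (an illustrative sketch derived from the IR
; below; the pointers are the function arguments):
;
;   S[0] = (A[0] - B[0]) + (C[0] - D[0]);
;   S[1] = (A[1] - B[2]) + (A[2] - B[1]);
;   *Ext1 = A[1];                          // the external use
;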
define void @lookahead_external_uses(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2) {
; CHECK-LABEL: @lookahead_external_uses(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[A1]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[B2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A2]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP1]]
; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
; CHECK-NEXT:    store double [[A1]], double* [[EXT1:%.*]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
  %IdxB0 = getelementptr inbounds double, double* %B, i64 0
  %IdxC0 = getelementptr inbounds double, double* %C, i64 0
  %IdxD0 = getelementptr inbounds double, double* %D, i64 0

  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
  %IdxB2 = getelementptr inbounds double, double* %B, i64 2
  %IdxA2 = getelementptr inbounds double, double* %A, i64 2
  %IdxB1 = getelementptr inbounds double, double* %B, i64 1

  %A0 = load double, double *%IdxA0, align 8
  %B0 = load double, double *%IdxB0, align 8
  %C0 = load double, double *%IdxC0, align 8
  %D0 = load double, double *%IdxD0, align 8

  %A1 = load double, double *%IdxA1, align 8
  %B2 = load double, double *%IdxB2, align 8
  %A2 = load double, double *%IdxA2, align 8
  %B1 = load double, double *%IdxB1, align 8

  %subA0B0 = fsub fast double %A0, %B0
  %subC0D0 = fsub fast double %C0, %D0

  %subA1B2 = fsub fast double %A1, %B2
  %subA2B1 = fsub fast double %A2, %B1

  %add0 = fadd fast double %subA0B0, %subC0D0
  %add1 = fadd fast double %subA1B2, %subA2B1

  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
  %IdxS1 = getelementptr inbounds double, double* %S, i64 1

  store double %add0, double *%IdxS0, align 8
  store double %add1, double *%IdxS1, align 8

  ; External use of A1
  store double %A1, double *%Ext1, align 8
  ret void
}

; A[0] B[0] C[0] D[0] A[1] B[2] A[2] B[1]
;  \  /      \  /      /\  /      \  /\
;   -         -  U1,U2,U3 -        -  U4,U5
;    \       /            \       /
;        +                    +
;        |                    |
;       S[0]                 S[1]
;
; If we limit the users budget for the look-ahead heuristic to 2, then the
; look-ahead heuristic has no way of choosing B[1] (with 2 external users)
; over A[1] (with 3 external users).
; The result is that the operands of the add are not reordered, and the loads
; from A get vectorized instead of the loads from B.
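;
; Roughly, in scalar C-like form (an illustrative sketch derived from the IR
; below; the pointers are the function arguments):
;
;   S[0] = (A[0] - B[0]) + (C[0] - D[0]);
;   S[1] = (A[1] - B[2]) + (A[2] - B[1]);
;   *Ext1 = *Ext2 = *Ext3 = A[1];          // U1, U2, U3
;   *Ext4 = *Ext5 = B[1];                  // U4, U5
;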
define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2, double *%Ext3, double *%Ext4, double *%Ext5) {
; CHECK-LABEL: @lookahead_limit_users_budget(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
; CHECK-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
; CHECK-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]]
; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT:    store double [[TMP12]], double* [[EXT1:%.*]], align 8
; CHECK-NEXT:    store double [[TMP12]], double* [[EXT2:%.*]], align 8
; CHECK-NEXT:    store double [[TMP12]], double* [[EXT3:%.*]], align 8
; CHECK-NEXT:    store double [[B1]], double* [[EXT4:%.*]], align 8
; CHECK-NEXT:    store double [[B1]], double* [[EXT5:%.*]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
  %IdxB0 = getelementptr inbounds double, double* %B, i64 0
  %IdxC0 = getelementptr inbounds double, double* %C, i64 0
  %IdxD0 = getelementptr inbounds double, double* %D, i64 0

  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
  %IdxB2 = getelementptr inbounds double, double* %B, i64 2
  %IdxA2 = getelementptr inbounds double, double* %A, i64 2
  %IdxB1 = getelementptr inbounds double, double* %B, i64 1

  %A0 = load double, double *%IdxA0, align 8
  %B0 = load double, double *%IdxB0, align 8
  %C0 = load double, double *%IdxC0, align 8
  %D0 = load double, double *%IdxD0, align 8

  %A1 = load double, double *%IdxA1, align 8
  %B2 = load double, double *%IdxB2, align 8
  %A2 = load double, double *%IdxA2, align 8
  %B1 = load double, double *%IdxB1, align 8

  %subA0B0 = fsub fast double %A0, %B0
  %subC0D0 = fsub fast double %C0, %D0

  %subA1B2 = fsub fast double %A1, %B2
  %subA2B1 = fsub fast double %A2, %B1

  %add0 = fadd fast double %subA0B0, %subC0D0
  %add1 = fadd fast double %subA1B2, %subA2B1

  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
  %IdxS1 = getelementptr inbounds double, double* %S, i64 1

  store double %add0, double *%IdxS0, align 8
  store double %add1, double *%IdxS1, align 8

  ; External uses of A1
  store double %A1, double *%Ext1, align 8
  store double %A1, double *%Ext2, align 8
  store double %A1, double *%Ext3, align 8

  ; External uses of B1
  store double %B1, double *%Ext4, align 8
  store double %B1, double *%Ext5, align 8
  ret void
}

; This checks that the lookahead code does not crash when instructions with the
; same opcode have different numbers of operands (in this case, the calls).

%Class = type { i8 }
declare double @_ZN1i2ayEv(%Class*)
declare double @_ZN1i2axEv()
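
; The two calls produce the second operands of the two fadds but take
; different numbers of arguments (one vs. zero), which the look-ahead operand
; matching must tolerate. Roughly, in scalar C-like form (an illustrative
; sketch, not actual source):
;
;   S[0] = A[0] + Arg0->ay();   // call with one operand
;   S[1] = A[1] + ax();         // call with no operands
;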
define void @lookahead_crash(double* %A, double *%S, %Class *%Arg0) {
; CHECK-LABEL: @lookahead_crash(
; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[C0:%.*]] = call double @_ZN1i2ayEv(%Class* [[ARG0:%.*]])
; CHECK-NEXT:    [[C1:%.*]] = call double @_ZN1i2axEv()
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[C1]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
; CHECK-NEXT:    ret void
;
  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
  %IdxA1 = getelementptr inbounds double, double* %A, i64 1

  %A0 = load double, double *%IdxA0, align 8
  %A1 = load double, double *%IdxA1, align 8

  %C0 = call double @_ZN1i2ayEv(%Class *%Arg0)
  %C1 = call double @_ZN1i2axEv()

  %add0 = fadd fast double %A0, %C0
  %add1 = fadd fast double %A1, %C1

  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
  %IdxS1 = getelementptr inbounds double, double* %S, i64 1
  store double %add0, double *%IdxS0, align 8
  store double %add1, double *%IdxS1, align 8
  ret void
}