1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
5 @p = external local_unnamed_addr global [257 x i32], align 16
6 @q = external local_unnamed_addr global [257 x i32], align 16
8 ; Test case for PR43398.
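;
; The loop below carries the value loaded from @p in the previous iteration in a
; first-order recurrence (%pre.phi). To vectorize, the user of the recurrence
; (%add.1) has to be sunk past %pre.next, the instruction computing the next
; recurrence value. A hand-written C sketch of what the scalar loop computes,
; for illustration only (the IR below was not generated from this source):
;
;   int prev = p[1];
;   for (long i = 1; i < 2000; i++) {
;     int t = prev + x;   // uses the recurrence; sunk after the load of p[i]
;     prev = p[i];        // next recurrence value
;     q[i] = t + prev;
;   }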
10 define void @can_sink_after_store(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr #0 {
11 ; CHECK-LABEL: @can_sink_after_store(
13 ; CHECK-NEXT: br label [[PREHEADER:%.*]]
15 ; CHECK-NEXT: [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
16 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
17 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
19 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
20 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
21 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
22 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
24 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
25 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
26 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
27 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
28 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[TMP0]]
29 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
30 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP2]], align 4
31 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
32 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
33 ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_LOAD]]
34 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[TMP0]]
35 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
36 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
37 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
38 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1996
39 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
40 ; CHECK: middle.block:
41 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
42 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
44 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ]
45 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
46 ; CHECK-NEXT: br label [[FOR:%.*]]
48 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
49 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
50 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[SCALAR_RECUR]], [[X]]
51 ; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
52 ; CHECK-NEXT: [[PRE_NEXT]] = load i32, ptr [[IDX_1]], align 4
53 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[PRE_NEXT]]
54 ; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
55 ; CHECK-NEXT: store i32 [[ADD_2]], ptr [[IDX_2]], align 4
56 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
57 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
58 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR]], !llvm.loop [[LOOP3:![0-9]+]]
60 ; CHECK-NEXT: ret void
67 %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
68 %.pre = load i32, ptr %idx.phi.trans, align 4
72 %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
73 %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
74 %add.1 = add i32 %pre.phi, %x
75 %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
76 %pre.next = load i32, ptr %idx.1, align 4
77 %add.2 = add i32 %add.1, %pre.next
78 %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
79 store i32 %add.2, ptr %idx.2, align 4
80 %iv.next = add nuw nsw i64 %iv, 1
81 %exitcond = icmp eq i64 %iv.next, 2000
82 br i1 %exitcond, label %exit, label %for
88 ; We can sink potentially trapping instructions such as sdiv, as sinking only
89 ; delays the trap and does not introduce traps on additional paths.
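;
; Hand-written C sketch for illustration only (not the source of the IR); the
; sdiv may trap, e.g. for x == 0, but sinking it past the load merely delays the
; trap within the same iteration:
;
;   int prev = p[1];
;   for (long i = 1; i < 2000; i++) {
;     int t = prev / x;   // potentially trapping; sunk after the load of p[i]
;     prev = p[i];
;     q[i] = t + prev;
;   }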
90 define void @sink_sdiv(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr #0 {
91 ; CHECK-LABEL: @sink_sdiv(
93 ; CHECK-NEXT: br label [[PREHEADER:%.*]]
95 ; CHECK-NEXT: [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
96 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
97 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
99 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
100 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
101 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
102 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
103 ; CHECK: vector.body:
104 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
105 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
106 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
107 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
108 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[TMP0]]
109 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
110 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP2]], align 4
111 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
112 ; CHECK-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
113 ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_LOAD]]
114 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[TMP0]]
115 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
116 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
117 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
118 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1996
119 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
120 ; CHECK: middle.block:
121 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
122 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
124 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ]
125 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
126 ; CHECK-NEXT: br label [[FOR:%.*]]
128 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
129 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
130 ; CHECK-NEXT: [[DIV_1:%.*]] = sdiv i32 [[SCALAR_RECUR]], [[X]]
131 ; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
132 ; CHECK-NEXT: [[PRE_NEXT]] = load i32, ptr [[IDX_1]], align 4
133 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[DIV_1]], [[PRE_NEXT]]
134 ; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
135 ; CHECK-NEXT: store i32 [[ADD_2]], ptr [[IDX_2]], align 4
136 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
137 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
138 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR]], !llvm.loop [[LOOP5:![0-9]+]]
140 ; CHECK-NEXT: ret void
147 %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
148 %.pre = load i32, ptr %idx.phi.trans, align 4
152 %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
153 %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
154 %div.1 = sdiv i32 %pre.phi, %x
155 %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
156 %pre.next = load i32, ptr %idx.1, align 4
157 %add.2 = add i32 %div.1, %pre.next
158 %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
159 store i32 %add.2, ptr %idx.2, align 4
160 %iv.next = add nuw nsw i64 %iv, 1
161 %exitcond = icmp eq i64 %iv.next, 2000
162 br i1 %exitcond, label %exit, label %for
168 ; Sink users of %pre.phi recursively.
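;
; %add.1 uses the recurrence and is itself used by %add.2, so sinking %add.1 past
; the load also requires sinking %add.2. Hand-written C sketch for illustration
; only (not the source of the IR):
;
;   int prev = p[1];
;   for (long i = 1; i < 2000; i++) {
;     int a = prev + x;   // %add.1, must be sunk past the load of p[i]
;     int b = a + x;      // %add.2, user of %add.1, sunk as well
;     prev = p[i];
;     q[i] = b + (a + prev);
;   }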
169 define void @can_sink_with_additional_user(i32 %x, ptr %ptr, i64 %tc) {
170 ; CHECK-LABEL: @can_sink_with_additional_user(
172 ; CHECK-NEXT: br label [[PREHEADER:%.*]]
174 ; CHECK-NEXT: [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
175 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
176 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
178 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
179 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
180 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
181 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
182 ; CHECK: vector.body:
183 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
184 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
185 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
186 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
187 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[TMP0]]
188 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
189 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP2]], align 4
190 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
191 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
192 ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
193 ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_LOAD]]
194 ; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP5]], [[TMP6]]
195 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[TMP0]]
196 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
197 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
198 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
199 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1996
200 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
201 ; CHECK: middle.block:
202 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
203 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
205 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ]
206 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
207 ; CHECK-NEXT: br label [[FOR:%.*]]
209 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
210 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
211 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[SCALAR_RECUR]], [[X]]
212 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[X]]
213 ; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
214 ; CHECK-NEXT: [[PRE_NEXT]] = load i32, ptr [[IDX_1]], align 4
215 ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_1]], [[PRE_NEXT]]
216 ; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_2]], [[ADD_3]]
217 ; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
218 ; CHECK-NEXT: store i32 [[ADD_4]], ptr [[IDX_2]], align 4
219 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
220 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
221 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR]], !llvm.loop [[LOOP7:![0-9]+]]
223 ; CHECK-NEXT: ret void
232 %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
233 %.pre = load i32, ptr %idx.phi.trans, align 4
237 %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
238 %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
239 %add.1 = add i32 %pre.phi, %x
240 %add.2 = add i32 %add.1, %x
241 %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
242 %pre.next = load i32, ptr %idx.1, align 4
243 %add.3 = add i32 %add.1, %pre.next
244 %add.4 = add i32 %add.2, %add.3
245 %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
246 store i32 %add.4, ptr %idx.2, align 4
247 %iv.next = add nuw nsw i64 %iv, 1
248 %exitcond = icmp eq i64 %iv.next, 2000
249 br i1 %exitcond, label %exit, label %for
255 ; FIXME: We could sink the store if we could guarantee that it does not alias
256 ; any of the loads/stores between it and the sink point.
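;
; Hand-written C sketch for illustration only (not the source of the IR); the
; store through %ptr sits between the use of the recurrence and the load defining
; the next recurrence value, and %ptr may alias @p:
;
;   int prev = p[1];
;   for (long i = 1; i < 2000; i++) {
;     int t = prev + x;
;     *ptr = t;           // may alias the load of p[i] below
;     prev = p[i];
;     q[i] = t + prev;
;   }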
257 define void @cannot_sink_store(i32 %x, ptr %ptr, i64 %tc) {
258 ; CHECK-LABEL: @cannot_sink_store(
260 ; CHECK-NEXT: br label [[PREHEADER:%.*]]
262 ; CHECK-NEXT: [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
263 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
264 ; CHECK-NEXT: br label [[FOR:%.*]]
266 ; CHECK-NEXT: [[PRE_PHI:%.*]] = phi i32 [ [[DOTPRE]], [[PREHEADER]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
267 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
268 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[PRE_PHI]], [[X:%.*]]
269 ; CHECK-NEXT: store i32 [[ADD_1]], ptr [[PTR:%.*]], align 4
270 ; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
271 ; CHECK-NEXT: [[PRE_NEXT]] = load i32, ptr [[IDX_1]], align 4
272 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[PRE_NEXT]]
273 ; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
274 ; CHECK-NEXT: store i32 [[ADD_2]], ptr [[IDX_2]], align 4
275 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
276 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
277 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR]]
279 ; CHECK-NEXT: ret void
288 %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
289 %.pre = load i32, ptr %idx.phi.trans, align 4
293 %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
294 %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
295 %add.1 = add i32 %pre.phi, %x
296 store i32 %add.1, ptr %ptr
297 %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
298 %pre.next = load i32, ptr %idx.1, align 4
299 %add.2 = add i32 %add.1, %pre.next
300 %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
301 store i32 %add.2, ptr %idx.2, align 4
302 %iv.next = add nuw nsw i64 %iv, 1
303 %exitcond = icmp eq i64 %iv.next, 2000
304 br i1 %exitcond, label %exit, label %for
310 ; Some kinds of reductions are not detected by IVDescriptors. If the recurrence
311 ; phi is part of such a cycle, we cannot sink it and the loop is not vectorized.
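;
; The phi is only used by %d, which feeds straight back into the phi, so the
; value forms a cycle rather than a sinkable first-order recurrence.
; Hand-written C sketch for illustration only (not the source of the IR):
;
;   int d = p[1];
;   for (long i = 1; i < 2000; i++) {
;     d = d / x;          // cycle: %pre.phi -> %d -> %pre.phi
;     q[i] = x + p[i];
;   }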
312 define void @cannot_sink_reduction(i32 %x, ptr %ptr, i64 %tc) {
313 ; CHECK-LABEL: @cannot_sink_reduction(
315 ; CHECK-NEXT: br label [[PREHEADER:%.*]]
317 ; CHECK-NEXT: [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
318 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
319 ; CHECK-NEXT: br label [[FOR:%.*]]
321 ; CHECK-NEXT: [[PRE_PHI:%.*]] = phi i32 [ [[DOTPRE]], [[PREHEADER]] ], [ [[D:%.*]], [[FOR]] ]
322 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
323 ; CHECK-NEXT: [[D]] = sdiv i32 [[PRE_PHI]], [[X:%.*]]
324 ; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
325 ; CHECK-NEXT: [[PRE_NEXT:%.*]] = load i32, ptr [[IDX_1]], align 4
326 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[X]], [[PRE_NEXT]]
327 ; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
328 ; CHECK-NEXT: store i32 [[ADD_2]], ptr [[IDX_2]], align 4
329 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
330 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
331 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR]]
333 ; CHECK-NEXT: ret void
339 %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
340 %.pre = load i32, ptr %idx.phi.trans, align 4
344 %pre.phi = phi i32 [ %.pre, %preheader ], [ %d, %for ]
345 %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
346 %d = sdiv i32 %pre.phi, %x
347 %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
348 %pre.next = load i32, ptr %idx.1, align 4
349 %add.2 = add i32 %x, %pre.next
350 %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
351 store i32 %add.2, ptr %idx.2, align 4
352 %iv.next = add nuw nsw i64 %iv, 1
353 %exitcond = icmp eq i64 %iv.next, 2000
354 br i1 %exitcond, label %exit, label %for
360 ; Sink %tmp38 after %tmp60; doing so enables vectorization of the loop.
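;
; %tmp38 uses both recurrences (%tmp37 and %tmp27), so it has to be sunk below
; %tmp60, the later of the two loads defining the next recurrence values.
; Hand-written C sketch for illustration only (not the source of the IR):
;
;   float f1 = 1.0f, f2 = 0.0f;
;   for (long i = 0; i <= 1000; i++) {
;     float t = f2 * f1;  // %tmp38; sunk below both loads when vectorizing
;     f1 = *A;            // %tmp49
;     f2 = *B;            // %tmp60
;     C[i] = t;
;   }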
361 define void @instruction_with_2_FOR_operands(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
362 ; CHECK-LABEL: @instruction_with_2_FOR_operands(
364 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
366 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
367 ; CHECK: vector.body:
368 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
369 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT3:%.*]], [[VECTOR_BODY]] ]
370 ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
371 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
372 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP0]]
373 ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[A:%.*]], align 4
374 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
375 ; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
376 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
377 ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[B:%.*]], align 4
378 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i64 0
379 ; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
380 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
381 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP3]]
382 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
383 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP7]], align 4
384 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
385 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
386 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
387 ; CHECK: middle.block:
388 ; CHECK-NEXT: br i1 false, label [[BB74:%.*]], label [[SCALAR_PH]]
390 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ]
391 ; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[BB]] ]
392 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
393 ; CHECK-NEXT: br label [[BB13:%.*]]
395 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[TMP60:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
396 ; CHECK-NEXT: [[SCALAR_RECUR5:%.*]] = phi float [ [[TMP49:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ]
397 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BB13]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
398 ; CHECK-NEXT: [[TMP38:%.*]] = fmul fast float [[SCALAR_RECUR]], [[SCALAR_RECUR5]]
399 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
400 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[IV]]
401 ; CHECK-NEXT: [[TMP49]] = load float, ptr [[A]], align 4
402 ; CHECK-NEXT: [[TMP60]] = load float, ptr [[B]], align 4
403 ; CHECK-NEXT: store float [[TMP38]], ptr [[GEP]], align 4
404 ; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[IV]], 1000
405 ; CHECK-NEXT: br i1 [[TMP12]], label [[BB13]], label [[BB74]], !llvm.loop [[LOOP9:![0-9]+]]
407 ; CHECK-NEXT: ret void
412 bb13: ; preds = %bb13, %bb
413 %tmp37 = phi float [ %tmp60, %bb13 ], [ 0.0, %bb ]
414 %tmp27 = phi float [ %tmp49, %bb13 ], [ 1.0, %bb ]
415 %iv = phi i64 [ %iv.next, %bb13 ], [ 0, %bb ]
416 %tmp38 = fmul fast float %tmp37, %tmp27
417 %iv.next = add nuw nsw i64 %iv, 1
418 %gep = getelementptr inbounds float, ptr %C, i64 %iv
419 %tmp49 = load float, ptr %A, align 4
420 %tmp60 = load float, ptr %B, align 4
421 store float %tmp38, ptr %gep
422 %tmp12 = icmp slt i64 %iv, 1000
423 br i1 %tmp12, label %bb13, label %bb74
425 bb74: ; preds = %bb13
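;
; %for.1 has several users (%for.1.use.1, %used.by.both and %for.1.use.3). They
; are all defined before %for.1.next and have to be sunk below it to vectorize.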
429 define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias %dst.1, ptr noalias %dst.2, ptr noalias %dst.3, ptr noalias %for.ptr.1, ptr noalias %for.ptr.2) {
430 ; CHECK-LABEL: @instruction_with_2_FOR_operands_and_multiple_other_uses(
432 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
434 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
435 ; CHECK: vector.body:
436 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
437 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT3:%.*]], [[VECTOR_BODY]] ]
438 ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
439 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
440 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[FOR_PTR_2:%.*]], align 4
441 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
442 ; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
443 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
444 ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[FOR_PTR_1:%.*]], align 4
445 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
446 ; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
447 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
448 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00)
449 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
450 ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00)
451 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]]
452 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
453 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP9]], align 4
454 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[DST_2:%.*]], i64 [[TMP0]]
455 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0
456 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP11]], align 4
457 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[DST_3:%.*]], i64 [[TMP0]]
458 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
459 ; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP13]], align 4
460 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
461 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
462 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
463 ; CHECK: middle.block:
464 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
466 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ]
467 ; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ]
468 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
469 ; CHECK-NEXT: br label [[LOOP:%.*]]
471 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
472 ; CHECK-NEXT: [[SCALAR_RECUR5:%.*]] = phi float [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
473 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
474 ; CHECK-NEXT: [[FOR_1_USE_1:%.*]] = fmul fast float [[SCALAR_RECUR]], 2.000000e+00
475 ; CHECK-NEXT: [[USED_BY_BOTH:%.*]] = fmul fast float [[SCALAR_RECUR]], [[SCALAR_RECUR5]]
476 ; CHECK-NEXT: [[FOR_2_NEXT]] = load float, ptr [[FOR_PTR_2]], align 4
477 ; CHECK-NEXT: [[FOR_1_USE_3:%.*]] = fadd fast float [[SCALAR_RECUR]], 1.000000e+00
478 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
479 ; CHECK-NEXT: [[FOR_1_NEXT]] = load float, ptr [[FOR_PTR_1]], align 4
480 ; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST_1]], i64 [[IV]]
481 ; CHECK-NEXT: store float [[USED_BY_BOTH]], ptr [[GEP_DST_1]], align 4
482 ; CHECK-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds float, ptr [[DST_2]], i64 [[IV]]
483 ; CHECK-NEXT: store float [[FOR_1_USE_1]], ptr [[GEP_DST_2]], align 4
484 ; CHECK-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds float, ptr [[DST_3]], i64 [[IV]]
485 ; CHECK-NEXT: store float [[FOR_1_USE_3]], ptr [[GEP_DST_3]], align 4
486 ; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], 1000
487 ; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
489 ; CHECK-NEXT: ret void
495 %for.1 = phi float [ 0.0, %bb ], [ %for.1.next, %loop ]
496 %for.2 = phi float [ 0.0, %bb ], [ %for.2.next, %loop ]
497 %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
498 %for.1.use.1 = fmul fast float %for.1, 2.0
499 %used.by.both = fmul fast float %for.1, %for.2
500 %for.2.next = load float, ptr %for.ptr.2, align 4
501 %for.1.use.3 = fadd fast float %for.1, 1.0
502 %iv.next = add nuw nsw i64 %iv, 1
503 %for.1.next = load float, ptr %for.ptr.1, align 4
504 %gep.dst.1 = getelementptr inbounds float, ptr %dst.1, i64 %iv
505 store float %used.by.both, ptr %gep.dst.1
506 %gep.dst.2 = getelementptr inbounds float, ptr %dst.2, i64 %iv
507 store float %for.1.use.1, ptr %gep.dst.2
508 %gep.dst.3 = getelementptr inbounds float, ptr %dst.3, i64 %iv
509 store float %for.1.use.3, ptr %gep.dst.3
510 %ec = icmp slt i64 %iv, 1000
511 br i1 %ec, label %loop, label %exit
517 ; Variation of @instruction_with_2_FOR_operands_and_multiple_other_uses, with
518 ; multiple instructions in a chain from %for.1 to %used.by.both.
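;
; Here the chain is %for.1 -> %for.1.use.1 -> %for.1.use.c -> %used.by.both,
; i.e. %used.by.both = ((%for.1 * 2) * 2) * %for.2.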
519 define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr noalias %dst.1, ptr noalias %dst.2, ptr noalias %dst.3, ptr noalias %for.ptr.1, ptr noalias %for.ptr.2) {
520 ; CHECK-LABEL: @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(
522 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
524 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
525 ; CHECK: vector.body:
526 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
527 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT3:%.*]], [[VECTOR_BODY]] ]
528 ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
529 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
530 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[FOR_PTR_2:%.*]], align 4
531 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
532 ; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
533 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
534 ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[FOR_PTR_1:%.*]], align 4
535 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
536 ; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
537 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
538 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00)
539 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], splat (float 2.000000e+00)
540 ; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP2]]
541 ; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00)
542 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]]
543 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
544 ; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP10]], align 4
545 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[DST_2:%.*]], i64 [[TMP0]]
546 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
547 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP12]], align 4
548 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[DST_3:%.*]], i64 [[TMP0]]
549 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 0
550 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP14]], align 4
551 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
552 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
553 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
554 ; CHECK: middle.block:
555 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
557 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ]
558 ; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ]
559 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
560 ; CHECK-NEXT: br label [[LOOP:%.*]]
562 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
563 ; CHECK-NEXT: [[SCALAR_RECUR5:%.*]] = phi float [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
564 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
565 ; CHECK-NEXT: [[FOR_1_USE_1:%.*]] = fmul fast float [[SCALAR_RECUR]], 2.000000e+00
566 ; CHECK-NEXT: [[FOR_1_USE_C:%.*]] = fmul fast float [[FOR_1_USE_1]], 2.000000e+00
567 ; CHECK-NEXT: [[USED_BY_BOTH:%.*]] = fmul fast float [[FOR_1_USE_C]], [[SCALAR_RECUR5]]
568 ; CHECK-NEXT: [[FOR_2_NEXT]] = load float, ptr [[FOR_PTR_2]], align 4
569 ; CHECK-NEXT: [[FOR_1_USE_3:%.*]] = fadd fast float [[SCALAR_RECUR]], 1.000000e+00
570 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
571 ; CHECK-NEXT: [[FOR_1_NEXT]] = load float, ptr [[FOR_PTR_1]], align 4
572 ; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST_1]], i64 [[IV]]
573 ; CHECK-NEXT: store float [[USED_BY_BOTH]], ptr [[GEP_DST_1]], align 4
574 ; CHECK-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds float, ptr [[DST_2]], i64 [[IV]]
575 ; CHECK-NEXT: store float [[FOR_1_USE_1]], ptr [[GEP_DST_2]], align 4
576 ; CHECK-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds float, ptr [[DST_3]], i64 [[IV]]
577 ; CHECK-NEXT: store float [[FOR_1_USE_3]], ptr [[GEP_DST_3]], align 4
578 ; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], 1000
579 ; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
581 ; CHECK-NEXT: ret void
587 %for.1 = phi float [ 0.0, %bb ], [ %for.1.next, %loop ]
588 %for.2 = phi float [ 0.0, %bb ], [ %for.2.next, %loop ]
589 %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
590 %for.1.use.1 = fmul fast float %for.1, 2.0
591 %for.1.use.c = fmul fast float %for.1.use.1, 2.0
592 %used.by.both = fmul fast float %for.1.use.c, %for.2
593 %for.2.next = load float, ptr %for.ptr.2, align 4
594 %for.1.use.3 = fadd fast float %for.1, 1.0
595 %iv.next = add nuw nsw i64 %iv, 1
596 %for.1.next = load float, ptr %for.ptr.1, align 4
597 %gep.dst.1 = getelementptr inbounds float, ptr %dst.1, i64 %iv
598 store float %used.by.both, ptr %gep.dst.1
599 %gep.dst.2 = getelementptr inbounds float, ptr %dst.2, i64 %iv
600 store float %for.1.use.1, ptr %gep.dst.2
601 %gep.dst.3 = getelementptr inbounds float, ptr %dst.3, i64 %iv
602 store float %for.1.use.3, ptr %gep.dst.3
603 %ec = icmp slt i64 %iv, 1000
604 br i1 %ec, label %loop, label %exit
610 ; The (first) reason `%first_time.1` cannot be sunk is that it appears outside
611 ; the header and is not dominated by Previous. The fact that it feeds Previous
612 ; is a second reason that prevents sinking.
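;
; Hand-written C sketch for illustration only (not the source of the IR);
; %first_time.1 is the phi merging the two branches in the latch:
;
;   int rec = 0;
;   for (long i = 1; i < 1000; i++) {
;     int t = (i < 500) ? 20 : rec;  // %first_time.1, phi in the latch
;     rec = (i < 800) ? 30 : t;      // %for.next ("Previous")
;     ptr[i] = rec;
;   }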
613 define void @cannot_sink_phi(ptr %ptr) {
614 ; CHECK-LABEL: @cannot_sink_phi(
616 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
617 ; CHECK: loop.header:
618 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
619 ; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_NEXT:%.*]], [[LOOP_LATCH]] ]
620 ; CHECK-NEXT: [[C_1:%.*]] = icmp ult i64 [[IV]], 500
621 ; CHECK-NEXT: br i1 [[C_1]], label [[IF_TRUEBB:%.*]], label [[IF_FALSEBB:%.*]]
623 ; CHECK-NEXT: br label [[LOOP_LATCH]]
625 ; CHECK-NEXT: br label [[LOOP_LATCH]]
627 ; CHECK-NEXT: [[FIRST_TIME_1:%.*]] = phi i32 [ 20, [[IF_TRUEBB]] ], [ [[FOR]], [[IF_FALSEBB]] ]
628 ; CHECK-NEXT: [[C_2:%.*]] = icmp ult i64 [[IV]], 800
629 ; CHECK-NEXT: [[FOR_NEXT]] = select i1 [[C_2]], i32 30, i32 [[FIRST_TIME_1]]
630 ; CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[IV]]
631 ; CHECK-NEXT: store i32 [[FOR_NEXT]], ptr [[PTR_IDX]], align 4
632 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
633 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
634 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_HEADER]]
636 ; CHECK-NEXT: ret void
639 br label %loop.header
642 %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.latch ]
643 %for = phi i32 [ 0, %entry ], [ %for.next, %loop.latch ]
644 %c.1 = icmp ult i64 %iv, 500
645 br i1 %c.1, label %if.truebb, label %if.falsebb
654 %first_time.1 = phi i32 [ 20, %if.truebb ], [ %for, %if.falsebb ]
655 %c.2 = icmp ult i64 %iv, 800
656 %for.next = select i1 %c.2, i32 30, i32 %first_time.1
657 %ptr.idx = getelementptr i32, ptr %ptr, i64 %iv
658 store i32 %for.next, ptr %ptr.idx
659 %iv.next = add nuw nsw i64 %iv, 1
660 %exitcond.not = icmp eq i64 %iv.next, 1000
661 br i1 %exitcond.not, label %exit, label %loop.header
667 ; A first-order recurrence in a multiple-exit loop.
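;
; Hand-written C sketch for illustration only (not the source of the IR); both
; the header and the latch can exit the loop, and both exits return the
; recurrence value:
;
;   short rec = 0;
;   for (int i = 0; ; i++) {
;     short next = p[i];
;     if (i >= n) break;      // exit from the header
;     p[i] = rec;
;     if (i >= 2096) break;   // exit from the latch
;     rec = next;
;   }
;   return rec;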
668 define i16 @multiple_exit(ptr %p, i32 %n) {
669 ; CHECK-LABEL: @multiple_exit(
671 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
672 ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
673 ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
674 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4
675 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
677 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
678 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
679 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 4, i32 [[N_MOD_VF]]
680 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
681 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
682 ; CHECK: vector.body:
683 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
684 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
685 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
686 ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
687 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
688 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
689 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP6]], align 2
690 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
691 ; CHECK-NEXT: store <4 x i16> [[TMP7]], ptr [[TMP6]], align 4
692 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
693 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
694 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
695 ; CHECK: middle.block:
696 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
697 ; CHECK-NEXT: br label [[SCALAR_PH]]
699 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
700 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
701 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
703 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
704 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[FOR_BODY]] ]
705 ; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
706 ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
707 ; CHECK-NEXT: [[REC_NEXT]] = load i16, ptr [[B]], align 2
708 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
709 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
711 ; CHECK-NEXT: store i16 [[SCALAR_RECUR]], ptr [[B]], align 4
712 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
713 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
714 ; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP15:![0-9]+]]
716 ; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[SCALAR_RECUR]], [[FOR_COND]] ]
717 ; CHECK-NEXT: ret i16 [[REC_LCSSA]]
723 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
724 %rec = phi i16 [ 0, %entry ], [ %rec.next, %for.body ]
725 %iprom = sext i32 %i to i64
726 %b = getelementptr inbounds i16, ptr %p, i64 %iprom
727 %rec.next = load i16, ptr %b
728 %cmp = icmp slt i32 %i, %n
729 br i1 %cmp, label %for.body, label %if.end
732 store i16 %rec, ptr %b, align 4
733 %inc = add nsw i32 %i, 1
734 %cmp2 = icmp slt i32 %i, 2096
735 br i1 %cmp2, label %for.cond, label %if.end
742 ; A multiple-exit case where one of the exiting edges uses a value from the
743 ; recurrence and the other does not.
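;
; Compared to @multiple_exit, the exit value on the latch edge is the constant 10
; rather than the recurrence, so only the exit from the header uses %rec.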
744 define i16 @multiple_exit2(ptr %p, i32 %n) {
745 ; CHECK-LABEL: @multiple_exit2(
747 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
748 ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
749 ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
750 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4
751 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
753 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
754 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
755 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 4, i32 [[N_MOD_VF]]
756 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
757 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
758 ; CHECK: vector.body:
759 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
760 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
761 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
762 ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
763 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
764 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
765 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP6]], align 2
766 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
767 ; CHECK-NEXT: store <4 x i16> [[TMP7]], ptr [[TMP6]], align 4
768 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
769 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
770 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
771 ; CHECK: middle.block:
772 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
773 ; CHECK-NEXT: br label [[SCALAR_PH]]
775 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
776 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
777 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
779 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
780 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[FOR_BODY]] ]
781 ; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
782 ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
783 ; CHECK-NEXT: [[REC_NEXT]] = load i16, ptr [[B]], align 2
784 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
785 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
787 ; CHECK-NEXT: store i16 [[SCALAR_RECUR]], ptr [[B]], align 4
788 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
789 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
790 ; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP17:![0-9]+]]
792 ; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ]
793 ; CHECK-NEXT: ret i16 [[REC_LCSSA]]
799 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
800 %rec = phi i16 [ 0, %entry ], [ %rec.next, %for.body ]
801 %iprom = sext i32 %i to i64
802 %b = getelementptr inbounds i16, ptr %p, i64 %iprom
803 %rec.next = load i16, ptr %b
804 %cmp = icmp slt i32 %i, %n
805 br i1 %cmp, label %for.body, label %if.end
808 store i16 %rec, ptr %b, align 4
809 %inc = add nsw i32 %i, 1
810 %cmp2 = icmp slt i32 %i, 2096
811 br i1 %cmp2, label %for.cond, label %if.end
814 %rec.lcssa = phi i16 [ %rec, %for.cond ], [ 10, %for.body ]
818 ; A test where the instructions to sink may not be visited in dominance order.
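;
; %for.trunc uses the recurrence, and %cmp and %select in turn use %for.trunc;
; all of them are defined before %for.next and have to be sunk after it.
; Hand-written C sketch for illustration only (not the source of the IR):
;
;   long long rec = 0;
;   unsigned i = 0;
;   do {
;     int t = (int)rec;          // %for.trunc
;     int s = t < 213 ? t : 22;  // %select
;     rec = (unsigned)ptr[i];    // %for.next, the next recurrence value
;     ptr[i] = s;
;     i++;
;   } while (i < (unsigned)N);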
819 define void @sink_dominance(ptr %ptr, i32 %N) {
820 ; CHECK-LABEL: @sink_dominance(
822 ; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
823 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4
824 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
825 ; CHECK: vector.scevcheck:
826 ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
827 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
828 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
829 ; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
831 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
832 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
833 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
834 ; CHECK: vector.body:
835 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
836 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
837 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
838 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP2]]
839 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
840 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
841 ; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
842 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
843 ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
844 ; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213)
845 ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> splat (i32 22)
846 ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP4]], align 4
847 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
848 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
849 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
850 ; CHECK: middle.block:
851 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
852 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
853 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
855 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
856 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
857 ; CHECK-NEXT: br label [[LOOP:%.*]]
859 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
860 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
861 ; CHECK-NEXT: [[FOR_TRUNC:%.*]] = trunc i64 [[SCALAR_RECUR]] to i32
862 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[FOR_TRUNC]], 213
863 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[FOR_TRUNC]], i32 22
864 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[IV]]
865 ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[GEP]], align 4
866 ; CHECK-NEXT: [[FOR_NEXT]] = zext i32 [[LV]] to i64
867 ; CHECK-NEXT: store i32 [[SELECT]], ptr [[GEP]], align 4
868 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
869 ; CHECK-NEXT: [[CMP73:%.*]] = icmp ugt i32 [[N]], [[IV_NEXT]]
870 ; CHECK-NEXT: br i1 [[CMP73]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP19:![0-9]+]]
872 ; CHECK-NEXT: ret void
878 %for = phi i64 [ 0, %entry ], [ %for.next, %loop ]
879 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
881 %for.trunc = trunc i64 %for to i32
882 %cmp = icmp slt i32 %for.trunc, 213
883 %select = select i1 %cmp, i32 %for.trunc, i32 22
885 %gep = getelementptr inbounds i32, ptr %ptr, i32 %iv
886 %lv = load i32, ptr %gep, align 4
887 %for.next = zext i32 %lv to i64
888 store i32 %select, ptr %gep
890 %iv.next = add i32 %iv, 1
891 %cmp73 = icmp ugt i32 %N, %iv.next
892 br i1 %cmp73, label %loop, label %exit
898 ; Similar to @sink_dominance, but with two separate chains that merge at
899 ; %select, and with a different number of instructions in each chain.
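;
; Here %select picks between %for.trunc (one instruction away from the
; recurrence) and %step.2 (two instructions away), i.e. t < 213 ? t : (t + 2) * 99.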
900 define void @sink_dominance_2(ptr %ptr, i32 %N) {
901 ; CHECK-LABEL: @sink_dominance_2(
903 ; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
904 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4
905 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
906 ; CHECK: vector.scevcheck:
907 ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
908 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
909 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
910 ; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
912 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
913 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
914 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
915 ; CHECK: vector.body:
916 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
917 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
918 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
919 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP2]]
920 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
921 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
922 ; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
923 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
924 ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
925 ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], splat (i32 2)
926 ; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], splat (i32 99)
927 ; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213)
928 ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]]
929 ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP4]], align 4
930 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
931 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
932 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
933 ; CHECK: middle.block:
934 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
935 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
936 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
938 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
939 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
940 ; CHECK-NEXT: br label [[LOOP:%.*]]
942 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
943 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
944 ; CHECK-NEXT: [[FOR_TRUNC:%.*]] = trunc i64 [[SCALAR_RECUR]] to i32
945 ; CHECK-NEXT: [[STEP:%.*]] = add i32 [[FOR_TRUNC]], 2
946 ; CHECK-NEXT: [[STEP_2:%.*]] = mul i32 [[STEP]], 99
947 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[FOR_TRUNC]], 213
948 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[FOR_TRUNC]], i32 [[STEP_2]]
949 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[IV]]
950 ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[GEP]], align 4
951 ; CHECK-NEXT: [[FOR_NEXT]] = zext i32 [[LV]] to i64
952 ; CHECK-NEXT: store i32 [[SELECT]], ptr [[GEP]], align 4
953 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
954 ; CHECK-NEXT: [[CMP73:%.*]] = icmp ugt i32 [[N]], [[IV_NEXT]]
955 ; CHECK-NEXT: br i1 [[CMP73]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP21:![0-9]+]]
957 ; CHECK-NEXT: ret void
963 %for = phi i64 [ 0, %entry ], [ %for.next, %loop ]
964 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
966 %for.trunc = trunc i64 %for to i32
967 %step = add i32 %for.trunc, 2
968 %step.2 = mul i32 %step, 99
970 %cmp = icmp slt i32 %for.trunc, 213
971 %select = select i1 %cmp, i32 %for.trunc, i32 %step.2
973 %gep = getelementptr inbounds i32, ptr %ptr, i32 %iv
974 %lv = load i32, ptr %gep, align 4
975 %for.next = zext i32 %lv to i64
976 store i32 %select, ptr %gep
978 %iv.next = add i32 %iv, 1
979 %cmp73 = icmp ugt i32 %N, %iv.next
980 br i1 %cmp73, label %loop, label %exit
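;
; %lv.for loads from ptr[%for]; sinking it past the store to ptr[%iv] is not
; safe, as the two accesses may alias, so the loop is not vectorized.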
986 define void @cannot_sink_load_past_store(ptr %ptr, i32 %N) {
987 ; CHECK-LABEL: @cannot_sink_load_past_store(
989 ; CHECK-NEXT: br label [[LOOP:%.*]]
991 ; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
992 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
993 ; CHECK-NEXT: [[GEP_FOR:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[FOR]]
994 ; CHECK-NEXT: [[LV_FOR:%.*]] = load i32, ptr [[GEP_FOR]], align 4
995 ; CHECK-NEXT: [[FOR_TRUNC:%.*]] = trunc i64 [[FOR]] to i32
996 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LV_FOR]], [[FOR_TRUNC]]
997 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[LV_FOR]], i32 22
998 ; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[IV]]
999 ; CHECK-NEXT: store i32 0, ptr [[GEP_IV]], align 4
1000 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
1001 ; CHECK-NEXT: [[FOR_NEXT]] = zext i32 [[IV]] to i64
1002 ; CHECK-NEXT: [[CMP73:%.*]] = icmp ugt i32 [[N:%.*]], [[IV_NEXT]]
1003 ; CHECK-NEXT: br i1 [[CMP73]], label [[LOOP]], label [[EXIT:%.*]]
1005 ; CHECK-NEXT: ret void
1011 %for = phi i64 [ 0, %entry ], [ %for.next, %loop ]
1012 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
1014 %gep.for = getelementptr inbounds i32, ptr %ptr, i64 %for
1015 %lv.for = load i32, ptr %gep.for, align 4
1016 %for.trunc = trunc i64 %for to i32
1017 %cmp = icmp slt i32 %lv.for, %for.trunc
1018 %select = select i1 %cmp, i32 %lv.for, i32 22
1020 %gep.iv = getelementptr inbounds i32, ptr %ptr, i32 %iv
1021 store i32 0, ptr %gep.iv
1022 %iv.next = add i32 %iv, 1
1023 %for.next = zext i32 %iv to i64
1025 %cmp73 = icmp ugt i32 %N, %iv.next
1026 br i1 %cmp73, label %loop, label %exit
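;
; Both %for.1 and %for.2 have the same incoming value, %for.1.next. %add.1 and
; %add.2 use the recurrences and are sunk after the load defining %for.1.next.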
1032 define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) {
1033 ; CHECK-LABEL: @test_for_sink_instruction_after_same_incoming_1(
1034 ; CHECK-NEXT: entry:
1035 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1037 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1038 ; CHECK: vector.body:
1039 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1040 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 1.000000e+01>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
1041 ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 2.000000e+01>, [[VECTOR_PH]] ], [ [[WIDE_LOAD]], [[VECTOR_BODY]] ]
1042 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1043 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1044 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[PTR:%.*]], i64 [[TMP0]]
1045 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
1046 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8
1047 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1048 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP3]]
1049 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1050 ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP4]], [[TMP5]]
1051 ; CHECK-NEXT: store <4 x double> [[TMP6]], ptr [[TMP2]], align 8
1052 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1053 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
1054 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1055 ; CHECK: middle.block:
1056 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
1057 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
1058 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1060 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ]
1061 ; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ]
1062 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
1063 ; CHECK-NEXT: br label [[LOOP:%.*]]
1065 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
1066 ; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi double [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT]], [[LOOP]] ]
1067 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1068 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd double 1.000000e+01, [[SCALAR_RECUR4]]
1069 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd double [[ADD_1]], [[SCALAR_RECUR]]
1070 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1071 ; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds double, ptr [[PTR]], i64 [[IV]]
1072 ; CHECK-NEXT: [[FOR_1_NEXT]] = load double, ptr [[GEP_PTR]], align 8
1073 ; CHECK-NEXT: store double [[ADD_2]], ptr [[GEP_PTR]], align 8
1074 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1075 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
1077 ; CHECK-NEXT: ret void
1083 %for.1 = phi double [ 10.0, %entry ], [ %for.1.next, %loop ]
1084 %for.2 = phi double [ 20.0, %entry ], [ %for.1.next, %loop ]
1085 %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
1086 %add.1 = fadd double 10.0, %for.2
1087 %add.2 = fadd double %add.1, %for.1
1088 %iv.next = add nuw nsw i64 %iv, 1
1089 %gep.ptr = getelementptr inbounds double, ptr %ptr, i64 %iv
1090 %for.1.next = load double, ptr %gep.ptr, align 8
1091 store double %add.2, ptr %gep.ptr
1092 %exitcond.not = icmp eq i64 %iv.next, 1000
1093 br i1 %exitcond.not, label %exit, label %loop
1100 define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) {
1101 ; CHECK-LABEL: @test_for_sink_instruction_after_same_incoming_2(
1102 ; CHECK-NEXT: entry:
1103 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1105 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1106 ; CHECK: vector.body:
1107 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1108 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 2.000000e+01>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
1109 ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 1.000000e+01>, [[VECTOR_PH]] ], [ [[WIDE_LOAD]], [[VECTOR_BODY]] ]
1110 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1111 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1112 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[PTR:%.*]], i64 [[TMP0]]
1113 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
1114 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8
1115 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1116 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1117 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP4]]
1118 ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], [[TMP3]]
1119 ; CHECK-NEXT: store <4 x double> [[TMP6]], ptr [[TMP2]], align 8
1120 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1121 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
1122 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
1123 ; CHECK: middle.block:
1124 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
1125 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
1126 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1128 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ]
1129 ; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ]
1130 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
1131 ; CHECK-NEXT: br label [[LOOP:%.*]]
1133 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
1134 ; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi double [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT]], [[LOOP]] ]
1135 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1136 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd double 1.000000e+01, [[SCALAR_RECUR]]
1137 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd double [[ADD_1]], [[SCALAR_RECUR4]]
1138 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1139 ; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds double, ptr [[PTR]], i64 [[IV]]
1140 ; CHECK-NEXT: [[FOR_1_NEXT]] = load double, ptr [[GEP_PTR]], align 8
1141 ; CHECK-NEXT: store double [[ADD_2]], ptr [[GEP_PTR]], align 8
1142 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1143 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
1145 ; CHECK-NEXT: ret void
1151 %for.2 = phi double [ 20.0, %entry ], [ %for.1.next, %loop ]
1152 %for.1 = phi double [ 10.0, %entry ], [ %for.1.next, %loop ]
1153 %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
1154 %add.1 = fadd double 10.0, %for.2
1155 %add.2 = fadd double %add.1, %for.1
1156 %iv.next = add nuw nsw i64 %iv, 1
1157 %gep.ptr = getelementptr inbounds double, ptr %ptr, i64 %iv
1158 %for.1.next = load double, ptr %gep.ptr, align 8
1159 store double %add.2, ptr %gep.ptr
1160 %exitcond.not = icmp eq i64 %iv.next, 1000
1161 br i1 %exitcond.not, label %exit, label %loop