1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
3 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
4 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
5 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
6 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE
8 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
10 ; Make sure that we can handle multiple integer induction variables.
12 define void @multi_int_induction(i32* %A, i32 %N) {
13 ; CHECK-LABEL: @multi_int_induction(
14 ; CHECK-NEXT: for.body.lr.ph:
15 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
16 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
17 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
18 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
19 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
21 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
22 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
23 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
24 ; CHECK-NEXT: [[IND_END:%.*]] = add i32 190, [[CAST_VTC]]
25 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
27 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
28 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
29 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
30 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
31 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
32 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
33 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4
34 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
35 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
36 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
37 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
38 ; CHECK: middle.block:
39 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
40 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
42 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
43 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
44 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
46 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
47 ; CHECK-NEXT: [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
48 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
49 ; CHECK-NEXT: store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
50 ; CHECK-NEXT: [[INC]] = add nsw i32 [[COUNT_09]], 1
51 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
52 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
53 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
54 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
56 ; CHECK-NEXT: ret void
58 ; IND-LABEL: @multi_int_induction(
59 ; IND-NEXT: for.body.lr.ph:
60 ; IND-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
61 ; IND-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
62 ; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
63 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
64 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
66 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
67 ; IND-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
68 ; IND-NEXT: [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190
69 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
71 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
72 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
73 ; IND-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
74 ; IND-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
75 ; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
76 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
77 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
78 ; IND-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
79 ; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
81 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
82 ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
84 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
85 ; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
86 ; IND-NEXT: br label [[FOR_BODY:%.*]]
88 ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
89 ; IND-NEXT: [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
90 ; IND-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
91 ; IND-NEXT: store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
92 ; IND-NEXT: [[INC]] = add nsw i32 [[COUNT_09]], 1
93 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
94 ; IND-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
95 ; IND-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
96 ; IND-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
100 ; UNROLL-LABEL: @multi_int_induction(
101 ; UNROLL-NEXT: for.body.lr.ph:
102 ; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
103 ; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
104 ; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
105 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
106 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
108 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
109 ; UNROLL-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
110 ; UNROLL-NEXT: [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190
111 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
112 ; UNROLL: vector.body:
113 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
114 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
115 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
116 ; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
117 ; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
118 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
119 ; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 2
120 ; UNROLL-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
121 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP6]], align 4
122 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
123 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
124 ; UNROLL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
125 ; UNROLL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
126 ; UNROLL: middle.block:
127 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
128 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
130 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
131 ; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
132 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
134 ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
135 ; UNROLL-NEXT: [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
136 ; UNROLL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
137 ; UNROLL-NEXT: store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
138 ; UNROLL-NEXT: [[INC]] = add nsw i32 [[COUNT_09]], 1
139 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
140 ; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
141 ; UNROLL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
142 ; UNROLL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
144 ; UNROLL-NEXT: ret void
146 ; UNROLL-NO-IC-LABEL: @multi_int_induction(
147 ; UNROLL-NO-IC-NEXT: for.body.lr.ph:
148 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
149 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
150 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
151 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
152 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
153 ; UNROLL-NO-IC: vector.ph:
154 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
155 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
156 ; UNROLL-NO-IC-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
157 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i32 190, [[CAST_VTC]]
158 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
159 ; UNROLL-NO-IC: vector.body:
160 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
161 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
162 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
163 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
164 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
165 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
166 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
167 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
168 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <2 x i32>*
169 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP8]], align 4
170 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 2
171 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>*
172 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP10]], align 4
173 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
174 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
175 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
176 ; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
177 ; UNROLL-NO-IC: middle.block:
178 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
179 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
180 ; UNROLL-NO-IC: scalar.ph:
181 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
182 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
183 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
184 ; UNROLL-NO-IC: for.body:
185 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
186 ; UNROLL-NO-IC-NEXT: [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
187 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
188 ; UNROLL-NO-IC-NEXT: store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
189 ; UNROLL-NO-IC-NEXT: [[INC]] = add nsw i32 [[COUNT_09]], 1
190 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
191 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
192 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
193 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
194 ; UNROLL-NO-IC: for.end:
195 ; UNROLL-NO-IC-NEXT: ret void
197 ; INTERLEAVE-LABEL: @multi_int_induction(
198 ; INTERLEAVE-NEXT: for.body.lr.ph:
199 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
200 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
201 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
202 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
203 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
204 ; INTERLEAVE: vector.ph:
205 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
206 ; INTERLEAVE-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
207 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190
208 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
209 ; INTERLEAVE: vector.body:
210 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
211 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 190, i32 191, i32 192, i32 193>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
212 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
213 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
214 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
215 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP4]], align 4
216 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 4
217 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
218 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP6]], align 4
219 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
220 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
221 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
222 ; INTERLEAVE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
223 ; INTERLEAVE: middle.block:
224 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
225 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
226 ; INTERLEAVE: scalar.ph:
227 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
228 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
229 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
230 ; INTERLEAVE: for.body:
231 ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
232 ; INTERLEAVE-NEXT: [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
233 ; INTERLEAVE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
234 ; INTERLEAVE-NEXT: store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
235 ; INTERLEAVE-NEXT: [[INC]] = add nsw i32 [[COUNT_09]], 1
236 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
237 ; INTERLEAVE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
238 ; INTERLEAVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
239 ; INTERLEAVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
240 ; INTERLEAVE: for.end:
241 ; INTERLEAVE-NEXT: ret void
247 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
248 %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]
249 %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
250 store i32 %count.09, i32* %arrayidx2, align 4
251 %inc = add nsw i32 %count.09, 1
252 %indvars.iv.next = add i64 %indvars.iv, 1
253 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
254 %exitcond = icmp ne i32 %lftr.wideiv, %N
255 br i1 %exitcond, label %for.body, label %for.end
261 ; Make sure we remove unneeded vectorization of induction variables.
262 ; In order for instcombine to clean up the vectorized induction variables that
263 ; we create in the loop vectorizer, we need to perform some form of redundancy
264 ; elimination to get rid of multiple uses.
267 ; Vectorized induction variable.
269 define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
270 ; CHECK-LABEL: @scalar_use(
272 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
273 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
274 ; CHECK: vector.memcheck:
275 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
276 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast float* [[SCEVGEP]] to i8*
277 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
278 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
279 ; CHECK-NEXT: [[SCEVGEP23:%.*]] = bitcast float* [[SCEVGEP2]] to i8*
280 ; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
281 ; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8*
282 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
283 ; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
284 ; CHECK-NEXT: [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8*
285 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
286 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
287 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
288 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
290 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
291 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
292 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0
293 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
294 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
295 ; CHECK: vector.body:
296 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
297 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
298 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[OFFSET]]
299 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
300 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0
301 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <2 x float>*
302 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7
303 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP2]], [[OFFSET2]]
304 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]]
305 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 0
306 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <2 x float>*
307 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP10]], align 4, !alias.scope !7
308 ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]]
309 ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP11]]
310 ; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP5]] to <2 x float>*
311 ; CHECK-NEXT: store <2 x float> [[TMP12]], <2 x float>* [[TMP13]], align 4, !alias.scope !4, !noalias !7
312 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
313 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
314 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
315 ; CHECK: middle.block:
316 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
317 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
319 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
320 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
322 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
323 ; CHECK-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
324 ; CHECK-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
325 ; CHECK-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
326 ; CHECK-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
327 ; CHECK-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
328 ; CHECK-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
329 ; CHECK-NEXT: [[M:%.*]] = fmul fast float [[B]], [[L2]]
330 ; CHECK-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]]
331 ; CHECK-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4
332 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
333 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
334 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
336 ; CHECK-NEXT: ret void
338 ; IND-LABEL: @scalar_use(
340 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
341 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
342 ; IND: vector.memcheck:
343 ; IND-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
344 ; IND-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
345 ; IND-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
346 ; IND-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
347 ; IND-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
348 ; IND-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
349 ; IND-NEXT: [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]]
350 ; IND-NEXT: [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]]
351 ; IND-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
352 ; IND-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
354 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2
355 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i64 0
356 ; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
357 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
359 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
360 ; IND-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]]
361 ; IND-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
362 ; IND-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
363 ; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7
364 ; IND-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], [[OFFSET2]]
365 ; IND-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
366 ; IND-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
367 ; IND-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP7]], align 4, !alias.scope !7
368 ; IND-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]]
369 ; IND-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP8]]
370 ; IND-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
371 ; IND-NEXT: store <2 x float> [[TMP9]], <2 x float>* [[TMP10]], align 4, !alias.scope !4, !noalias !7
372 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
373 ; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
374 ; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
376 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
377 ; IND-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
379 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
380 ; IND-NEXT: br label [[FOR_BODY:%.*]]
382 ; IND-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
383 ; IND-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
384 ; IND-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
385 ; IND-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
386 ; IND-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
387 ; IND-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
388 ; IND-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
389 ; IND-NEXT: [[M:%.*]] = fmul fast float [[L2]], [[B]]
390 ; IND-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]]
391 ; IND-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4
392 ; IND-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
393 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
394 ; IND-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
398 ; UNROLL-LABEL: @scalar_use(
399 ; UNROLL-NEXT: entry:
400 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
401 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
402 ; UNROLL: vector.memcheck:
403 ; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
404 ; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
405 ; UNROLL-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
406 ; UNROLL-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
407 ; UNROLL-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
408 ; UNROLL-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
409 ; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]]
410 ; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]]
411 ; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
412 ; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
414 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4
415 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i64 0
416 ; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
417 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <2 x float> poison, float [[B]], i64 0
418 ; UNROLL-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT11]], <2 x float> poison, <2 x i32> zeroinitializer
419 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
420 ; UNROLL: vector.body:
421 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
422 ; UNROLL-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]]
423 ; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
424 ; UNROLL-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
425 ; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7
426 ; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 2
427 ; UNROLL-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <2 x float>*
428 ; UNROLL-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7
429 ; UNROLL-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[OFFSET2]]
430 ; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]]
431 ; UNROLL-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>*
432 ; UNROLL-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4, !alias.scope !7
433 ; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 2
434 ; UNROLL-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <2 x float>*
435 ; UNROLL-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, <2 x float>* [[TMP11]], align 4, !alias.scope !7
436 ; UNROLL-NEXT: [[TMP12:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]]
437 ; UNROLL-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]]
438 ; UNROLL-NEXT: [[TMP14:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP12]]
439 ; UNROLL-NEXT: [[TMP15:%.*]] = fadd fast <2 x float> [[WIDE_LOAD8]], [[TMP13]]
440 ; UNROLL-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
441 ; UNROLL-NEXT: store <2 x float> [[TMP14]], <2 x float>* [[TMP16]], align 4, !alias.scope !4, !noalias !7
442 ; UNROLL-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP5]] to <2 x float>*
443 ; UNROLL-NEXT: store <2 x float> [[TMP15]], <2 x float>* [[TMP17]], align 4, !alias.scope !4, !noalias !7
444 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
445 ; UNROLL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
446 ; UNROLL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
447 ; UNROLL: middle.block:
448 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
449 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
451 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
452 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
454 ; UNROLL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
455 ; UNROLL-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
456 ; UNROLL-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
457 ; UNROLL-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
458 ; UNROLL-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
459 ; UNROLL-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
460 ; UNROLL-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
461 ; UNROLL-NEXT: [[M:%.*]] = fmul fast float [[L2]], [[B]]
462 ; UNROLL-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]]
463 ; UNROLL-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4
464 ; UNROLL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
465 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
466 ; UNROLL-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
468 ; UNROLL-NEXT: ret void
470 ; UNROLL-NO-IC-LABEL: @scalar_use(
471 ; UNROLL-NO-IC-NEXT: entry:
472 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
473 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
474 ; UNROLL-NO-IC: vector.memcheck:
475 ; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
476 ; UNROLL-NO-IC-NEXT: [[SCEVGEP1:%.*]] = bitcast float* [[SCEVGEP]] to i8*
477 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
478 ; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
479 ; UNROLL-NO-IC-NEXT: [[SCEVGEP23:%.*]] = bitcast float* [[SCEVGEP2]] to i8*
480 ; UNROLL-NO-IC-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
481 ; UNROLL-NO-IC-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8*
482 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
483 ; UNROLL-NO-IC-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
484 ; UNROLL-NO-IC-NEXT: [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8*
485 ; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
486 ; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
487 ; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
488 ; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
489 ; UNROLL-NO-IC: vector.ph:
490 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
491 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
492 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0
493 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
494 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <2 x float> poison, float [[B]], i32 0
495 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT11]], <2 x float> poison, <2 x i32> zeroinitializer
496 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
497 ; UNROLL-NO-IC: vector.body:
498 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
499 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
500 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
501 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[OFFSET]]
502 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[OFFSET]]
503 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
504 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
505 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 0
506 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>*
507 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4, !alias.scope !4, !noalias !7
508 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 2
509 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <2 x float>*
510 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP11]], align 4, !alias.scope !4, !noalias !7
511 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[TMP2]], [[OFFSET2]]
512 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add i64 [[TMP3]], [[OFFSET2]]
513 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
514 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP13]]
515 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 0
516 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <2 x float>*
517 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP17]], align 4, !alias.scope !7
518 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 2
519 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <2 x float>*
520 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, <2 x float>* [[TMP19]], align 4, !alias.scope !7
521 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]]
522 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]]
523 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP20]]
524 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = fadd fast <2 x float> [[WIDE_LOAD8]], [[TMP21]]
525 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP8]] to <2 x float>*
526 ; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP22]], <2 x float>* [[TMP24]], align 4, !alias.scope !4, !noalias !7
527 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP10]] to <2 x float>*
528 ; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP23]], <2 x float>* [[TMP25]], align 4, !alias.scope !4, !noalias !7
529 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
530 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
531 ; UNROLL-NO-IC-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
532 ; UNROLL-NO-IC: middle.block:
533 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
534 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
535 ; UNROLL-NO-IC: scalar.ph:
536 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
537 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
538 ; UNROLL-NO-IC: for.body:
539 ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
540 ; UNROLL-NO-IC-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
541 ; UNROLL-NO-IC-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
542 ; UNROLL-NO-IC-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
543 ; UNROLL-NO-IC-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
544 ; UNROLL-NO-IC-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
545 ; UNROLL-NO-IC-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
546 ; UNROLL-NO-IC-NEXT: [[M:%.*]] = fmul fast float [[B]], [[L2]]
547 ; UNROLL-NO-IC-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]]
548 ; UNROLL-NO-IC-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4
549 ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
550 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
551 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
552 ; UNROLL-NO-IC: loopexit:
553 ; UNROLL-NO-IC-NEXT: ret void
555 ; INTERLEAVE-LABEL: @scalar_use(
556 ; INTERLEAVE-NEXT: entry:
557 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
558 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
559 ; INTERLEAVE: vector.memcheck:
560 ; INTERLEAVE-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
561 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
562 ; INTERLEAVE-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
563 ; INTERLEAVE-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
564 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
565 ; INTERLEAVE-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
566 ; INTERLEAVE-NEXT: [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]]
567 ; INTERLEAVE-NEXT: [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]]
568 ; INTERLEAVE-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
569 ; INTERLEAVE-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
570 ; INTERLEAVE: vector.ph:
571 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
572 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
573 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
574 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x float> poison, float [[B]], i64 0
575 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT11]], <4 x float> poison, <4 x i32> zeroinitializer
576 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
577 ; INTERLEAVE: vector.body:
578 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
579 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]]
580 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
581 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
582 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7
583 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 4
584 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
585 ; INTERLEAVE-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7
586 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[OFFSET2]]
587 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]]
588 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
589 ; INTERLEAVE-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !7
590 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 4
591 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>*
592 ; INTERLEAVE-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4, !alias.scope !7
593 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]]
594 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]]
595 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[TMP12]]
596 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[TMP13]]
597 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
598 ; INTERLEAVE-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 4, !alias.scope !4, !noalias !7
599 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
600 ; INTERLEAVE-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[TMP17]], align 4, !alias.scope !4, !noalias !7
601 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
602 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
603 ; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
604 ; INTERLEAVE: middle.block:
605 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
606 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
607 ; INTERLEAVE: scalar.ph:
608 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
609 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
610 ; INTERLEAVE: for.body:
611 ; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
612 ; INTERLEAVE-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
613 ; INTERLEAVE-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
614 ; INTERLEAVE-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
615 ; INTERLEAVE-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
616 ; INTERLEAVE-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
617 ; INTERLEAVE-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
618 ; INTERLEAVE-NEXT: [[M:%.*]] = fmul fast float [[L2]], [[B]]
619 ; INTERLEAVE-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]]
620 ; INTERLEAVE-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4
621 ; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
622 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
623 ; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
624 ; INTERLEAVE: loopexit:
625 ; INTERLEAVE-NEXT: ret void
631 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
632 %ind.sum = add i64 %iv, %offset
633 %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
634 %l1 = load float, float* %arr.idx, align 4
635 %ind.sum2 = add i64 %iv, %offset2
636 %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
637 %l2 = load float, float* %arr.idx2, align 4
638 %m = fmul fast float %b, %l2
639 %ad = fadd fast float %l1, %m
640 store float %ad, float* %arr.idx, align 4
641 %iv.next = add nuw nsw i64 %iv, 1
642 %exitcond = icmp eq i64 %iv.next, %n
643 br i1 %exitcond, label %loopexit, label %for.body
649 ; Make sure we don't create a vector induction phi node that is unused.
650 ; Scalarize the step vectors instead.
652 ; for (int i = 0; i < n; ++i)
659 define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) {
660 ; CHECK-LABEL: @scalarize_induction_variable_01(
662 ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
663 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
664 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
666 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
667 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
668 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
669 ; CHECK: vector.body:
670 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
671 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
672 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
673 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP0]]
674 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
675 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>*
676 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
677 ; CHECK-NEXT: [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
678 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
679 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
680 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
681 ; CHECK: middle.block:
682 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP4]])
683 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
684 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
686 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
687 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
688 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
690 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
691 ; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
692 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
693 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8
694 ; CHECK-NEXT: [[TMP9]] = add i64 [[TMP8]], [[SUM]]
695 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
696 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
697 ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
699 ; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ [[TMP9]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
700 ; CHECK-NEXT: ret i64 [[TMP10]]
702 ; IND-LABEL: @scalarize_induction_variable_01(
704 ; IND-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
705 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
706 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
708 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
709 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
711 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
712 ; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
713 ; IND-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
714 ; IND-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>*
715 ; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
716 ; IND-NEXT: [[TMP2]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
717 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
718 ; IND-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
719 ; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
721 ; IND-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP2]])
722 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
723 ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
725 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
726 ; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
727 ; IND-NEXT: br label [[FOR_BODY:%.*]]
729 ; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
730 ; IND-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
731 ; IND-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
732 ; IND-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
733 ; IND-NEXT: [[TMP7]] = add i64 [[TMP6]], [[SUM]]
734 ; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
735 ; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
736 ; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
738 ; IND-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP7]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
739 ; IND-NEXT: ret i64 [[TMP8]]
741 ; UNROLL-LABEL: @scalarize_induction_variable_01(
742 ; UNROLL-NEXT: entry:
743 ; UNROLL-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
744 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
745 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
747 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
748 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
749 ; UNROLL: vector.body:
750 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
751 ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
752 ; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
753 ; UNROLL-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
754 ; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>*
755 ; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
756 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 2
757 ; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>*
758 ; UNROLL-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
759 ; UNROLL-NEXT: [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
760 ; UNROLL-NEXT: [[TMP5]] = add <2 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]]
761 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
762 ; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
763 ; UNROLL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
764 ; UNROLL: middle.block:
765 ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP5]], [[TMP4]]
766 ; UNROLL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
767 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
768 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
770 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
771 ; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
772 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
774 ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
775 ; UNROLL-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP10:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
776 ; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
777 ; UNROLL-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
778 ; UNROLL-NEXT: [[TMP10]] = add i64 [[TMP9]], [[SUM]]
779 ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
780 ; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
781 ; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
783 ; UNROLL-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
784 ; UNROLL-NEXT: ret i64 [[TMP11]]
786 ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
787 ; UNROLL-NO-IC-NEXT: entry:
788 ; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
789 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
790 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
791 ; UNROLL-NO-IC: vector.ph:
792 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4
793 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
794 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
795 ; UNROLL-NO-IC: vector.body:
796 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
797 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
798 ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
799 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
800 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
801 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP0]]
802 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP1]]
803 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[TMP2]], i32 0
804 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
805 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
806 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[TMP2]], i32 2
807 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>*
808 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 8
809 ; UNROLL-NO-IC-NEXT: [[TMP8]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
810 ; UNROLL-NO-IC-NEXT: [[TMP9]] = add <2 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]]
811 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
812 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
813 ; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
814 ; UNROLL-NO-IC: middle.block:
815 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]]
816 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
817 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
818 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
819 ; UNROLL-NO-IC: scalar.ph:
820 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
821 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
822 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
823 ; UNROLL-NO-IC: for.body:
824 ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
825 ; UNROLL-NO-IC-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP14:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
826 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
827 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
828 ; UNROLL-NO-IC-NEXT: [[TMP14]] = add i64 [[TMP13]], [[SUM]]
829 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
830 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
831 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
832 ; UNROLL-NO-IC: for.end:
833 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = phi i64 [ [[TMP14]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
834 ; UNROLL-NO-IC-NEXT: ret i64 [[TMP15]]
836 ; INTERLEAVE-LABEL: @scalarize_induction_variable_01(
837 ; INTERLEAVE-NEXT: entry:
838 ; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
839 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8
840 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
841 ; INTERLEAVE: vector.ph:
842 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800
843 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
844 ; INTERLEAVE: vector.body:
845 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
846 ; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
847 ; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
848 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
849 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <4 x i64>*
850 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 8
851 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 4
852 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <4 x i64>*
853 ; INTERLEAVE-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 8
854 ; INTERLEAVE-NEXT: [[TMP4]] = add <4 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
855 ; INTERLEAVE-NEXT: [[TMP5]] = add <4 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]]
856 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
857 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
858 ; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
859 ; INTERLEAVE: middle.block:
860 ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP5]], [[TMP4]]
861 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]])
862 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
863 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
864 ; INTERLEAVE: scalar.ph:
865 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
866 ; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
867 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
868 ; INTERLEAVE: for.body:
869 ; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
870 ; INTERLEAVE-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP10:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
871 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
872 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
873 ; INTERLEAVE-NEXT: [[TMP10]] = add i64 [[TMP9]], [[SUM]]
874 ; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
875 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
876 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
877 ; INTERLEAVE: for.end:
878 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
879 ; INTERLEAVE-NEXT: ret i64 [[TMP11]]
885 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
886 %sum = phi i64 [ %2, %for.body ], [ 0, %entry ]
887 %0 = getelementptr inbounds i64, i64* %a, i64 %i
888 %1 = load i64, i64* %0, align 8
889 %2 = add i64 %1, %sum
890 %i.next = add nuw nsw i64 %i, 1
891 %cond = icmp slt i64 %i.next, %n
892 br i1 %cond, label %for.body, label %for.end
895 %3 = phi i64 [ %2, %for.body ]
899 ; Make sure we scalarize the step vectors used for the pointer arithmetic. We
900 ; can't easily simplify vectorized step vectors.
903 ; for (int i = 0; i < n; i += 8)
904 ; s += (a[i] + b[i] + 1.0f);
910 define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) {
911 ; CHECK-LABEL: @scalarize_induction_variable_02(
913 ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
914 ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
915 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
916 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
917 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
918 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
920 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
921 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
922 ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 8
923 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
924 ; CHECK: vector.body:
925 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
926 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
927 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
928 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
929 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
930 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP3]]
931 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
932 ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
933 ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[TMP6]], align 4
934 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
935 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i32 1
936 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP3]]
937 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]]
938 ; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[TMP11]], align 4
939 ; CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[TMP12]], align 4
940 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
941 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i32 1
942 ; CHECK-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
943 ; CHECK-NEXT: [[TMP18:%.*]] = fadd fast <2 x float> [[TMP17]], [[TMP10]]
944 ; CHECK-NEXT: [[TMP19]] = fadd fast <2 x float> [[TMP18]], [[TMP16]]
945 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
946 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
947 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
948 ; CHECK: middle.block:
949 ; CHECK-NEXT: [[TMP21:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[TMP19]])
950 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
951 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
953 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
954 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
955 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
957 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
958 ; CHECK-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
959 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
960 ; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
961 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
962 ; CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[TMP24]], align 4
963 ; CHECK-NEXT: [[TMP26:%.*]] = fadd fast float [[S]], 1.000000e+00
964 ; CHECK-NEXT: [[TMP27:%.*]] = fadd fast float [[TMP26]], [[TMP23]]
965 ; CHECK-NEXT: [[TMP28]] = fadd fast float [[TMP27]], [[TMP25]]
966 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8
967 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
968 ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
970 ; CHECK-NEXT: [[S_LCSSA:%.*]] = phi float [ [[TMP28]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
971 ; CHECK-NEXT: ret float [[S_LCSSA]]
973 ; IND-LABEL: @scalarize_induction_variable_02(
975 ; IND-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
976 ; IND-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
977 ; IND-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
978 ; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
979 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
980 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
982 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902
983 ; IND-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 3
984 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
986 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
987 ; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
988 ; IND-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
989 ; IND-NEXT: [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 8
990 ; IND-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[OFFSET_IDX]]
991 ; IND-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
992 ; IND-NEXT: [[TMP6:%.*]] = load float, float* [[TMP4]], align 4
993 ; IND-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
994 ; IND-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
995 ; IND-NEXT: [[TMP9:%.*]] = insertelement <2 x float> [[TMP8]], float [[TMP7]], i64 1
996 ; IND-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]]
997 ; IND-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
998 ; IND-NEXT: [[TMP12:%.*]] = load float, float* [[TMP10]], align 4
999 ; IND-NEXT: [[TMP13:%.*]] = load float, float* [[TMP11]], align 4
1000 ; IND-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0
1001 ; IND-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1
1002 ; IND-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
1003 ; IND-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[TMP16]], [[TMP9]]
1004 ; IND-NEXT: [[TMP18]] = fadd fast <2 x float> [[TMP17]], [[TMP15]]
1005 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1006 ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1007 ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1008 ; IND: middle.block:
1009 ; IND-NEXT: [[TMP20:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[TMP18]])
1010 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1011 ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1013 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1014 ; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1015 ; IND-NEXT: br label [[FOR_BODY:%.*]]
1017 ; IND-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
1018 ; IND-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
1019 ; IND-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1020 ; IND-NEXT: [[TMP22:%.*]] = load float, float* [[TMP21]], align 4
1021 ; IND-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
1022 ; IND-NEXT: [[TMP24:%.*]] = load float, float* [[TMP23]], align 4
1023 ; IND-NEXT: [[TMP25:%.*]] = fadd fast float [[S]], 1.000000e+00
1024 ; IND-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP25]], [[TMP22]]
1025 ; IND-NEXT: [[TMP27]] = fadd fast float [[TMP26]], [[TMP24]]
1026 ; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8
1027 ; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1028 ; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
1030 ; IND-NEXT: [[S_LCSSA:%.*]] = phi float [ [[TMP27]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ]
1031 ; IND-NEXT: ret float [[S_LCSSA]]
1033 ; UNROLL-LABEL: @scalarize_induction_variable_02(
1034 ; UNROLL-NEXT: entry:
1035 ; UNROLL-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
1036 ; UNROLL-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
1037 ; UNROLL-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
1038 ; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1039 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 24
1040 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1041 ; UNROLL: vector.ph:
1042 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387900
1043 ; UNROLL-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 3
1044 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
1045 ; UNROLL: vector.body:
1046 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1047 ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ]
1048 ; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
1049 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
1050 ; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 8
1051 ; UNROLL-NEXT: [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 16
1052 ; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 24
1053 ; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[OFFSET_IDX]]
1054 ; UNROLL-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
1055 ; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
1056 ; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
1057 ; UNROLL-NEXT: [[TMP10:%.*]] = load float, float* [[TMP6]], align 4
1058 ; UNROLL-NEXT: [[TMP11:%.*]] = load float, float* [[TMP7]], align 4
1059 ; UNROLL-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
1060 ; UNROLL-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP11]], i64 1
1061 ; UNROLL-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4
1062 ; UNROLL-NEXT: [[TMP15:%.*]] = load float, float* [[TMP9]], align 4
1063 ; UNROLL-NEXT: [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
1064 ; UNROLL-NEXT: [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP15]], i64 1
1065 ; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]]
1066 ; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
1067 ; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]]
1068 ; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]]
1069 ; UNROLL-NEXT: [[TMP22:%.*]] = load float, float* [[TMP18]], align 4
1070 ; UNROLL-NEXT: [[TMP23:%.*]] = load float, float* [[TMP19]], align 4
1071 ; UNROLL-NEXT: [[TMP24:%.*]] = insertelement <2 x float> poison, float [[TMP22]], i64 0
1072 ; UNROLL-NEXT: [[TMP25:%.*]] = insertelement <2 x float> [[TMP24]], float [[TMP23]], i64 1
1073 ; UNROLL-NEXT: [[TMP26:%.*]] = load float, float* [[TMP20]], align 4
1074 ; UNROLL-NEXT: [[TMP27:%.*]] = load float, float* [[TMP21]], align 4
1075 ; UNROLL-NEXT: [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0
1076 ; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP28]], float [[TMP27]], i64 1
1077 ; UNROLL-NEXT: [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
1078 ; UNROLL-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00>
1079 ; UNROLL-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[TMP30]], [[TMP13]]
1080 ; UNROLL-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP17]]
1081 ; UNROLL-NEXT: [[TMP34]] = fadd fast <2 x float> [[TMP32]], [[TMP25]]
1082 ; UNROLL-NEXT: [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP29]]
1083 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1084 ; UNROLL-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1085 ; UNROLL-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1086 ; UNROLL: middle.block:
1087 ; UNROLL-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x float> [[TMP35]], [[TMP34]]
1088 ; UNROLL-NEXT: [[TMP37:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[BIN_RDX]])
1089 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1090 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1091 ; UNROLL: scalar.ph:
1092 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1093 ; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP37]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1094 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
1096 ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
1097 ; UNROLL-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP44:%.*]], [[FOR_BODY]] ]
1098 ; UNROLL-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1099 ; UNROLL-NEXT: [[TMP39:%.*]] = load float, float* [[TMP38]], align 4
1100 ; UNROLL-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
1101 ; UNROLL-NEXT: [[TMP41:%.*]] = load float, float* [[TMP40]], align 4
1102 ; UNROLL-NEXT: [[TMP42:%.*]] = fadd fast float [[S]], 1.000000e+00
1103 ; UNROLL-NEXT: [[TMP43:%.*]] = fadd fast float [[TMP42]], [[TMP39]]
1104 ; UNROLL-NEXT: [[TMP44]] = fadd fast float [[TMP43]], [[TMP41]]
1105 ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8
1106 ; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1107 ; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
1109 ; UNROLL-NEXT: [[S_LCSSA:%.*]] = phi float [ [[TMP44]], [[FOR_BODY]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ]
1110 ; UNROLL-NEXT: ret float [[S_LCSSA]]
1112 ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
1113 ; UNROLL-NO-IC-NEXT: entry:
1114 ; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
1115 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
1116 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
1117 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1118 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
1119 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1120 ; UNROLL-NO-IC: vector.ph:
1121 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
1122 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1123 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 8
1124 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
1125 ; UNROLL-NO-IC: vector.body:
1126 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1127 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
1128 ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
1129 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
1130 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
1131 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
1132 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16
1133 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24
1134 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP3]]
1135 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
1136 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
1137 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP6]]
1138 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = load float, float* [[TMP7]], align 4
1139 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = load float, float* [[TMP8]], align 4
1140 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0
1141 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP12]], i32 1
1142 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = load float, float* [[TMP9]], align 4
1143 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load float, float* [[TMP10]], align 4
1144 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0
1145 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i32 1
1146 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP3]]
1147 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]]
1148 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]]
1149 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP6]]
1150 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load float, float* [[TMP19]], align 4
1151 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load float, float* [[TMP20]], align 4
1152 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i32 0
1153 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i32 1
1154 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load float, float* [[TMP21]], align 4
1155 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load float, float* [[TMP22]], align 4
1156 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i32 0
1157 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP28]], i32 1
1158 ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
1159 ; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00>
1160 ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP14]]
1161 ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = fadd fast <2 x float> [[TMP32]], [[TMP18]]
1162 ; UNROLL-NO-IC-NEXT: [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP26]]
1163 ; UNROLL-NO-IC-NEXT: [[TMP36]] = fadd fast <2 x float> [[TMP34]], [[TMP30]]
1164 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1165 ; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1166 ; UNROLL-NO-IC-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1167 ; UNROLL-NO-IC: middle.block:
1168 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x float> [[TMP36]], [[TMP35]]
1169 ; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[BIN_RDX]])
1170 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1171 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1172 ; UNROLL-NO-IC: scalar.ph:
1173 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1174 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP38]], [[MIDDLE_BLOCK]] ]
1175 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
1176 ; UNROLL-NO-IC: for.body:
1177 ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
1178 ; UNROLL-NO-IC-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP45:%.*]], [[FOR_BODY]] ]
1179 ; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1180 ; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = load float, float* [[TMP39]], align 4
1181 ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
1182 ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = load float, float* [[TMP41]], align 4
1183 ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = fadd fast float [[S]], 1.000000e+00
1184 ; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = fadd fast float [[TMP43]], [[TMP40]]
1185 ; UNROLL-NO-IC-NEXT: [[TMP45]] = fadd fast float [[TMP44]], [[TMP42]]
1186 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8
1187 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1188 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
1189 ; UNROLL-NO-IC: for.end:
1190 ; UNROLL-NO-IC-NEXT: [[S_LCSSA:%.*]] = phi float [ [[TMP45]], [[FOR_BODY]] ], [ [[TMP38]], [[MIDDLE_BLOCK]] ]
1191 ; UNROLL-NO-IC-NEXT: ret float [[S_LCSSA]]
1193 ; INTERLEAVE-LABEL: @scalarize_induction_variable_02(
1194 ; INTERLEAVE-NEXT: entry:
1195 ; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
1196 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
1197 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
1198 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1199 ; INTERLEAVE: vector.ph:
1200 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
1201 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1202 ; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7
1203 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1204 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 8, i64 [[N_MOD_VF]]
1205 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[TMP4]]
1206 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 3
1207 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
1208 ; INTERLEAVE: vector.body:
1209 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1210 ; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
1211 ; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
1212 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
1213 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 32
1214 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[OFFSET_IDX]]
1215 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
1216 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP6]] to <32 x float>*
1217 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP7]] to <32 x float>*
1218 ; INTERLEAVE-NEXT: [[WIDE_VEC:%.*]] = load <32 x float>, <32 x float>* [[TMP8]], align 4
1219 ; INTERLEAVE-NEXT: [[WIDE_VEC2:%.*]] = load <32 x float>, <32 x float>* [[TMP9]], align 4
1220 ; INTERLEAVE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x float> [[WIDE_VEC]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1221 ; INTERLEAVE-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x float> [[WIDE_VEC2]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1222 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]]
1223 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]]
1224 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP10]] to <32 x float>*
1225 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP11]] to <32 x float>*
1226 ; INTERLEAVE-NEXT: [[WIDE_VEC4:%.*]] = load <32 x float>, <32 x float>* [[TMP12]], align 4
1227 ; INTERLEAVE-NEXT: [[WIDE_VEC5:%.*]] = load <32 x float>, <32 x float>* [[TMP13]], align 4
1228 ; INTERLEAVE-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <32 x float> [[WIDE_VEC4]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1229 ; INTERLEAVE-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <32 x float> [[WIDE_VEC5]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1230 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
1231 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
1232 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[TMP14]], [[STRIDED_VEC]]
1233 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = fadd fast <4 x float> [[TMP15]], [[STRIDED_VEC3]]
1234 ; INTERLEAVE-NEXT: [[TMP18]] = fadd fast <4 x float> [[TMP16]], [[STRIDED_VEC6]]
1235 ; INTERLEAVE-NEXT: [[TMP19]] = fadd fast <4 x float> [[TMP17]], [[STRIDED_VEC7]]
1236 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1237 ; INTERLEAVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1238 ; INTERLEAVE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1239 ; INTERLEAVE: middle.block:
1240 ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP19]], [[TMP18]]
1241 ; INTERLEAVE-NEXT: [[TMP21:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]])
1242 ; INTERLEAVE-NEXT: br label [[SCALAR_PH]]
1243 ; INTERLEAVE: scalar.ph:
1244 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1245 ; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1246 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
1247 ; INTERLEAVE: for.body:
1248 ; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
1249 ; INTERLEAVE-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
1250 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1251 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
1252 ; INTERLEAVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
1253 ; INTERLEAVE-NEXT: [[TMP25:%.*]] = load float, float* [[TMP24]], align 4
1254 ; INTERLEAVE-NEXT: [[TMP26:%.*]] = fadd fast float [[S]], 1.000000e+00
1255 ; INTERLEAVE-NEXT: [[TMP27:%.*]] = fadd fast float [[TMP26]], [[TMP23]]
1256 ; INTERLEAVE-NEXT: [[TMP28]] = fadd fast float [[TMP27]], [[TMP25]]
1257 ; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8
1258 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1259 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP14:![0-9]+]]
1260 ; INTERLEAVE: for.end:
1261 ; INTERLEAVE-NEXT: ret float [[TMP28]]
1267 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
1268 %s = phi float [ 0.0, %entry ], [ %6, %for.body ]
1269 %0 = getelementptr inbounds float, float* %a, i64 %i
1270 %1 = load float, float* %0, align 4
1271 %2 = getelementptr inbounds float, float* %b, i64 %i
1272 %3 = load float, float* %2, align 4
1273 %4 = fadd fast float %s, 1.0
1274 %5 = fadd fast float %4, %1
1275 %6 = fadd fast float %5, %3
1276 %i.next = add nuw nsw i64 %i, 8
1277 %cond = icmp slt i64 %i.next, %n
1278 br i1 %cond, label %for.body, label %for.end
1281 %s.lcssa = phi float [ %6, %for.body ]
1285 ; Make sure we scalarize the step vectors used for the pointer arithmetic. We
1286 ; can't easily simplify vectorized step vectors. (Interleaved accesses.)
1288 ; for (int i = 0; i < n; ++i)
1292 %pair.i32 = type { i32, i32 }
1293 define void @scalarize_induction_variable_03(%pair.i32 *%p, i32 %y, i64 %n) {
1294 ; CHECK-LABEL: @scalarize_induction_variable_03(
1295 ; CHECK-NEXT: entry:
1296 ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1297 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
1298 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1300 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
1301 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
1302 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
1303 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
1304 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1305 ; CHECK: vector.body:
1306 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1307 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1308 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1309 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[TMP0]], i32 1
1310 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1
1311 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 8
1312 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 8
1313 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
1314 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
1315 ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[TMP7]], [[BROADCAST_SPLAT]]
1316 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0
1317 ; CHECK-NEXT: store i32 [[TMP9]], i32* [[TMP2]], align 8
1318 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1
1319 ; CHECK-NEXT: store i32 [[TMP10]], i32* [[TMP3]], align 8
1320 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1321 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1322 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1323 ; CHECK: middle.block:
1324 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1325 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1327 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1328 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1330 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1331 ; CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1332 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[F]], align 8
1333 ; CHECK-NEXT: [[TMP13:%.*]] = xor i32 [[TMP12]], [[Y]]
1334 ; CHECK-NEXT: store i32 [[TMP13]], i32* [[F]], align 8
1335 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1336 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1337 ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]]
1339 ; CHECK-NEXT: ret void
1341 ; IND-LABEL: @scalarize_induction_variable_03(
1343 ; IND-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1344 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
1345 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1347 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
1348 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
1349 ; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
1350 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
1352 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1353 ; IND-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1
1354 ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1
1355 ; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP0]], i32 1
1356 ; IND-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 8
1357 ; IND-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 8
1358 ; IND-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0
1359 ; IND-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1
1360 ; IND-NEXT: [[TMP7:%.*]] = xor <2 x i32> [[TMP6]], [[BROADCAST_SPLAT]]
1361 ; IND-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP7]], i64 0
1362 ; IND-NEXT: store i32 [[TMP8]], i32* [[TMP1]], align 8
1363 ; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i64 1
1364 ; IND-NEXT: store i32 [[TMP9]], i32* [[TMP2]], align 8
1365 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1366 ; IND-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1367 ; IND-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1368 ; IND: middle.block:
1369 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1370 ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1372 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1373 ; IND-NEXT: br label [[FOR_BODY:%.*]]
1375 ; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1376 ; IND-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1377 ; IND-NEXT: [[TMP11:%.*]] = load i32, i32* [[F]], align 8
1378 ; IND-NEXT: [[TMP12:%.*]] = xor i32 [[TMP11]], [[Y]]
1379 ; IND-NEXT: store i32 [[TMP12]], i32* [[F]], align 8
1380 ; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1381 ; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1382 ; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]]
1384 ; IND-NEXT: ret void
1386 ; UNROLL-LABEL: @scalarize_induction_variable_03(
1387 ; UNROLL-NEXT: entry:
1388 ; UNROLL-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1389 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
1390 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1391 ; UNROLL: vector.ph:
1392 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
1393 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
1394 ; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
1395 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i64 0
1396 ; UNROLL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
1397 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
1398 ; UNROLL: vector.body:
1399 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1400 ; UNROLL-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1
1401 ; UNROLL-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2
1402 ; UNROLL-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3
1403 ; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1
1404 ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP0]], i32 1
1405 ; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1
1406 ; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
1407 ; UNROLL-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 8
1408 ; UNROLL-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 8
1409 ; UNROLL-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0
1410 ; UNROLL-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP8]], i64 1
1411 ; UNROLL-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP5]], align 8
1412 ; UNROLL-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 8
1413 ; UNROLL-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i64 0
1414 ; UNROLL-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1
1415 ; UNROLL-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP10]], [[BROADCAST_SPLAT]]
1416 ; UNROLL-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP14]], [[BROADCAST_SPLAT2]]
1417 ; UNROLL-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP15]], i64 0
1418 ; UNROLL-NEXT: store i32 [[TMP17]], i32* [[TMP3]], align 8
1419 ; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP15]], i64 1
1420 ; UNROLL-NEXT: store i32 [[TMP18]], i32* [[TMP4]], align 8
1421 ; UNROLL-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP16]], i64 0
1422 ; UNROLL-NEXT: store i32 [[TMP19]], i32* [[TMP5]], align 8
1423 ; UNROLL-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP16]], i64 1
1424 ; UNROLL-NEXT: store i32 [[TMP20]], i32* [[TMP6]], align 8
1425 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1426 ; UNROLL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1427 ; UNROLL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1428 ; UNROLL: middle.block:
1429 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1430 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1431 ; UNROLL: scalar.ph:
1432 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1433 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
1435 ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1436 ; UNROLL-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1437 ; UNROLL-NEXT: [[TMP22:%.*]] = load i32, i32* [[F]], align 8
1438 ; UNROLL-NEXT: [[TMP23:%.*]] = xor i32 [[TMP22]], [[Y]]
1439 ; UNROLL-NEXT: store i32 [[TMP23]], i32* [[F]], align 8
1440 ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1441 ; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1442 ; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]]
1444 ; UNROLL-NEXT: ret void
1446 ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_03(
1447 ; UNROLL-NO-IC-NEXT: entry:
1448 ; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1449 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
1450 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1451 ; UNROLL-NO-IC: vector.ph:
1452 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4
1453 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
1454 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
1455 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
1456 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i32 0
1457 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
1458 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
1459 ; UNROLL-NO-IC: vector.body:
1460 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1461 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1462 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1463 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
1464 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
1465 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[TMP0]], i32 1
1466 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1
1467 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
1468 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1
1469 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 8
1470 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 8
1471 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
1472 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i32 1
1473 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 8
1474 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP7]], align 8
1475 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0
1476 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP13]], i32 1
1477 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP11]], [[BROADCAST_SPLAT]]
1478 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = xor <2 x i32> [[TMP15]], [[BROADCAST_SPLAT2]]
1479 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
1480 ; UNROLL-NO-IC-NEXT: store i32 [[TMP18]], i32* [[TMP4]], align 8
1481 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
1482 ; UNROLL-NO-IC-NEXT: store i32 [[TMP19]], i32* [[TMP5]], align 8
1483 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0
1484 ; UNROLL-NO-IC-NEXT: store i32 [[TMP20]], i32* [[TMP6]], align 8
1485 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1
1486 ; UNROLL-NO-IC-NEXT: store i32 [[TMP21]], i32* [[TMP7]], align 8
1487 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1488 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1489 ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1490 ; UNROLL-NO-IC: middle.block:
1491 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1492 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1493 ; UNROLL-NO-IC: scalar.ph:
1494 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1495 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
1496 ; UNROLL-NO-IC: for.body:
1497 ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1498 ; UNROLL-NO-IC-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1499 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i32, i32* [[F]], align 8
1500 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = xor i32 [[TMP23]], [[Y]]
1501 ; UNROLL-NO-IC-NEXT: store i32 [[TMP24]], i32* [[F]], align 8
1502 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1503 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1504 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]]
1505 ; UNROLL-NO-IC: for.end:
1506 ; UNROLL-NO-IC-NEXT: ret void
1508 ; INTERLEAVE-LABEL: @scalarize_induction_variable_03(
1509 ; INTERLEAVE-NEXT: entry:
1510 ; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1511 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 9
1512 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1513 ; INTERLEAVE: vector.ph:
1514 ; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 7
1515 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1516 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 8, i64 [[N_MOD_VF]]
1517 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]]
1518 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
1519 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1520 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0
1521 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
1522 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
1523 ; INTERLEAVE: vector.body:
1524 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1525 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1
1526 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2
1527 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3
1528 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 4
1529 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 5
1530 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 6
1531 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 7
1532 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1
1533 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
1534 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1
1535 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1
1536 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 1
1537 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP6]], i32 1
1538 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP7]], i32 1
1539 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1
1540 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
1541 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP13]] to <8 x i32>*
1542 ; INTERLEAVE-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP17]], align 8
1543 ; INTERLEAVE-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP18]], align 8
1544 ; INTERLEAVE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1545 ; INTERLEAVE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1546 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
1547 ; INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <4 x i32> [[STRIDED_VEC2]], [[BROADCAST_SPLAT4]]
1548 ; INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP19]], i64 0
1549 ; INTERLEAVE-NEXT: store i32 [[TMP21]], i32* [[TMP9]], align 8
1550 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP19]], i64 1
1551 ; INTERLEAVE-NEXT: store i32 [[TMP22]], i32* [[TMP10]], align 8
1552 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP19]], i64 2
1553 ; INTERLEAVE-NEXT: store i32 [[TMP23]], i32* [[TMP11]], align 8
1554 ; INTERLEAVE-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP19]], i64 3
1555 ; INTERLEAVE-NEXT: store i32 [[TMP24]], i32* [[TMP12]], align 8
1556 ; INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i64 0
1557 ; INTERLEAVE-NEXT: store i32 [[TMP25]], i32* [[TMP13]], align 8
1558 ; INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP20]], i64 1
1559 ; INTERLEAVE-NEXT: store i32 [[TMP26]], i32* [[TMP14]], align 8
1560 ; INTERLEAVE-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP20]], i64 2
1561 ; INTERLEAVE-NEXT: store i32 [[TMP27]], i32* [[TMP15]], align 8
1562 ; INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP20]], i64 3
1563 ; INTERLEAVE-NEXT: store i32 [[TMP28]], i32* [[TMP16]], align 8
1564 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1565 ; INTERLEAVE-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1566 ; INTERLEAVE-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1567 ; INTERLEAVE: middle.block:
1568 ; INTERLEAVE-NEXT: br label [[SCALAR_PH]]
1569 ; INTERLEAVE: scalar.ph:
1570 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1571 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
1572 ; INTERLEAVE: for.body:
1573 ; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1574 ; INTERLEAVE-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1575 ; INTERLEAVE-NEXT: [[TMP30:%.*]] = load i32, i32* [[F]], align 8
1576 ; INTERLEAVE-NEXT: [[TMP31:%.*]] = xor i32 [[TMP30]], [[Y]]
1577 ; INTERLEAVE-NEXT: store i32 [[TMP31]], i32* [[F]], align 8
1578 ; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1579 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1580 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP16:![0-9]+]]
1581 ; INTERLEAVE: for.end:
1582 ; INTERLEAVE-NEXT: ret void
1588 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
1589 %f = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
1590 %0 = load i32, i32* %f, align 8
1592 store i32 %1, i32* %f, align 8
1593 %i.next = add nuw nsw i64 %i, 1
1594 %cond = icmp slt i64 %i.next, %n
1595 br i1 %cond, label %for.body, label %for.end
1601 ; Make sure we scalarize the step vectors used for pointer arithmetic, since
1602 ; vectorized step vectors cannot easily be simplified. (Interleaved accesses.)
1604 ; for (int i = 0; i < n; ++i)
1608 define void @scalarize_induction_variable_04(i32* %a, %pair.i32* %p, i32 %n) {
1609 ; CHECK-LABEL: @scalarize_induction_variable_04(
1610 ; CHECK-NEXT: entry:
1611 ; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
1612 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1613 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1614 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1615 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
1616 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1617 ; CHECK: vector.memcheck:
1618 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1619 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
1620 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
1621 ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1622 ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1623 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]]
1624 ; CHECK-NEXT: [[SCEVGEP23:%.*]] = bitcast %pair.i32* [[SCEVGEP2]] to i8*
1625 ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1626 ; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 1
1627 ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP7]]
1628 ; CHECK-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
1629 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]]
1630 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A4]], [[SCEVGEP23]]
1631 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1632 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1634 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
1635 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1636 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1637 ; CHECK: vector.body:
1638 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1639 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1640 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
1641 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
1642 ; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1643 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
1644 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
1645 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
1646 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
1647 ; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 1, !alias.scope !17
1648 ; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]], align 1, !alias.scope !17
1649 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1
1650 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
1651 ; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP17]], align 1, !alias.scope !20, !noalias !17
1652 ; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 1, !alias.scope !20, !noalias !17
1653 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1654 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1655 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1656 ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1657 ; CHECK: middle.block:
1658 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1659 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1661 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1662 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1664 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1665 ; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i64 [[I]], 2
1666 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
1667 ; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 1
1668 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1669 ; CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP23]], align 1
1670 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1671 ; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32
1672 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]]
1673 ; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1675 ; CHECK-NEXT: ret void
1677 ; IND-LABEL: @scalarize_induction_variable_04(
1679 ; IND-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1680 ; IND-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1681 ; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1682 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
1683 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1684 ; IND: vector.memcheck:
1685 ; IND-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1686 ; IND-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
1687 ; IND-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1688 ; IND-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1689 ; IND-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1690 ; IND-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1
1691 ; IND-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]]
1692 ; IND-NEXT: [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]]
1693 ; IND-NEXT: [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0
1694 ; IND-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]]
1695 ; IND-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1696 ; IND-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1698 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
1699 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
1701 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1702 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1703 ; IND-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1
1704 ; IND-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1705 ; IND-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i64 0
1706 ; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
1707 ; IND-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i64 1
1708 ; IND-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
1709 ; IND-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 1, !alias.scope !17
1710 ; IND-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]], align 1, !alias.scope !17
1711 ; IND-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
1712 ; IND-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
1713 ; IND-NEXT: store i32 [[TMP15]], i32* [[TMP17]], align 1, !alias.scope !20, !noalias !17
1714 ; IND-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 1, !alias.scope !20, !noalias !17
1715 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1716 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1717 ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1718 ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1719 ; IND: middle.block:
1720 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1721 ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1723 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1724 ; IND-NEXT: br label [[FOR_BODY:%.*]]
1726 ; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1727 ; IND-NEXT: [[TMP20:%.*]] = shl nsw i64 [[I]], 2
1728 ; IND-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
1729 ; IND-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 1
1730 ; IND-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1731 ; IND-NEXT: store i32 [[TMP22]], i32* [[TMP23]], align 1
1732 ; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1733 ; IND-NEXT: [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32
1734 ; IND-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]]
1735 ; IND-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1737 ; IND-NEXT: ret void
1739 ; UNROLL-LABEL: @scalarize_induction_variable_04(
1740 ; UNROLL-NEXT: entry:
1741 ; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1742 ; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1743 ; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1744 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
1745 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1746 ; UNROLL: vector.memcheck:
1747 ; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1748 ; UNROLL-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
1749 ; UNROLL-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1750 ; UNROLL-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1751 ; UNROLL-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1752 ; UNROLL-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1
1753 ; UNROLL-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]]
1754 ; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]]
1755 ; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0
1756 ; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]]
1757 ; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1758 ; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1759 ; UNROLL: vector.ph:
1760 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
1761 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
1762 ; UNROLL: vector.body:
1763 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1764 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1765 ; UNROLL-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1
1766 ; UNROLL-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 2
1767 ; UNROLL-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 3
1768 ; UNROLL-NEXT: [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1769 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1770 ; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 8, i64 8>
1771 ; UNROLL-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i64 0
1772 ; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
1773 ; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i64 1
1774 ; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
1775 ; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP13]], i64 0
1776 ; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
1777 ; UNROLL-NEXT: [[TMP20:%.*]] = extractelement <2 x i64> [[TMP13]], i64 1
1778 ; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
1779 ; UNROLL-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP15]], align 1, !alias.scope !17
1780 ; UNROLL-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP17]], align 1, !alias.scope !17
1781 ; UNROLL-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 1, !alias.scope !17
1782 ; UNROLL-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP21]], align 1, !alias.scope !17
1783 ; UNROLL-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
1784 ; UNROLL-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
1785 ; UNROLL-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP10]], i32 1
1786 ; UNROLL-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1
1787 ; UNROLL-NEXT: store i32 [[TMP22]], i32* [[TMP26]], align 1, !alias.scope !20, !noalias !17
1788 ; UNROLL-NEXT: store i32 [[TMP23]], i32* [[TMP27]], align 1, !alias.scope !20, !noalias !17
1789 ; UNROLL-NEXT: store i32 [[TMP24]], i32* [[TMP28]], align 1, !alias.scope !20, !noalias !17
1790 ; UNROLL-NEXT: store i32 [[TMP25]], i32* [[TMP29]], align 1, !alias.scope !20, !noalias !17
1791 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1792 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
1793 ; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1794 ; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1795 ; UNROLL: middle.block:
1796 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1797 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1798 ; UNROLL: scalar.ph:
1799 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1800 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
1802 ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1803 ; UNROLL-NEXT: [[TMP31:%.*]] = shl nsw i64 [[I]], 2
1804 ; UNROLL-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
1805 ; UNROLL-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 1
1806 ; UNROLL-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1807 ; UNROLL-NEXT: store i32 [[TMP33]], i32* [[TMP34]], align 1
1808 ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1809 ; UNROLL-NEXT: [[TMP35:%.*]] = trunc i64 [[I_NEXT]] to i32
1810 ; UNROLL-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP35]], [[N]]
1811 ; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1813 ; UNROLL-NEXT: ret void
1815 ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_04(
1816 ; UNROLL-NO-IC-NEXT: entry:
1817 ; UNROLL-NO-IC-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
1818 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1819 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1820 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1821 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
1822 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1823 ; UNROLL-NO-IC: vector.memcheck:
1824 ; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1825 ; UNROLL-NO-IC-NEXT: [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
1826 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
1827 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1828 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1829 ; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]]
1830 ; UNROLL-NO-IC-NEXT: [[SCEVGEP23:%.*]] = bitcast %pair.i32* [[SCEVGEP2]] to i8*
1831 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1832 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 1
1833 ; UNROLL-NO-IC-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP7]]
1834 ; UNROLL-NO-IC-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
1835 ; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]]
1836 ; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A4]], [[SCEVGEP23]]
1837 ; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1838 ; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1839 ; UNROLL-NO-IC: vector.ph:
1840 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
1841 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1842 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
1843 ; UNROLL-NO-IC: vector.body:
1844 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1845 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1846 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1847 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
1848 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
1849 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2
1850 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 3
1851 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1852 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
1853 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
1854 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
1855 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
1856 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
1857 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
1858 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
1859 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
1860 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
1861 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP15]], align 1, !alias.scope !17
1862 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP17]], align 1, !alias.scope !17
1863 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 1, !alias.scope !17
1864 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP21]], align 1, !alias.scope !17
1865 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1
1866 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
1867 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP10]], i32 1
1868 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1
1869 ; UNROLL-NO-IC-NEXT: store i32 [[TMP22]], i32* [[TMP26]], align 1, !alias.scope !20, !noalias !17
1870 ; UNROLL-NO-IC-NEXT: store i32 [[TMP23]], i32* [[TMP27]], align 1, !alias.scope !20, !noalias !17
1871 ; UNROLL-NO-IC-NEXT: store i32 [[TMP24]], i32* [[TMP28]], align 1, !alias.scope !20, !noalias !17
1872 ; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], i32* [[TMP29]], align 1, !alias.scope !20, !noalias !17
1873 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1874 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
1875 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1876 ; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1877 ; UNROLL-NO-IC: middle.block:
1878 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1879 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1880 ; UNROLL-NO-IC: scalar.ph:
1881 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1882 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
1883 ; UNROLL-NO-IC: for.body:
1884 ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1885 ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = shl nsw i64 [[I]], 2
1886 ; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
1887 ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 1
1888 ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1889 ; UNROLL-NO-IC-NEXT: store i32 [[TMP33]], i32* [[TMP34]], align 1
1890 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1891 ; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = trunc i64 [[I_NEXT]] to i32
1892 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP35]], [[N]]
1893 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1894 ; UNROLL-NO-IC: for.end:
1895 ; UNROLL-NO-IC-NEXT: ret void
1897 ; INTERLEAVE-LABEL: @scalarize_induction_variable_04(
1898 ; INTERLEAVE-NEXT: entry:
1899 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1900 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1901 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1902 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
1903 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1904 ; INTERLEAVE: vector.memcheck:
1905 ; INTERLEAVE-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1906 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
1907 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1908 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1909 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1910 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1
1911 ; INTERLEAVE-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]]
1912 ; INTERLEAVE-NEXT: [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]]
1913 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0
1914 ; INTERLEAVE-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]]
1915 ; INTERLEAVE-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1916 ; INTERLEAVE-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1917 ; INTERLEAVE: vector.ph:
1918 ; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7
1919 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1920 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 8, i64 [[N_MOD_VF]]
1921 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[TMP10]]
1922 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
1923 ; INTERLEAVE: vector.body:
1924 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1925 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1
1926 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2
1927 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 3
1928 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 4
1929 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 5
1930 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 6
1931 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 7
1932 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = shl nsw i64 [[INDEX]], 2
1933 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP14]], 2
1934 ; INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
1935 ; INTERLEAVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
1936 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP20]] to <16 x i32>*
1937 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP21]] to <16 x i32>*
1938 ; INTERLEAVE-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[TMP22]], align 1
1939 ; INTERLEAVE-NEXT: [[WIDE_VEC7:%.*]] = load <16 x i32>, <16 x i32>* [[TMP23]], align 1
1940 ; INTERLEAVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
1941 ; INTERLEAVE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1
1942 ; INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP12]], i32 1
1943 ; INTERLEAVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP13]], i32 1
1944 ; INTERLEAVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP14]], i32 1
1945 ; INTERLEAVE-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP15]], i32 1
1946 ; INTERLEAVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP16]], i32 1
1947 ; INTERLEAVE-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP17]], i32 1
1948 ; INTERLEAVE-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 0
1949 ; INTERLEAVE-NEXT: store i32 [[TMP32]], i32* [[TMP24]], align 1, !alias.scope !17, !noalias !20
1950 ; INTERLEAVE-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 4
1951 ; INTERLEAVE-NEXT: store i32 [[TMP33]], i32* [[TMP25]], align 1, !alias.scope !17, !noalias !20
1952 ; INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 8
1953 ; INTERLEAVE-NEXT: store i32 [[TMP34]], i32* [[TMP26]], align 1, !alias.scope !17, !noalias !20
1954 ; INTERLEAVE-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 12
1955 ; INTERLEAVE-NEXT: store i32 [[TMP35]], i32* [[TMP27]], align 1, !alias.scope !17, !noalias !20
1956 ; INTERLEAVE-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 0
1957 ; INTERLEAVE-NEXT: store i32 [[TMP36]], i32* [[TMP28]], align 1, !alias.scope !17, !noalias !20
1958 ; INTERLEAVE-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 4
1959 ; INTERLEAVE-NEXT: store i32 [[TMP37]], i32* [[TMP29]], align 1, !alias.scope !17, !noalias !20
1960 ; INTERLEAVE-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 8
1961 ; INTERLEAVE-NEXT: store i32 [[TMP38]], i32* [[TMP30]], align 1, !alias.scope !17, !noalias !20
1962 ; INTERLEAVE-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 12
1963 ; INTERLEAVE-NEXT: store i32 [[TMP39]], i32* [[TMP31]], align 1, !alias.scope !17, !noalias !20
1964 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1965 ; INTERLEAVE-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1966 ; INTERLEAVE-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1967 ; INTERLEAVE: middle.block:
1968 ; INTERLEAVE-NEXT: br label [[SCALAR_PH]]
1969 ; INTERLEAVE: scalar.ph:
1970 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1971 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
1972 ; INTERLEAVE: for.body:
1973 ; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1974 ; INTERLEAVE-NEXT: [[TMP41:%.*]] = shl nsw i64 [[I]], 2
1975 ; INTERLEAVE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP41]]
1976 ; INTERLEAVE-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 1
1977 ; INTERLEAVE-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1978 ; INTERLEAVE-NEXT: store i32 [[TMP43]], i32* [[TMP44]], align 1
1979 ; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1980 ; INTERLEAVE-NEXT: [[TMP45:%.*]] = trunc i64 [[I_NEXT]] to i32
1981 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP45]], [[N]]
1982 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1983 ; INTERLEAVE: for.end:
1984 ; INTERLEAVE-NEXT: ret void
1990 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry]
1991 %0 = shl nsw i64 %i, 2
1992 %1 = getelementptr inbounds i32, i32* %a, i64 %0
1993 %2 = load i32, i32* %1, align 1
1994 %3 = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
1995 store i32 %2, i32* %3, align 1
1996 %i.next = add nuw nsw i64 %i, 1
1997 %4 = trunc i64 %i.next to i32
1998 %cond = icmp eq i32 %4, %n
1999 br i1 %cond, label %for.end, label %for.body
2005 ; PR30542. Ensure we generate all the scalar steps for the induction variable.
2006 ; The scalar induction variable is used by a getelementptr instruction (uniform,
2007 ; needs only the first lane) and by a udiv (non-uniform, needs a step per lane).
2010 ; for (int i = 0; i < n; ++i) {
2021 define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) {
2022 ; CHECK-LABEL: @scalarize_induction_variable_05(
2023 ; CHECK-NEXT: entry:
2024 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2025 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 2
2026 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2028 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 2
2029 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]]
2030 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
2031 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
2032 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2033 ; CHECK: vector.body:
2034 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
2035 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[PRED_UDIV_CONTINUE2]] ]
2036 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
2037 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
2038 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
2039 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
2040 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
2041 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
2042 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2043 ; CHECK: pred.udiv.if:
2044 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
2045 ; CHECK-NEXT: [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP0]]
2046 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i32 0
2047 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]]
2048 ; CHECK: pred.udiv.continue:
2049 ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ]
2050 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
2051 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]]
2052 ; CHECK: pred.udiv.if1:
2053 ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1
2054 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
2055 ; CHECK-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]]
2056 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1
2057 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]]
2058 ; CHECK: pred.udiv.continue2:
2059 ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF1]] ]
2060 ; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
2061 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]]
2062 ; CHECK-NEXT: [[TMP16]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
2063 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2064 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2065 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2066 ; CHECK: middle.block:
2067 ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP16]])
2068 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2069 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2071 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2072 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
2073 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
2075 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2076 ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2077 ; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I]]
2078 ; CHECK-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2079 ; CHECK-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2081 ; CHECK-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2082 ; CHECK-NEXT: br label [[IF_END]]
2084 ; CHECK-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2085 ; CHECK-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2086 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2087 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2088 ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2090 ; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
2091 ; CHECK-NEXT: ret i32 [[VAR5]]
2093 ; IND-LABEL: @scalarize_induction_variable_05(
2095 ; IND-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2096 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 2
2097 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2099 ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483646
2100 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
2101 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
2103 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
2104 ; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[PRED_UDIV_CONTINUE2]] ]
2105 ; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
2106 ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
2107 ; IND-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>*
2108 ; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
2109 ; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2110 ; IND: pred.udiv.if:
2111 ; IND-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0
2112 ; IND-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP3]], [[INDEX]]
2113 ; IND-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
2114 ; IND-NEXT: br label [[PRED_UDIV_CONTINUE]]
2115 ; IND: pred.udiv.continue:
2116 ; IND-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
2117 ; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]]
2118 ; IND: pred.udiv.if1:
2119 ; IND-NEXT: [[TMP7:%.*]] = or i32 [[INDEX]], 1
2120 ; IND-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1
2121 ; IND-NEXT: [[TMP9:%.*]] = udiv i32 [[TMP8]], [[TMP7]]
2122 ; IND-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP9]], i64 1
2123 ; IND-NEXT: br label [[PRED_UDIV_CONTINUE2]]
2124 ; IND: pred.udiv.continue2:
2125 ; IND-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF1]] ]
2126 ; IND-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison>
2127 ; IND-NEXT: [[TMP13:%.*]] = shufflevector <2 x i1> [[TMP12]], <2 x i1> poison, <2 x i32> zeroinitializer
2128 ; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP13]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP11]]
2129 ; IND-NEXT: [[TMP14]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
2130 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2131 ; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2132 ; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2133 ; IND: middle.block:
2134 ; IND-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP14]])
2135 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2136 ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2138 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2139 ; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2140 ; IND-NEXT: br label [[FOR_BODY:%.*]]
2142 ; IND-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2143 ; IND-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2144 ; IND-NEXT: [[TMP17:%.*]] = zext i32 [[I]] to i64
2145 ; IND-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
2146 ; IND-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2147 ; IND-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2149 ; IND-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2150 ; IND-NEXT: br label [[IF_END]]
2152 ; IND-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2153 ; IND-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2154 ; IND-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2155 ; IND-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2156 ; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2158 ; IND-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
2159 ; IND-NEXT: ret i32 [[VAR5]]
2161 ; UNROLL-LABEL: @scalarize_induction_variable_05(
2162 ; UNROLL-NEXT: entry:
2163 ; UNROLL-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2164 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 4
2165 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2166 ; UNROLL: vector.ph:
2167 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483644
2168 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
2169 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
2170 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
2171 ; UNROLL: vector.body:
2172 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE10:%.*]] ]
2173 ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UDIV_CONTINUE10]] ]
2174 ; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UDIV_CONTINUE10]] ]
2175 ; UNROLL-NEXT: [[TMP0:%.*]] = or i32 [[INDEX]], 2
2176 ; UNROLL-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
2177 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
2178 ; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
2179 ; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
2180 ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2
2181 ; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
2182 ; UNROLL-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP5]], align 4
2183 ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2184 ; UNROLL: pred.udiv.if:
2185 ; UNROLL-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0
2186 ; UNROLL-NEXT: [[TMP7:%.*]] = udiv i32 [[TMP6]], [[INDEX]]
2187 ; UNROLL-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0
2188 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE]]
2189 ; UNROLL: pred.udiv.continue:
2190 ; UNROLL-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ]
2191 ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
2192 ; UNROLL: pred.udiv.if3:
2193 ; UNROLL-NEXT: [[TMP10:%.*]] = or i32 [[INDEX]], 1
2194 ; UNROLL-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1
2195 ; UNROLL-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]]
2196 ; UNROLL-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i64 1
2197 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE4]]
2198 ; UNROLL: pred.udiv.continue4:
2199 ; UNROLL-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF3]] ]
2200 ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
2201 ; UNROLL: pred.udiv.if7:
2202 ; UNROLL-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i64 0
2203 ; UNROLL-NEXT: [[TMP16:%.*]] = udiv i32 [[TMP15]], [[TMP0]]
2204 ; UNROLL-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i64 0
2205 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE8]]
2206 ; UNROLL: pred.udiv.continue8:
2207 ; UNROLL-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP17]], [[PRED_UDIV_IF7]] ]
2208 ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10]]
2209 ; UNROLL: pred.udiv.if9:
2210 ; UNROLL-NEXT: [[TMP19:%.*]] = or i32 [[INDEX]], 3
2211 ; UNROLL-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i64 1
2212 ; UNROLL-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[TMP19]]
2213 ; UNROLL-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP21]], i64 1
2214 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE10]]
2215 ; UNROLL: pred.udiv.continue10:
2216 ; UNROLL-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP22]], [[PRED_UDIV_IF9]] ]
2217 ; UNROLL-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison>
2218 ; UNROLL-NEXT: [[TMP25:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> poison, <2 x i32> zeroinitializer
2219 ; UNROLL-NEXT: [[TMP26:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT5]], <i1 true, i1 poison>
2220 ; UNROLL-NEXT: [[TMP27:%.*]] = shufflevector <2 x i1> [[TMP26]], <2 x i1> poison, <2 x i32> zeroinitializer
2221 ; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]]
2222 ; UNROLL-NEXT: [[PREDPHI11:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP23]]
2223 ; UNROLL-NEXT: [[TMP28]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
2224 ; UNROLL-NEXT: [[TMP29]] = add <2 x i32> [[PREDPHI11]], [[VEC_PHI1]]
2225 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2226 ; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2227 ; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2228 ; UNROLL: middle.block:
2229 ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP29]], [[TMP28]]
2230 ; UNROLL-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]])
2231 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2232 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2233 ; UNROLL: scalar.ph:
2234 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2235 ; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2236 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
2238 ; UNROLL-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2239 ; UNROLL-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2240 ; UNROLL-NEXT: [[TMP32:%.*]] = zext i32 [[I]] to i64
2241 ; UNROLL-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP32]]
2242 ; UNROLL-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2243 ; UNROLL-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2245 ; UNROLL-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2246 ; UNROLL-NEXT: br label [[IF_END]]
2248 ; UNROLL-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2249 ; UNROLL-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2250 ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2251 ; UNROLL-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2252 ; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2254 ; UNROLL-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
2255 ; UNROLL-NEXT: ret i32 [[VAR5]]
2257 ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_05(
2258 ; UNROLL-NO-IC-NEXT: entry:
2259 ; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2260 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 4
2261 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2262 ; UNROLL-NO-IC: vector.ph:
2263 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 4
2264 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]]
2265 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
2266 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
2267 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i32 0
2268 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT5]], <2 x i1> poison, <2 x i32> zeroinitializer
2269 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
2270 ; UNROLL-NO-IC: vector.body:
2271 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE10:%.*]] ]
2272 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[PRED_UDIV_CONTINUE10]] ]
2273 ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[PRED_UDIV_CONTINUE10]] ]
2274 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
2275 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2
2276 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
2277 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP1]]
2278 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
2279 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
2280 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP5]], align 4
2281 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 2
2282 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
2283 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 4
2284 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
2285 ; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2286 ; UNROLL-NO-IC: pred.udiv.if:
2287 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
2288 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], [[TMP0]]
2289 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
2290 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]]
2291 ; UNROLL-NO-IC: pred.udiv.continue:
2292 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_UDIV_IF]] ]
2293 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
2294 ; UNROLL-NO-IC-NEXT: br i1 [[TMP13]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
2295 ; UNROLL-NO-IC: pred.udiv.if3:
2296 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 1
2297 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
2298 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = udiv i32 [[TMP15]], [[TMP14]]
2299 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP16]], i32 1
2300 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE4]]
2301 ; UNROLL-NO-IC: pred.udiv.continue4:
2302 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP17]], [[PRED_UDIV_IF3]] ]
2303 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT6]], i32 0
2304 ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
2305 ; UNROLL-NO-IC: pred.udiv.if7:
2306 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i32 0
2307 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[TMP1]]
2308 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0
2309 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE8]]
2310 ; UNROLL-NO-IC: pred.udiv.continue8:
2311 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP22]], [[PRED_UDIV_IF7]] ]
2312 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT6]], i32 1
2313 ; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10]]
2314 ; UNROLL-NO-IC: pred.udiv.if9:
2315 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 3
2316 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i32 1
2317 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = udiv i32 [[TMP26]], [[TMP25]]
2318 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP27]], i32 1
2319 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE10]]
2320 ; UNROLL-NO-IC: pred.udiv.continue10:
2321 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP28]], [[PRED_UDIV_IF9]] ]
2322 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
2323 ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT6]], <i1 true, i1 true>
2324 ; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP18]]
2325 ; UNROLL-NO-IC-NEXT: [[PREDPHI11:%.*]] = select <2 x i1> [[TMP31]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP29]]
2326 ; UNROLL-NO-IC-NEXT: [[TMP32]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
2327 ; UNROLL-NO-IC-NEXT: [[TMP33]] = add <2 x i32> [[PREDPHI11]], [[VEC_PHI1]]
2328 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2329 ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2330 ; UNROLL-NO-IC-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2331 ; UNROLL-NO-IC: middle.block:
2332 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP33]], [[TMP32]]
2333 ; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]])
2334 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2335 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2336 ; UNROLL-NO-IC: scalar.ph:
2337 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2338 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
2339 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
2340 ; UNROLL-NO-IC: for.body:
2341 ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2342 ; UNROLL-NO-IC-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2343 ; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I]]
2344 ; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2345 ; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2346 ; UNROLL-NO-IC: if.then:
2347 ; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2348 ; UNROLL-NO-IC-NEXT: br label [[IF_END]]
2349 ; UNROLL-NO-IC: if.end:
2350 ; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2351 ; UNROLL-NO-IC-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2352 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2353 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2354 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2355 ; UNROLL-NO-IC: for.end:
2356 ; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
2357 ; UNROLL-NO-IC-NEXT: ret i32 [[VAR5]]
2359 ; INTERLEAVE-LABEL: @scalarize_induction_variable_05(
2360 ; INTERLEAVE-NEXT: entry:
2361 ; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2362 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 8
2363 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2364 ; INTERLEAVE: vector.ph:
2365 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483640
2366 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0
2367 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
2368 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
2369 ; INTERLEAVE: vector.body:
2370 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE18:%.*]] ]
2371 ; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_UDIV_CONTINUE18]] ]
2372 ; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_UDIV_CONTINUE18]] ]
2373 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = or i32 [[INDEX]], 4
2374 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
2375 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
2376 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
2377 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
2378 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4
2379 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
2380 ; INTERLEAVE-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
2381 ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2382 ; INTERLEAVE: pred.udiv.if:
2383 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 0
2384 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = udiv i32 [[TMP6]], [[INDEX]]
2385 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
2386 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE]]
2387 ; INTERLEAVE: pred.udiv.continue:
2388 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ]
2389 ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
2390 ; INTERLEAVE: pred.udiv.if3:
2391 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = or i32 [[INDEX]], 1
2392 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1
2393 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]]
2394 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP12]], i64 1
2395 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE4]]
2396 ; INTERLEAVE: pred.udiv.continue4:
2397 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF3]] ]
2398 ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
2399 ; INTERLEAVE: pred.udiv.if5:
2400 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = or i32 [[INDEX]], 2
2401 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2
2402 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = udiv i32 [[TMP16]], [[TMP15]]
2403 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP17]], i64 2
2404 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE6]]
2405 ; INTERLEAVE: pred.udiv.continue6:
2406 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP18]], [[PRED_UDIV_IF5]] ]
2407 ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
2408 ; INTERLEAVE: pred.udiv.if7:
2409 ; INTERLEAVE-NEXT: [[TMP20:%.*]] = or i32 [[INDEX]], 3
2410 ; INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
2411 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = udiv i32 [[TMP21]], [[TMP20]]
2412 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP22]], i64 3
2413 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE8]]
2414 ; INTERLEAVE: pred.udiv.continue8:
2415 ; INTERLEAVE-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP23]], [[PRED_UDIV_IF7]] ]
2416 ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
2417 ; INTERLEAVE: pred.udiv.if11:
2418 ; INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 0
2419 ; INTERLEAVE-NEXT: [[TMP26:%.*]] = udiv i32 [[TMP25]], [[TMP0]]
2420 ; INTERLEAVE-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i64 0
2421 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE12]]
2422 ; INTERLEAVE: pred.udiv.continue12:
2423 ; INTERLEAVE-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE8]] ], [ [[TMP27]], [[PRED_UDIV_IF11]] ]
2424 ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]]
2425 ; INTERLEAVE: pred.udiv.if13:
2426 ; INTERLEAVE-NEXT: [[TMP29:%.*]] = or i32 [[INDEX]], 5
2427 ; INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 1
2428 ; INTERLEAVE-NEXT: [[TMP31:%.*]] = udiv i32 [[TMP30]], [[TMP29]]
2429 ; INTERLEAVE-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i64 1
2430 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE14]]
2431 ; INTERLEAVE: pred.udiv.continue14:
2432 ; INTERLEAVE-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP32]], [[PRED_UDIV_IF13]] ]
2433 ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16:%.*]]
2434 ; INTERLEAVE: pred.udiv.if15:
2435 ; INTERLEAVE-NEXT: [[TMP34:%.*]] = or i32 [[INDEX]], 6
2436 ; INTERLEAVE-NEXT: [[TMP35:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 2
2437 ; INTERLEAVE-NEXT: [[TMP36:%.*]] = udiv i32 [[TMP35]], [[TMP34]]
2438 ; INTERLEAVE-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i64 2
2439 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE16]]
2440 ; INTERLEAVE: pred.udiv.continue16:
2441 ; INTERLEAVE-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP37]], [[PRED_UDIV_IF15]] ]
2442 ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF17:%.*]], label [[PRED_UDIV_CONTINUE18]]
2443 ; INTERLEAVE: pred.udiv.if17:
2444 ; INTERLEAVE-NEXT: [[TMP39:%.*]] = or i32 [[INDEX]], 7
2445 ; INTERLEAVE-NEXT: [[TMP40:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 3
2446 ; INTERLEAVE-NEXT: [[TMP41:%.*]] = udiv i32 [[TMP40]], [[TMP39]]
2447 ; INTERLEAVE-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i64 3
2448 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE18]]
2449 ; INTERLEAVE: pred.udiv.continue18:
2450 ; INTERLEAVE-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE16]] ], [ [[TMP42]], [[PRED_UDIV_IF17]] ]
2451 ; INTERLEAVE-NEXT: [[TMP44:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison, i1 poison, i1 poison>
2452 ; INTERLEAVE-NEXT: [[TMP45:%.*]] = shufflevector <4 x i1> [[TMP44]], <4 x i1> poison, <4 x i32> zeroinitializer
2453 ; INTERLEAVE-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT9]], <i1 true, i1 poison, i1 poison, i1 poison>
2454 ; INTERLEAVE-NEXT: [[TMP47:%.*]] = shufflevector <4 x i1> [[TMP46]], <4 x i1> poison, <4 x i32> zeroinitializer
2455 ; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP45]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP24]]
2456 ; INTERLEAVE-NEXT: [[PREDPHI19:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> [[TMP43]]
2457 ; INTERLEAVE-NEXT: [[TMP48]] = add <4 x i32> [[PREDPHI]], [[VEC_PHI]]
2458 ; INTERLEAVE-NEXT: [[TMP49]] = add <4 x i32> [[PREDPHI19]], [[VEC_PHI1]]
2459 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
2460 ; INTERLEAVE-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2461 ; INTERLEAVE-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2462 ; INTERLEAVE: middle.block:
2463 ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]]
2464 ; INTERLEAVE-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
2465 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2466 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2467 ; INTERLEAVE: scalar.ph:
2468 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2469 ; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2470 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
2471 ; INTERLEAVE: for.body:
2472 ; INTERLEAVE-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2473 ; INTERLEAVE-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2474 ; INTERLEAVE-NEXT: [[TMP52:%.*]] = zext i32 [[I]] to i64
2475 ; INTERLEAVE-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP52]]
2476 ; INTERLEAVE-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2477 ; INTERLEAVE-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2478 ; INTERLEAVE: if.then:
2479 ; INTERLEAVE-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2480 ; INTERLEAVE-NEXT: br label [[IF_END]]
2481 ; INTERLEAVE: if.end:
2482 ; INTERLEAVE-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2483 ; INTERLEAVE-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2484 ; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2485 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2486 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2487 ; INTERLEAVE: for.end:
2488 ; INTERLEAVE-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
2489 ; INTERLEAVE-NEXT: ret i32 [[VAR5]]
2495 %i = phi i32 [ 0, %entry ], [ %i.next, %if.end ]
2496 %sum = phi i32 [ 0, %entry ], [ %var4, %if.end ]
2497 %var0 = getelementptr inbounds i32, i32* %a, i32 %i
2498 %var1 = load i32, i32* %var0, align 4
2499 br i1 %c, label %if.then, label %if.end
2502 %var2 = udiv i32 %var1, %i
2506 %var3 = phi i32 [ %var2, %if.then ], [ %var1, %for.body ]
2507 %var4 = add i32 %var3, %sum
2508 %i.next = add nuw nsw i32 %i, 1
2509 %cond = icmp slt i32 %i.next, %n
2510 br i1 %cond, label %for.body, label %for.end
2513 %var5 = phi i32 [ %var4, %if.end ]
2517 ; Ensure we generate both a vector and a scalar induction variable. In this
2518 ; test, the induction variable is used by an instruction that will be
2519 ; vectorized (trunc) as well as an instruction that will remain in scalar form (getelementptr).
2525 %pair.i16 = type { i16, i16 } ; struct of two i16 fields; @iv_vector_and_scalar_users takes %p as a pointer to it and its getelementptrs address field index 1
2526 define void @iv_vector_and_scalar_users(%pair.i16* %p, i32 %a, i32 %n) {
2527 ; CHECK-LABEL: @iv_vector_and_scalar_users(
2528 ; CHECK-NEXT: entry:
2529 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2530 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2531 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2532 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
2533 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2535 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
2536 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
2537 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
2538 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
2539 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2540 ; CHECK: vector.body:
2541 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2542 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2543 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
2544 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
2545 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
2546 ; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16>
2547 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[TMP3]], i32 1
2548 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1
2549 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0
2550 ; CHECK-NEXT: store i16 [[TMP9]], i16* [[TMP7]], align 2
2551 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1
2552 ; CHECK-NEXT: store i16 [[TMP10]], i16* [[TMP8]], align 2
2553 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2554 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
2555 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2556 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2557 ; CHECK: middle.block:
2558 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2559 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2561 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2562 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
2564 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2565 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[I]] to i32
2566 ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[A]], [[TMP12]]
2567 ; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
2568 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2569 ; CHECK-NEXT: store i16 [[TMP14]], i16* [[TMP15]], align 2
2570 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2571 ; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[I_NEXT]] to i32
2572 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP16]], [[N]]
2573 ; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2575 ; CHECK-NEXT: ret void
2577 ; IND-LABEL: @iv_vector_and_scalar_users(
2579 ; IND-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2580 ; IND-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2581 ; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2582 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
2583 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2585 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
2586 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
2587 ; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
2588 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
2590 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2591 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2592 ; IND-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
2593 ; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
2594 ; IND-NEXT: [[TMP5:%.*]] = trunc <2 x i32> [[TMP4]] to <2 x i16>
2595 ; IND-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1
2596 ; IND-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1
2597 ; IND-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[TMP5]], i64 0
2598 ; IND-NEXT: store i16 [[TMP8]], i16* [[TMP6]], align 2
2599 ; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i64 1
2600 ; IND-NEXT: store i16 [[TMP9]], i16* [[TMP7]], align 2
2601 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2602 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
2603 ; IND-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2604 ; IND-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2605 ; IND: middle.block:
2606 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2607 ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2609 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2610 ; IND-NEXT: br label [[FOR_BODY:%.*]]
2612 ; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2613 ; IND-NEXT: [[TMP11:%.*]] = trunc i64 [[I]] to i32
2614 ; IND-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[A]]
2615 ; IND-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
2616 ; IND-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2617 ; IND-NEXT: store i16 [[TMP13]], i16* [[TMP14]], align 2
2618 ; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2619 ; IND-NEXT: [[TMP15:%.*]] = trunc i64 [[I_NEXT]] to i32
2620 ; IND-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP15]], [[N]]
2621 ; IND-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2623 ; IND-NEXT: ret void
2625 ; UNROLL-LABEL: @iv_vector_and_scalar_users(
2626 ; UNROLL-NEXT: entry:
2627 ; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2628 ; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2629 ; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2630 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
2631 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2632 ; UNROLL: vector.ph:
2633 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
2634 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
2635 ; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
2636 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i64 0
2637 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
2638 ; UNROLL: vector.body:
2639 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2640 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2641 ; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
2642 ; UNROLL-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2
2643 ; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3
2644 ; UNROLL-NEXT: [[TMP6:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
2645 ; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[BROADCAST_SPLATINSERT2]], <i32 2, i32 poison>
2646 ; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> zeroinitializer
2647 ; UNROLL-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[VEC_IND]]
2648 ; UNROLL-NEXT: [[TMP10:%.*]] = trunc <2 x i32> [[TMP6]] to <2 x i16>
2649 ; UNROLL-NEXT: [[TMP11:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i16>
2650 ; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1
2651 ; UNROLL-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1
2652 ; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1
2653 ; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1
2654 ; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP10]], i64 0
2655 ; UNROLL-NEXT: store i16 [[TMP16]], i16* [[TMP12]], align 2
2656 ; UNROLL-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i64 1
2657 ; UNROLL-NEXT: store i16 [[TMP17]], i16* [[TMP13]], align 2
2658 ; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP11]], i64 0
2659 ; UNROLL-NEXT: store i16 [[TMP18]], i16* [[TMP14]], align 2
2660 ; UNROLL-NEXT: [[TMP19:%.*]] = extractelement <2 x i16> [[TMP11]], i64 1
2661 ; UNROLL-NEXT: store i16 [[TMP19]], i16* [[TMP15]], align 2
2662 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2663 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
2664 ; UNROLL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2665 ; UNROLL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2666 ; UNROLL: middle.block:
2667 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2668 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2669 ; UNROLL: scalar.ph:
2670 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2671 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
2673 ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2674 ; UNROLL-NEXT: [[TMP21:%.*]] = trunc i64 [[I]] to i32
2675 ; UNROLL-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], [[A]]
2676 ; UNROLL-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
2677 ; UNROLL-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2678 ; UNROLL-NEXT: store i16 [[TMP23]], i16* [[TMP24]], align 2
2679 ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2680 ; UNROLL-NEXT: [[TMP25:%.*]] = trunc i64 [[I_NEXT]] to i32
2681 ; UNROLL-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP25]], [[N]]
2682 ; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2684 ; UNROLL-NEXT: ret void
2686 ; UNROLL-NO-IC-LABEL: @iv_vector_and_scalar_users(
2687 ; UNROLL-NO-IC-NEXT: entry:
2688 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2689 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2690 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2691 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
2692 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2693 ; UNROLL-NO-IC: vector.ph:
2694 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
2695 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
2696 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
2697 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
2698 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
2699 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
2700 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
2701 ; UNROLL-NO-IC: vector.body:
2702 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2703 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2704 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
2705 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
2706 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
2707 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
2708 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
2709 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
2710 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[BROADCAST_SPLAT3]], [[STEP_ADD]]
2711 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = trunc <2 x i32> [[TMP7]] to <2 x i16>
2712 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc <2 x i32> [[TMP8]] to <2 x i16>
2713 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[TMP3]], i32 1
2714 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1
2715 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1
2716 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP6]], i32 1
2717 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP9]], i32 0
2718 ; UNROLL-NO-IC-NEXT: store i16 [[TMP15]], i16* [[TMP11]], align 2
2719 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP9]], i32 1
2720 ; UNROLL-NO-IC-NEXT: store i16 [[TMP16]], i16* [[TMP12]], align 2
2721 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i32 0
2722 ; UNROLL-NO-IC-NEXT: store i16 [[TMP17]], i16* [[TMP13]], align 2
2723 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1
2724 ; UNROLL-NO-IC-NEXT: store i16 [[TMP18]], i16* [[TMP14]], align 2
2725 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2726 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
2727 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2728 ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2729 ; UNROLL-NO-IC: middle.block:
2730 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2731 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2732 ; UNROLL-NO-IC: scalar.ph:
2733 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2734 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
2735 ; UNROLL-NO-IC: for.body:
2736 ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2737 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = trunc i64 [[I]] to i32
2738 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i32 [[A]], [[TMP20]]
2739 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = trunc i32 [[TMP21]] to i16
2740 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2741 ; UNROLL-NO-IC-NEXT: store i16 [[TMP22]], i16* [[TMP23]], align 2
2742 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2743 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32
2744 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]]
2745 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2746 ; UNROLL-NO-IC: for.end:
2747 ; UNROLL-NO-IC-NEXT: ret void
2749 ; INTERLEAVE-LABEL: @iv_vector_and_scalar_users(
2750 ; INTERLEAVE-NEXT: entry:
2751 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2752 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2753 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2754 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
2755 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2756 ; INTERLEAVE: vector.ph:
2757 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
2758 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
2759 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
2760 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
2761 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
2762 ; INTERLEAVE: vector.body:
2763 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2764 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2765 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
2766 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2
2767 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3
2768 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 4
2769 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 5
2770 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 6
2771 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 7
2772 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
2773 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = add <4 x i32> [[BROADCAST_SPLATINSERT2]], <i32 4, i32 poison, i32 poison, i32 poison>
2774 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> zeroinitializer
2775 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[VEC_IND]]
2776 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i16>
2777 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[TMP13]] to <4 x i16>
2778 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1
2779 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1
2780 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1
2781 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1
2782 ; INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP6]], i32 1
2783 ; INTERLEAVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP7]], i32 1
2784 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP8]], i32 1
2785 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP9]], i32 1
2786 ; INTERLEAVE-NEXT: [[TMP24:%.*]] = extractelement <4 x i16> [[TMP14]], i64 0
2787 ; INTERLEAVE-NEXT: store i16 [[TMP24]], i16* [[TMP16]], align 2
2788 ; INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[TMP14]], i64 1
2789 ; INTERLEAVE-NEXT: store i16 [[TMP25]], i16* [[TMP17]], align 2
2790 ; INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[TMP14]], i64 2
2791 ; INTERLEAVE-NEXT: store i16 [[TMP26]], i16* [[TMP18]], align 2
2792 ; INTERLEAVE-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[TMP14]], i64 3
2793 ; INTERLEAVE-NEXT: store i16 [[TMP27]], i16* [[TMP19]], align 2
2794 ; INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement <4 x i16> [[TMP15]], i64 0
2795 ; INTERLEAVE-NEXT: store i16 [[TMP28]], i16* [[TMP20]], align 2
2796 ; INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <4 x i16> [[TMP15]], i64 1
2797 ; INTERLEAVE-NEXT: store i16 [[TMP29]], i16* [[TMP21]], align 2
2798 ; INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement <4 x i16> [[TMP15]], i64 2
2799 ; INTERLEAVE-NEXT: store i16 [[TMP30]], i16* [[TMP22]], align 2
2800 ; INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <4 x i16> [[TMP15]], i64 3
2801 ; INTERLEAVE-NEXT: store i16 [[TMP31]], i16* [[TMP23]], align 2
2802 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2803 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
2804 ; INTERLEAVE-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2805 ; INTERLEAVE-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2806 ; INTERLEAVE: middle.block:
2807 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2808 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2809 ; INTERLEAVE: scalar.ph:
2810 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2811 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
2812 ; INTERLEAVE: for.body:
2813 ; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2814 ; INTERLEAVE-NEXT: [[TMP33:%.*]] = trunc i64 [[I]] to i32
2815 ; INTERLEAVE-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], [[A]]
2816 ; INTERLEAVE-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i16
2817 ; INTERLEAVE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2818 ; INTERLEAVE-NEXT: store i16 [[TMP35]], i16* [[TMP36]], align 2
2819 ; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2820 ; INTERLEAVE-NEXT: [[TMP37:%.*]] = trunc i64 [[I_NEXT]] to i32
2821 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP37]], [[N]]
2822 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2823 ; INTERLEAVE: for.end:
2824 ; INTERLEAVE-NEXT: ret void
2830 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
2831 %0 = trunc i64 %i to i32
2833 %2 = trunc i32 %1 to i16
2834 %3 = getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %i, i32 1
2835 store i16 %2, i16* %3, align 2
2836 %i.next = add nuw nsw i64 %i, 1
2837 %4 = trunc i64 %i.next to i32
2838 %cond = icmp eq i32 %4, %n
2839 br i1 %cond, label %for.end, label %for.body
2845 ; Make sure that the loop exit count computation does not overflow for i8 and
2846 ; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
2847 ; induction variable to a bigger type the exit count computation will overflow
; @i8_loop: and-reduction driven by an i8 counter that starts at 0 and is
; decremented until it wraps back around to 0, i.e. 256 iterations. The
; assertions below expect the vector loop to count to 256 in an i32 induction
; variable.
2851 define i32 @i8_loop() nounwind readnone ssp uwtable {
2852 ; CHECK-LABEL: @i8_loop(
2853 ; CHECK-NEXT: entry:
2854 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2856 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2857 ; CHECK: vector.body:
2858 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2859 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
2860 ; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
2861 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2862 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2863 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2864 ; CHECK: middle.block:
2865 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]])
2866 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256
2867 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
2869 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2870 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
2871 ; CHECK-NEXT: br label [[LOOP:%.*]]
2873 ; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
2874 ; CHECK-NEXT: [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
2875 ; CHECK-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4
2876 ; CHECK-NEXT: [[B_NEXT]] = add i8 [[B_0]], -1
2877 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[B_NEXT]], 0
2878 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
2880 ; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
2881 ; CHECK-NEXT: ret i32 [[A_0_AND_LCSSA]]
2883 ; IND-LABEL: @i8_loop(
2885 ; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2887 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
2889 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2890 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2891 ; IND-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2892 ; IND-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2893 ; IND: middle.block:
2894 ; IND-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
2896 ; IND-NEXT: br label [[LOOP:%.*]]
2898 ; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
2900 ; IND-NEXT: ret i32 0
2902 ; UNROLL-LABEL: @i8_loop(
2903 ; UNROLL-NEXT: entry:
2904 ; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2905 ; UNROLL: vector.ph:
2906 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
2907 ; UNROLL: vector.body:
2908 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2909 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2910 ; UNROLL-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2911 ; UNROLL-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2912 ; UNROLL: middle.block:
2913 ; UNROLL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
2914 ; UNROLL: scalar.ph:
2915 ; UNROLL-NEXT: br label [[LOOP:%.*]]
2917 ; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
2919 ; UNROLL-NEXT: ret i32 0
2921 ; UNROLL-NO-IC-LABEL: @i8_loop(
2922 ; UNROLL-NO-IC-NEXT: entry:
2923 ; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2924 ; UNROLL-NO-IC: vector.ph:
2925 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
2926 ; UNROLL-NO-IC: vector.body:
2927 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2928 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
2929 ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
2930 ; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
2931 ; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
2932 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2933 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2934 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2935 ; UNROLL-NO-IC: middle.block:
2936 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP1]], [[TMP0]]
2937 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
2938 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256
2939 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
2940 ; UNROLL-NO-IC: scalar.ph:
2941 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2942 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
2943 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
2944 ; UNROLL-NO-IC: loop:
2945 ; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
2946 ; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
2947 ; UNROLL-NO-IC-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4
2948 ; UNROLL-NO-IC-NEXT: [[B_NEXT]] = add i8 [[B_0]], -1
2949 ; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i8 [[B_NEXT]], 0
2950 ; UNROLL-NO-IC-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
2951 ; UNROLL-NO-IC: exit:
2952 ; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
2953 ; UNROLL-NO-IC-NEXT: ret i32 [[A_0_AND_LCSSA]]
2955 ; INTERLEAVE-LABEL: @i8_loop(
2956 ; INTERLEAVE-NEXT: entry:
2957 ; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2958 ; INTERLEAVE: vector.ph:
2959 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
2960 ; INTERLEAVE: vector.body:
2961 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2962 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
2963 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2964 ; INTERLEAVE-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2965 ; INTERLEAVE: middle.block:
2966 ; INTERLEAVE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
2967 ; INTERLEAVE: scalar.ph:
2968 ; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
2970 ; INTERLEAVE-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
2972 ; INTERLEAVE-NEXT: ret i32 0
; Input loop body.
; %a.0 is the and-reduction accumulator, seeded with 1. Since 1 & 4 == 0 the
; reduced value is 0, which is why the instcombine-enabled prefixes above
; expect `ret i32 0`.
2978 %a.0 = phi i32 [ 1, %entry ], [ %a.0.and, %loop ]
; %b.0 is the i8 loop counter, seeded with 0.
2979 %b.0 = phi i8 [ 0, %entry ], [ %b.next, %loop ]
2980 %a.0.and = and i32 %a.0, 4
; Decrement without wrap flags: the first iteration takes the counter from 0
; to 255 (i8 -1).
2981 %b.next = add i8 %b.0, -1
; Leave the loop once the counter has come back around to 0.
2982 %ec = icmp eq i8 %b.next, 0
2983 br i1 %ec, label %exit, label %loop
; @i16_loop: and-reduction driven by an i16 counter that starts at 0 and is
; decremented until it wraps back around to 0, i.e. 65536 iterations. The
; assertions below expect the vector loop to count to 65536 in an i32
; induction variable.
2990 define i32 @i16_loop() nounwind readnone ssp uwtable {
2991 ; CHECK-LABEL: @i16_loop(
2992 ; CHECK-NEXT: entry:
2993 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2995 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2996 ; CHECK: vector.body:
2997 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2998 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
2999 ; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
3000 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3001 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3002 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3003 ; CHECK: middle.block:
3004 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]])
3005 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 65536, 65536
3006 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
3008 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
3009 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
3010 ; CHECK-NEXT: br label [[LOOP:%.*]]
3012 ; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
3013 ; CHECK-NEXT: [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_0_NEXT:%.*]], [[LOOP]] ]
3014 ; CHECK-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4
3015 ; CHECK-NEXT: [[B_0_NEXT]] = add i16 [[B_0]], -1
3016 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[B_0_NEXT]], 0
3017 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
3019 ; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
3020 ; CHECK-NEXT: ret i32 [[A_0_AND_LCSSA]]
3022 ; IND-LABEL: @i16_loop(
3024 ; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3026 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
3028 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3029 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3030 ; IND-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3031 ; IND-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3032 ; IND: middle.block:
3033 ; IND-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
3035 ; IND-NEXT: br label [[LOOP:%.*]]
3037 ; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
3039 ; IND-NEXT: ret i32 0
3041 ; UNROLL-LABEL: @i16_loop(
3042 ; UNROLL-NEXT: entry:
3043 ; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3044 ; UNROLL: vector.ph:
3045 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
3046 ; UNROLL: vector.body:
3047 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3048 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3049 ; UNROLL-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3050 ; UNROLL-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3051 ; UNROLL: middle.block:
3052 ; UNROLL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
3053 ; UNROLL: scalar.ph:
3054 ; UNROLL-NEXT: br label [[LOOP:%.*]]
3056 ; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
3058 ; UNROLL-NEXT: ret i32 0
3060 ; UNROLL-NO-IC-LABEL: @i16_loop(
3061 ; UNROLL-NO-IC-NEXT: entry:
3062 ; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3063 ; UNROLL-NO-IC: vector.ph:
3064 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
3065 ; UNROLL-NO-IC: vector.body:
3066 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3067 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
3068 ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
3069 ; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
3070 ; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
3071 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3072 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3073 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3074 ; UNROLL-NO-IC: middle.block:
3075 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP1]], [[TMP0]]
3076 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
3077 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 65536, 65536
3078 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
3079 ; UNROLL-NO-IC: scalar.ph:
3080 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
3081 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
3082 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
3083 ; UNROLL-NO-IC: loop:
3084 ; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
3085 ; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_0_NEXT:%.*]], [[LOOP]] ]
3086 ; UNROLL-NO-IC-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4
3087 ; UNROLL-NO-IC-NEXT: [[B_0_NEXT]] = add i16 [[B_0]], -1
3088 ; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i16 [[B_0_NEXT]], 0
3089 ; UNROLL-NO-IC-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
3090 ; UNROLL-NO-IC: exit:
3091 ; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
3092 ; UNROLL-NO-IC-NEXT: ret i32 [[A_0_AND_LCSSA]]
3094 ; INTERLEAVE-LABEL: @i16_loop(
3095 ; INTERLEAVE-NEXT: entry:
3096 ; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3097 ; INTERLEAVE: vector.ph:
3098 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
3099 ; INTERLEAVE: vector.body:
3100 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3101 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
3102 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3103 ; INTERLEAVE-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3104 ; INTERLEAVE: middle.block:
3105 ; INTERLEAVE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
3106 ; INTERLEAVE: scalar.ph:
3107 ; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
3109 ; INTERLEAVE-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
3111 ; INTERLEAVE-NEXT: ret i32 0
; Input loop body.
; %a.0 is the and-reduction accumulator, seeded with 1. Since 1 & 4 == 0 the
; reduced value is 0, which is why the instcombine-enabled prefixes above
; expect `ret i32 0`.
3117 %a.0 = phi i32 [ 1, %entry ], [ %a.0.and, %loop ]
; %b.0 is the i16 loop counter, seeded with 0.
3118 %b.0 = phi i16 [ 0, %entry ], [ %b.0.next, %loop ]
3119 %a.0.and = and i32 %a.0, 4
; Decrement without wrap flags: the first iteration takes the counter from 0
; to 65535 (i16 -1).
3120 %b.0.next = add i16 %b.0, -1
; Leave the loop once the counter has come back around to 0.
3121 %ec = icmp eq i16 %b.0.next, 0
3122 br i1 %ec, label %exit, label %loop
3128 ; This loop has a backedge taken count of i32_max. We need to check for this
3129 ; condition and branch directly to the scalar loop.
; @max_i32_backedgetaken: same and-reduction shape as the narrower-counter
; tests, but the counter is a full i32 that starts at 0 and is decremented
; until it wraps back to 0. The backedge is therefore taken 2^32 - 1 times and
; the trip count (2^32) does not fit in i32. Every prefix below expects the
; entry block to branch unconditionally to the scalar preheader (`br i1 true`).
3133 define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
3134 ; CHECK-LABEL: @max_i32_backedgetaken(
3135 ; CHECK-NEXT: entry:
3136 ; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3138 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3139 ; CHECK: vector.body:
3140 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3141 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
3142 ; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
3143 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3144 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
3145 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3146 ; CHECK: middle.block:
3147 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]])
3148 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0
3149 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
3151 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
3152 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
3153 ; CHECK-NEXT: br label [[LOOP:%.*]]
3155 ; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
3156 ; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
3157 ; CHECK-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4
3158 ; CHECK-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
3159 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
3160 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
3162 ; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
3163 ; CHECK-NEXT: ret i32 [[A_0_AND_LCSSA]]
3165 ; IND-LABEL: @max_i32_backedgetaken(
3167 ; IND-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3169 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
3171 ; IND-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3172 ; IND: middle.block:
3173 ; IND-NEXT: br i1 poison, label [[EXIT:%.*]], label [[SCALAR_PH]]
3175 ; IND-NEXT: br label [[LOOP:%.*]]
3177 ; IND-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
3178 ; IND-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
3179 ; IND-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
3180 ; IND-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
3182 ; IND-NEXT: ret i32 0
3184 ; UNROLL-LABEL: @max_i32_backedgetaken(
3185 ; UNROLL-NEXT: entry:
3186 ; UNROLL-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3187 ; UNROLL: vector.ph:
3188 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
3189 ; UNROLL: vector.body:
3190 ; UNROLL-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3191 ; UNROLL: middle.block:
3192 ; UNROLL-NEXT: br i1 poison, label [[EXIT:%.*]], label [[SCALAR_PH]]
3193 ; UNROLL: scalar.ph:
3194 ; UNROLL-NEXT: br label [[LOOP:%.*]]
3196 ; UNROLL-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
3197 ; UNROLL-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
3198 ; UNROLL-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
3199 ; UNROLL-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
3201 ; UNROLL-NEXT: ret i32 0
3203 ; UNROLL-NO-IC-LABEL: @max_i32_backedgetaken(
3204 ; UNROLL-NO-IC-NEXT: entry:
3205 ; UNROLL-NO-IC-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3206 ; UNROLL-NO-IC: vector.ph:
3207 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
3208 ; UNROLL-NO-IC: vector.body:
3209 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3210 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
3211 ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
3212 ; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
3213 ; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
3214 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3215 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
3216 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3217 ; UNROLL-NO-IC: middle.block:
3218 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP1]], [[TMP0]]
3219 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
3220 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0
3221 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
3222 ; UNROLL-NO-IC: scalar.ph:
3223 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
3224 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
3225 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
3226 ; UNROLL-NO-IC: loop:
3227 ; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
3228 ; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
3229 ; UNROLL-NO-IC-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4
3230 ; UNROLL-NO-IC-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
3231 ; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
3232 ; UNROLL-NO-IC-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
3233 ; UNROLL-NO-IC: exit:
3234 ; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
3235 ; UNROLL-NO-IC-NEXT: ret i32 [[A_0_AND_LCSSA]]
3237 ; INTERLEAVE-LABEL: @max_i32_backedgetaken(
3238 ; INTERLEAVE-NEXT: entry:
3239 ; INTERLEAVE-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3240 ; INTERLEAVE: vector.ph:
3241 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
3242 ; INTERLEAVE: vector.body:
3243 ; INTERLEAVE-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3244 ; INTERLEAVE: middle.block:
3245 ; INTERLEAVE-NEXT: br i1 poison, label [[EXIT:%.*]], label [[SCALAR_PH]]
3246 ; INTERLEAVE: scalar.ph:
3247 ; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
3249 ; INTERLEAVE-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
3250 ; INTERLEAVE-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
3251 ; INTERLEAVE-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
3252 ; INTERLEAVE-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
3254 ; INTERLEAVE-NEXT: ret i32 0
; Input loop body.
; %a.0 is the and-reduction accumulator, seeded with 1. Since 1 & 4 == 0 the
; reduced value is 0, which is why the instcombine-enabled prefixes above
; expect `ret i32 0`.
3260 %a.0 = phi i32 [ 1, %entry ], [ %a.0.and, %loop ]
; %b.0 is the i32 loop counter, seeded with 0.
3261 %b.0 = phi i32 [ 0, %entry ], [ %b.next, %loop ]
3262 %a.0.and = and i32 %a.0, 4
; Decrement without wrap flags: the first iteration takes the counter from 0
; to i32 -1 (all bits set).
3263 %b.next = add i32 %b.0, -1
; Leave the loop once the counter has come back around to 0.
3264 %ec = icmp eq i32 %b.next, 0
3265 br i1 %ec, label %exit, label %loop
3271 ; When generating the overflow check we must make sure that the induction start value
3272 ; is defined before the branch to the scalar preheader.
; Globals read by @testoverflowcheck; all three are loaded in its entry block.
; @e: i8 initialized to 1; its loaded value is the start of the i8 induction
; variable.
3276 @e = global i8 1, align 1
; @d: zero-initialized i32 (common linkage); its loaded value is the
; loop-invariant operand that is and-ed into the reduction.
3277 @d = common global i32 0, align 4
; @c: zero-initialized i32 (common linkage); its loaded value seeds the
; and-reduction.
3278 @c = common global i32 0, align 4
3279 define i32 @testoverflowcheck() {
3280 ; CHECK-LABEL: @testoverflowcheck(
3281 ; CHECK-NEXT: entry:
3282 ; CHECK-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3283 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4
3284 ; CHECK-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3285 ; CHECK-NEXT: [[TMP1:%.*]] = sub i8 -1, [[DOTPR_I]]
3286 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3287 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3288 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 2
3289 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3291 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 2
3292 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
3293 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3294 ; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]]
3295 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0
3296 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
3297 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3298 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3299 ; CHECK: vector.body:
3300 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3301 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
3302 ; CHECK-NEXT: [[TMP5]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]]
3303 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3304 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3305 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3306 ; CHECK: middle.block:
3307 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP5]])
3308 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3309 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3311 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3312 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[C_PROMOTED_I]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
3313 ; CHECK-NEXT: br label [[COND_END_I:%.*]]
3314 ; CHECK: cond.end.i:
3315 ; CHECK-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3316 ; CHECK-NEXT: [[AND3_I:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND_I:%.*]], [[COND_END_I]] ]
3317 ; CHECK-NEXT: [[AND_I]] = and i32 [[TMP0]], [[AND3_I]]
3318 ; CHECK-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
3319 ; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3320 ; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3322 ; CHECK-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
3323 ; CHECK-NEXT: ret i32 [[AND_I_LCSSA]]
3325 ; IND-LABEL: @testoverflowcheck(
3327 ; IND-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3328 ; IND-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4
3329 ; IND-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3330 ; IND-NEXT: [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
3331 ; IND-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3332 ; IND-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3333 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i8 [[DOTPR_I]], -1
3334 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3336 ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 510
3337 ; IND-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3338 ; IND-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]]
3339 ; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
3340 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
3341 ; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3342 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
3344 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3345 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3346 ; IND-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3347 ; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3348 ; IND: middle.block:
3349 ; IND-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
3350 ; IND-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]])
3351 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3352 ; IND-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3354 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3355 ; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[C_PROMOTED_I]], [[ENTRY]] ]
3356 ; IND-NEXT: br label [[COND_END_I:%.*]]
3358 ; IND-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3359 ; IND-NEXT: [[TMP8:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
3360 ; IND-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
3361 ; IND-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3362 ; IND-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3364 ; IND-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP8]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
3365 ; IND-NEXT: ret i32 [[AND_I_LCSSA]]
3367 ; UNROLL-LABEL: @testoverflowcheck(
3368 ; UNROLL-NEXT: entry:
3369 ; UNROLL-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3370 ; UNROLL-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4
3371 ; UNROLL-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3372 ; UNROLL-NEXT: [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
3373 ; UNROLL-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3374 ; UNROLL-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3375 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -4
3376 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3377 ; UNROLL: vector.ph:
3378 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 508
3379 ; UNROLL-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3380 ; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]]
3381 ; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
3382 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
3383 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
3384 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
3385 ; UNROLL: vector.body:
3386 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3387 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3388 ; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3389 ; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3390 ; UNROLL: middle.block:
3391 ; UNROLL-NEXT: [[TMP6:%.*]] = and <2 x i32> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLATINSERT]]
3392 ; UNROLL-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <2 x i32> zeroinitializer
3393 ; UNROLL-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP7]], [[TMP4]]
3394 ; UNROLL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
3395 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3396 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3397 ; UNROLL: scalar.ph:
3398 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3399 ; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[C_PROMOTED_I]], [[ENTRY]] ]
3400 ; UNROLL-NEXT: br label [[COND_END_I:%.*]]
3401 ; UNROLL: cond.end.i:
3402 ; UNROLL-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3403 ; UNROLL-NEXT: [[TMP9:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
3404 ; UNROLL-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
3405 ; UNROLL-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3406 ; UNROLL-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3408 ; UNROLL-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP9]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
3409 ; UNROLL-NEXT: ret i32 [[AND_I_LCSSA]]
3411 ; UNROLL-NO-IC-LABEL: @testoverflowcheck(
3412 ; UNROLL-NO-IC-NEXT: entry:
3413 ; UNROLL-NO-IC-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3414 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4
3415 ; UNROLL-NO-IC-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3416 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = sub i8 -1, [[DOTPR_I]]
3417 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3418 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3419 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
3420 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3421 ; UNROLL-NO-IC: vector.ph:
3422 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 4
3423 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
3424 ; UNROLL-NO-IC-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3425 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]]
3426 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0
3427 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
3428 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3429 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
3430 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
3431 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
3432 ; UNROLL-NO-IC: vector.body:
3433 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3434 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
3435 ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
3436 ; UNROLL-NO-IC-NEXT: [[TMP5]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]]
3437 ; UNROLL-NO-IC-NEXT: [[TMP6]] = and <2 x i32> [[BROADCAST_SPLAT3]], [[VEC_PHI1]]
3438 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3439 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3440 ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3441 ; UNROLL-NO-IC: middle.block:
3442 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP6]], [[TMP5]]
3443 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
3444 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3445 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3446 ; UNROLL-NO-IC: scalar.ph:
3447 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3448 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[C_PROMOTED_I]], [[ENTRY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
3449 ; UNROLL-NO-IC-NEXT: br label [[COND_END_I:%.*]]
3450 ; UNROLL-NO-IC: cond.end.i:
3451 ; UNROLL-NO-IC-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3452 ; UNROLL-NO-IC-NEXT: [[AND3_I:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND_I:%.*]], [[COND_END_I]] ]
3453 ; UNROLL-NO-IC-NEXT: [[AND_I]] = and i32 [[TMP0]], [[AND3_I]]
3454 ; UNROLL-NO-IC-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
3455 ; UNROLL-NO-IC-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3456 ; UNROLL-NO-IC-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3457 ; UNROLL-NO-IC: loopexit:
3458 ; UNROLL-NO-IC-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
3459 ; UNROLL-NO-IC-NEXT: ret i32 [[AND_I_LCSSA]]
3461 ; INTERLEAVE-LABEL: @testoverflowcheck(
3462 ; INTERLEAVE-NEXT: entry:
3463 ; INTERLEAVE-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3464 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4
3465 ; INTERLEAVE-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3466 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
3467 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3468 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3469 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -8
3470 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3471 ; INTERLEAVE: vector.ph:
3472 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 504
3473 ; INTERLEAVE-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3474 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]]
3475 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 -1, i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
3476 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
3477 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
3478 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
3479 ; INTERLEAVE: vector.body:
3480 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3481 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
3482 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3483 ; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3484 ; INTERLEAVE: middle.block:
3485 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = and <4 x i32> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLATINSERT]]
3486 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
3487 ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP7]], [[TMP4]]
3488 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[BIN_RDX]])
3489 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3490 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3491 ; INTERLEAVE: scalar.ph:
3492 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3493 ; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[C_PROMOTED_I]], [[ENTRY]] ]
3494 ; INTERLEAVE-NEXT: br label [[COND_END_I:%.*]]
3495 ; INTERLEAVE: cond.end.i:
3496 ; INTERLEAVE-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3497 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
3498 ; INTERLEAVE-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
3499 ; INTERLEAVE-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3500 ; INTERLEAVE-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3501 ; INTERLEAVE: loopexit:
3502 ; INTERLEAVE-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP9]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
3503 ; INTERLEAVE-NEXT: ret i32 [[AND_I_LCSSA]]
3506 %.pr.i = load i8, i8* @e, align 1
3507 %0 = load i32, i32* @d, align 4
3508 %c.promoted.i = load i32, i32* @c, align 4
3509 br label %cond.end.i
3512 %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
3513 %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
3514 %and.i = and i32 %0, %and3.i
3515 %inc.i = add i8 %inc4.i, 1
3516 %tobool.i = icmp eq i8 %inc.i, 0
3517 br i1 %tobool.i, label %loopexit, label %cond.end.i
3523 ; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32).
3524 ; In order to recognize %sphi as an induction PHI and vectorize this loop,
3525 ; we need to convert the SCEV expression into an AddRecExpr.
3526 ; The expression gets converted to {zext i8 %t to i32,+,1}, guarded by a runtime wrap check (see vector.scevcheck).
3528 define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
3529 ; CHECK-LABEL: @wrappingindvars1(
3530 ; CHECK-NEXT: entry:
3531 ; CHECK-NEXT: [[ST:%.*]] = zext i8 [[T:%.*]] to i16
3532 ; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i32
3533 ; CHECK-NEXT: [[ECMP:%.*]] = icmp ult i16 [[ST]], 42
3534 ; CHECK-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3535 ; CHECK: loop.preheader:
3536 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3537 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
3538 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3539 ; CHECK: vector.scevcheck:
3540 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3541 ; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
3542 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]]
3543 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
3544 ; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
3545 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
3546 ; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]]
3547 ; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
3548 ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
3549 ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
3550 ; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
3551 ; CHECK-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3553 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
3554 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
3555 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3556 ; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_VTC]]
3557 ; CHECK-NEXT: [[IND_END2:%.*]] = add i32 [[EXT]], [[N_VEC]]
3558 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0
3559 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3560 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
3561 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3562 ; CHECK: vector.body:
3563 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3564 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3565 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8
3566 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP12]]
3567 ; CHECK-NEXT: [[TMP13:%.*]] = add i8 [[OFFSET_IDX]], 0
3568 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP13]]
3569 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 0
3570 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>*
3571 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP16]], align 4
3572 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3573 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
3574 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3575 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3576 ; CHECK: middle.block:
3577 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3578 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3580 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3581 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3582 ; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3583 ; CHECK-NEXT: br label [[LOOP:%.*]]
3585 ; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3586 ; CHECK-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3587 ; CHECK-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
3588 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]]
3589 ; CHECK-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
3590 ; CHECK-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
3591 ; CHECK-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3592 ; CHECK-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3593 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3594 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3595 ; CHECK: exit.loopexit:
3596 ; CHECK-NEXT: br label [[EXIT]]
3598 ; CHECK-NEXT: ret void
3600 ; IND-LABEL: @wrappingindvars1(
3602 ; IND-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
3603 ; IND-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42
3604 ; IND-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3605 ; IND: loop.preheader:
3606 ; IND-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3607 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
3608 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3609 ; IND: vector.scevcheck:
3610 ; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3611 ; IND-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
3612 ; IND-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
3613 ; IND-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
3614 ; IND-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[T]]
3615 ; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
3616 ; IND-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
3617 ; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
3618 ; IND-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
3619 ; IND-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3621 ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2
3622 ; IND-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3623 ; IND-NEXT: [[IND_END:%.*]] = add i8 [[CAST_VTC]], [[T]]
3624 ; IND-NEXT: [[IND_END2:%.*]] = add i32 [[N_VEC]], [[EXT]]
3625 ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0
3626 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3627 ; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
3628 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
3630 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3631 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3632 ; IND-NEXT: [[TMP10:%.*]] = trunc i32 [[INDEX]] to i8
3633 ; IND-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP10]], [[T]]
3634 ; IND-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3635 ; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP11]]
3636 ; IND-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <2 x i32>*
3637 ; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP13]], align 4
3638 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3639 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
3640 ; IND-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3641 ; IND-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3642 ; IND: middle.block:
3643 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3644 ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3646 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3647 ; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3648 ; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3649 ; IND-NEXT: br label [[LOOP:%.*]]
3651 ; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3652 ; IND-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3653 ; IND-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
3654 ; IND-NEXT: [[TMP15:%.*]] = sext i8 [[IDX]] to i64
3655 ; IND-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
3656 ; IND-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
3657 ; IND-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
3658 ; IND-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3659 ; IND-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3660 ; IND-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3661 ; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3662 ; IND: exit.loopexit:
3663 ; IND-NEXT: br label [[EXIT]]
3665 ; IND-NEXT: ret void
3667 ; UNROLL-LABEL: @wrappingindvars1(
3668 ; UNROLL-NEXT: entry:
3669 ; UNROLL-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
3670 ; UNROLL-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42
3671 ; UNROLL-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3672 ; UNROLL: loop.preheader:
3673 ; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3674 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
3675 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3676 ; UNROLL: vector.scevcheck:
3677 ; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3678 ; UNROLL-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
3679 ; UNROLL-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
3680 ; UNROLL-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
3681 ; UNROLL-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[T]]
3682 ; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
3683 ; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
3684 ; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
3685 ; UNROLL-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
3686 ; UNROLL-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3687 ; UNROLL: vector.ph:
3688 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4
3689 ; UNROLL-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3690 ; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[CAST_VTC]], [[T]]
3691 ; UNROLL-NEXT: [[IND_END2:%.*]] = add i32 [[N_VEC]], [[EXT]]
3692 ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0
3693 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3694 ; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
3695 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
3696 ; UNROLL: vector.body:
3697 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3698 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3699 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
3700 ; UNROLL-NEXT: [[TMP10:%.*]] = trunc i32 [[INDEX]] to i8
3701 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP10]], [[T]]
3702 ; UNROLL-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3703 ; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP11]]
3704 ; UNROLL-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <2 x i32>*
3705 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP13]], align 4
3706 ; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 2
3707 ; UNROLL-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>*
3708 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP15]], align 4
3709 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3710 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
3711 ; UNROLL-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3712 ; UNROLL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3713 ; UNROLL: middle.block:
3714 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3715 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3716 ; UNROLL: scalar.ph:
3717 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3718 ; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3719 ; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3720 ; UNROLL-NEXT: br label [[LOOP:%.*]]
3722 ; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3723 ; UNROLL-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3724 ; UNROLL-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
3725 ; UNROLL-NEXT: [[TMP17:%.*]] = sext i8 [[IDX]] to i64
3726 ; UNROLL-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
3727 ; UNROLL-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
3728 ; UNROLL-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
3729 ; UNROLL-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3730 ; UNROLL-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3731 ; UNROLL-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3732 ; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3733 ; UNROLL: exit.loopexit:
3734 ; UNROLL-NEXT: br label [[EXIT]]
3736 ; UNROLL-NEXT: ret void
3738 ; UNROLL-NO-IC-LABEL: @wrappingindvars1(
3739 ; UNROLL-NO-IC-NEXT: entry:
3740 ; UNROLL-NO-IC-NEXT: [[ST:%.*]] = zext i8 [[T:%.*]] to i16
3741 ; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i32
3742 ; UNROLL-NO-IC-NEXT: [[ECMP:%.*]] = icmp ult i16 [[ST]], 42
3743 ; UNROLL-NO-IC-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3744 ; UNROLL-NO-IC: loop.preheader:
3745 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3746 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
3747 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3748 ; UNROLL-NO-IC: vector.scevcheck:
3749 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3750 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
3751 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]]
3752 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
3753 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
3754 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
3755 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]]
3756 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
3757 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
3758 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
3759 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
3760 ; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3761 ; UNROLL-NO-IC: vector.ph:
3762 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
3763 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
3764 ; UNROLL-NO-IC-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3765 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_VTC]]
3766 ; UNROLL-NO-IC-NEXT: [[IND_END2:%.*]] = add i32 [[EXT]], [[N_VEC]]
3767 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0
3768 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3769 ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
3770 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
3771 ; UNROLL-NO-IC: vector.body:
3772 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3773 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3774 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
3775 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8
3776 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP12]]
3777 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add i8 [[OFFSET_IDX]], 0
3778 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i8 [[OFFSET_IDX]], 2
3779 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP13]]
3780 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP14]]
3781 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0
3782 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>*
3783 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP18]], align 4
3784 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 2
3785 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <2 x i32>*
3786 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP20]], align 4
3787 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3788 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
3789 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3790 ; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3791 ; UNROLL-NO-IC: middle.block:
3792 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3793 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3794 ; UNROLL-NO-IC: scalar.ph:
3795 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3796 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3797 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3798 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
3799 ; UNROLL-NO-IC: loop:
3800 ; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3801 ; UNROLL-NO-IC-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3802 ; UNROLL-NO-IC-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
3803 ; UNROLL-NO-IC-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]]
3804 ; UNROLL-NO-IC-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
3805 ; UNROLL-NO-IC-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
3806 ; UNROLL-NO-IC-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3807 ; UNROLL-NO-IC-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3808 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3809 ; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3810 ; UNROLL-NO-IC: exit.loopexit:
3811 ; UNROLL-NO-IC-NEXT: br label [[EXIT]]
3812 ; UNROLL-NO-IC: exit:
3813 ; UNROLL-NO-IC-NEXT: ret void
3815 ; INTERLEAVE-LABEL: @wrappingindvars1(
3816 ; INTERLEAVE-NEXT: entry:
3817 ; INTERLEAVE-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
3818 ; INTERLEAVE-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42
3819 ; INTERLEAVE-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3820 ; INTERLEAVE: loop.preheader:
3821 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3822 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
3823 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3824 ; INTERLEAVE: vector.scevcheck:
3825 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3826 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
3827 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
3828 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
3829 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[T]]
3830 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
3831 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
3832 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
3833 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
3834 ; INTERLEAVE-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3835 ; INTERLEAVE: vector.ph:
3836 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8
3837 ; INTERLEAVE-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3838 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[CAST_VTC]], [[T]]
3839 ; INTERLEAVE-NEXT: [[IND_END2:%.*]] = add i32 [[N_VEC]], [[EXT]]
3840 ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EXT]], i64 0
3841 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
3842 ; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
3843 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
3844 ; INTERLEAVE: vector.body:
3845 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3846 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3847 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
3848 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = trunc i32 [[INDEX]] to i8
3849 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP10]], [[T]]
3850 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3851 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP11]]
3852 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
3853 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP13]], align 4
3854 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 4
3855 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
3856 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP15]], align 4
3857 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
3858 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
3859 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3860 ; INTERLEAVE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3861 ; INTERLEAVE: middle.block:
3862 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3863 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3864 ; INTERLEAVE: scalar.ph:
3865 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3866 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3867 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3868 ; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
3870 ; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3871 ; INTERLEAVE-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3872 ; INTERLEAVE-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
3873 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = sext i8 [[IDX]] to i64
3874 ; INTERLEAVE-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
3875 ; INTERLEAVE-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
3876 ; INTERLEAVE-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
3877 ; INTERLEAVE-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3878 ; INTERLEAVE-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3879 ; INTERLEAVE-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3880 ; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3881 ; INTERLEAVE: exit.loopexit:
3882 ; INTERLEAVE-NEXT: br label [[EXIT]]
3884 ; INTERLEAVE-NEXT: ret void
3887 %st = zext i8 %t to i16
3888 %ext = zext i8 %t to i32
3889 %ecmp = icmp ult i16 %st, 42
3890 br i1 %ecmp, label %loop, label %exit
3894 %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
3895 %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
3896 %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop]
3898 %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
3899 store i32 %sphi, i32* %ptr
3901 %idx.inc = add i8 %idx, 1
3902 %idx.inc.ext = zext i8 %idx.inc to i32
3903 %idx.b.inc = add nuw nsw i32 %idx.b, 1
3905 %c = icmp ult i32 %idx.b, %len
3906 br i1 %c, label %loop, label %exit
3912 ; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32)).
3913 ; In order to recognize %sphi as an induction PHI and vectorize this loop,
3914 ; we need to convert the SCEV expression into an AddRecExpr.
3915 ; The expression gets converted to {(4 * (zext i8 %t to i32)),+,4}<%loop>.
3916 define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
3917 ; CHECK-LABEL: @wrappingindvars2(
3918 ; CHECK-NEXT: entry:
3919 ; CHECK-NEXT: [[ST:%.*]] = zext i8 [[T:%.*]] to i16
3920 ; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i32
3921 ; CHECK-NEXT: [[EXT_MUL:%.*]] = mul i32 [[EXT]], 4
3922 ; CHECK-NEXT: [[ECMP:%.*]] = icmp ult i16 [[ST]], 42
3923 ; CHECK-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3924 ; CHECK: loop.preheader:
3925 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3926 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
3927 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3928 ; CHECK: vector.scevcheck:
3929 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3930 ; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
3931 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]]
3932 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
3933 ; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
3934 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
3935 ; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]]
3936 ; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
3937 ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
3938 ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
3939 ; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
3940 ; CHECK-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3942 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
3943 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
3944 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
3945 ; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_VTC]]
3946 ; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[N_VEC]], 4
3947 ; CHECK-NEXT: [[IND_END1:%.*]] = add i32 [[EXT_MUL]], [[TMP12]]
3948 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0
3949 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3950 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
3951 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3952 ; CHECK: vector.body:
3953 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3954 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3955 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8
3956 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP13]]
3957 ; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[OFFSET_IDX]], 0
3958 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP14]]
3959 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0
3960 ; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <2 x i32>*
3961 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP17]], align 4
3962 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3963 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
3964 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3965 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
3966 ; CHECK: middle.block:
3967 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3968 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3970 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3971 ; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
3972 ; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3973 ; CHECK-NEXT: br label [[LOOP:%.*]]
3975 ; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3976 ; CHECK-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
3977 ; CHECK-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
3978 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]]
3979 ; CHECK-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
3980 ; CHECK-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
3981 ; CHECK-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
3982 ; CHECK-NEXT: [[MUL]] = mul i32 [[IDX_INC_EXT]], 4
3983 ; CHECK-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3984 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3985 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
3986 ; CHECK: exit.loopexit:
3987 ; CHECK-NEXT: br label [[EXIT]]
3989 ; CHECK-NEXT: ret void
3991 ; IND-LABEL: @wrappingindvars2(
3993 ; IND-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
3994 ; IND-NEXT: [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
3995 ; IND-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42
3996 ; IND-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3997 ; IND: loop.preheader:
3998 ; IND-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3999 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
4000 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4001 ; IND: vector.scevcheck:
4002 ; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
4003 ; IND-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
4004 ; IND-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
4005 ; IND-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
4006 ; IND-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[T]]
4007 ; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
4008 ; IND-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
4009 ; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
4010 ; IND-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
4011 ; IND-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4013 ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2
4014 ; IND-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
4015 ; IND-NEXT: [[IND_END:%.*]] = add i8 [[CAST_VTC]], [[T]]
4016 ; IND-NEXT: [[TMP10:%.*]] = add i32 [[N_VEC]], [[EXT]]
4017 ; IND-NEXT: [[IND_END1:%.*]] = shl i32 [[TMP10]], 2
4018 ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
4019 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4020 ; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
4021 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
4023 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4024 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4025 ; IND-NEXT: [[TMP11:%.*]] = trunc i32 [[INDEX]] to i8
4026 ; IND-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP11]], [[T]]
4027 ; IND-NEXT: [[TMP12:%.*]] = sext i8 [[OFFSET_IDX]] to i64
4028 ; IND-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP12]]
4029 ; IND-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>*
4030 ; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP14]], align 4
4031 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4032 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
4033 ; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4034 ; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4035 ; IND: middle.block:
4036 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4037 ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4039 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4040 ; IND-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4041 ; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4042 ; IND-NEXT: br label [[LOOP:%.*]]
4044 ; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4045 ; IND-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
4046 ; IND-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4047 ; IND-NEXT: [[TMP16:%.*]] = sext i8 [[IDX]] to i64
4048 ; IND-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
4049 ; IND-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
4050 ; IND-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
4051 ; IND-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4052 ; IND-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
4053 ; IND-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4054 ; IND-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4055 ; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4056 ; IND: exit.loopexit:
4057 ; IND-NEXT: br label [[EXIT]]
4059 ; IND-NEXT: ret void
4061 ; UNROLL-LABEL: @wrappingindvars2(
4062 ; UNROLL-NEXT: entry:
4063 ; UNROLL-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
4064 ; UNROLL-NEXT: [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
4065 ; UNROLL-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42
4066 ; UNROLL-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
4067 ; UNROLL: loop.preheader:
4068 ; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
4069 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
4070 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4071 ; UNROLL: vector.scevcheck:
4072 ; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
4073 ; UNROLL-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
4074 ; UNROLL-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
4075 ; UNROLL-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
4076 ; UNROLL-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[T]]
4077 ; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
4078 ; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
4079 ; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
4080 ; UNROLL-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
4081 ; UNROLL-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4082 ; UNROLL: vector.ph:
4083 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4
4084 ; UNROLL-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
4085 ; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[CAST_VTC]], [[T]]
4086 ; UNROLL-NEXT: [[TMP10:%.*]] = add i32 [[N_VEC]], [[EXT]]
4087 ; UNROLL-NEXT: [[IND_END1:%.*]] = shl i32 [[TMP10]], 2
4088 ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
4089 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4090 ; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
4091 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
4092 ; UNROLL: vector.body:
4093 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4094 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4095 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
4096 ; UNROLL-NEXT: [[TMP11:%.*]] = trunc i32 [[INDEX]] to i8
4097 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP11]], [[T]]
4098 ; UNROLL-NEXT: [[TMP12:%.*]] = sext i8 [[OFFSET_IDX]] to i64
4099 ; UNROLL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP12]]
4100 ; UNROLL-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>*
4101 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP14]], align 4
4102 ; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 2
4103 ; UNROLL-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>*
4104 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP16]], align 4
4105 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4106 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 16, i32 16>
4107 ; UNROLL-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4108 ; UNROLL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4109 ; UNROLL: middle.block:
4110 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4111 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4112 ; UNROLL: scalar.ph:
4113 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4114 ; UNROLL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4115 ; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4116 ; UNROLL-NEXT: br label [[LOOP:%.*]]
4118 ; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4119 ; UNROLL-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
4120 ; UNROLL-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4121 ; UNROLL-NEXT: [[TMP18:%.*]] = sext i8 [[IDX]] to i64
4122 ; UNROLL-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
4123 ; UNROLL-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
4124 ; UNROLL-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
4125 ; UNROLL-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4126 ; UNROLL-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
4127 ; UNROLL-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4128 ; UNROLL-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4129 ; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4130 ; UNROLL: exit.loopexit:
4131 ; UNROLL-NEXT: br label [[EXIT]]
4133 ; UNROLL-NEXT: ret void
4135 ; UNROLL-NO-IC-LABEL: @wrappingindvars2(
4136 ; UNROLL-NO-IC-NEXT: entry:
4137 ; UNROLL-NO-IC-NEXT: [[ST:%.*]] = zext i8 [[T:%.*]] to i16
4138 ; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i32
4139 ; UNROLL-NO-IC-NEXT: [[EXT_MUL:%.*]] = mul i32 [[EXT]], 4
4140 ; UNROLL-NO-IC-NEXT: [[ECMP:%.*]] = icmp ult i16 [[ST]], 42
4141 ; UNROLL-NO-IC-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
4142 ; UNROLL-NO-IC: loop.preheader:
4143 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
4144 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
4145 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4146 ; UNROLL-NO-IC: vector.scevcheck:
4147 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
4148 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
4149 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]]
4150 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
4151 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
4152 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
4153 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]]
4154 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
4155 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
4156 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
4157 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
4158 ; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4159 ; UNROLL-NO-IC: vector.ph:
4160 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
4161 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
4162 ; UNROLL-NO-IC-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
4163 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_VTC]]
4164 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul i32 [[N_VEC]], 4
4165 ; UNROLL-NO-IC-NEXT: [[IND_END1:%.*]] = add i32 [[EXT_MUL]], [[TMP12]]
4166 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0
4167 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4168 ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
4169 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
4170 ; UNROLL-NO-IC: vector.body:
4171 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4172 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4173 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
4174 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8
4175 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP13]]
4176 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i8 [[OFFSET_IDX]], 0
4177 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add i8 [[OFFSET_IDX]], 2
4178 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP14]]
4179 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP15]]
4180 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0
4181 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <2 x i32>*
4182 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP19]], align 4
4183 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 2
4184 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>*
4185 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP21]], align 4
4186 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4187 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 8, i32 8>
4188 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4189 ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4190 ; UNROLL-NO-IC: middle.block:
4191 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4192 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4193 ; UNROLL-NO-IC: scalar.ph:
4194 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4195 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4196 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4197 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
4198 ; UNROLL-NO-IC: loop:
4199 ; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4200 ; UNROLL-NO-IC-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
4201 ; UNROLL-NO-IC-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4202 ; UNROLL-NO-IC-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]]
4203 ; UNROLL-NO-IC-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
4204 ; UNROLL-NO-IC-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
4205 ; UNROLL-NO-IC-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4206 ; UNROLL-NO-IC-NEXT: [[MUL]] = mul i32 [[IDX_INC_EXT]], 4
4207 ; UNROLL-NO-IC-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4208 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4209 ; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4210 ; UNROLL-NO-IC: exit.loopexit:
4211 ; UNROLL-NO-IC-NEXT: br label [[EXIT]]
4212 ; UNROLL-NO-IC: exit:
4213 ; UNROLL-NO-IC-NEXT: ret void
4215 ; INTERLEAVE-LABEL: @wrappingindvars2(
4216 ; INTERLEAVE-NEXT: entry:
4217 ; INTERLEAVE-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
4218 ; INTERLEAVE-NEXT: [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
4219 ; INTERLEAVE-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42
4220 ; INTERLEAVE-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
4221 ; INTERLEAVE: loop.preheader:
4222 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
4223 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
4224 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4225 ; INTERLEAVE: vector.scevcheck:
4226 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
4227 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
4228 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
4229 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
4230 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[T]]
4231 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
4232 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
4233 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
4234 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
4235 ; INTERLEAVE-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4236 ; INTERLEAVE: vector.ph:
4237 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8
4238 ; INTERLEAVE-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
4239 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[CAST_VTC]], [[T]]
4240 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = add i32 [[N_VEC]], [[EXT]]
4241 ; INTERLEAVE-NEXT: [[IND_END1:%.*]] = shl i32 [[TMP10]], 2
4242 ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EXT_MUL]], i64 0
4243 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4244 ; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 4, i32 8, i32 12>
4245 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
4246 ; INTERLEAVE: vector.body:
4247 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4248 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4249 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16>
4250 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = trunc i32 [[INDEX]] to i8
4251 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP11]], [[T]]
4252 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = sext i8 [[OFFSET_IDX]] to i64
4253 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP12]]
4254 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
4255 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP14]], align 4
4256 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 4
4257 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
4258 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP16]], align 4
4259 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
4260 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 32, i32 32, i32 32, i32 32>
4261 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4262 ; INTERLEAVE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4263 ; INTERLEAVE: middle.block:
4264 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4265 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4266 ; INTERLEAVE: scalar.ph:
4267 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4268 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4269 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4270 ; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
4272 ; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4273 ; INTERLEAVE-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
4274 ; INTERLEAVE-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4275 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = sext i8 [[IDX]] to i64
4276 ; INTERLEAVE-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
4277 ; INTERLEAVE-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4
4278 ; INTERLEAVE-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
4279 ; INTERLEAVE-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4280 ; INTERLEAVE-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
4281 ; INTERLEAVE-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4282 ; INTERLEAVE-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4283 ; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4284 ; INTERLEAVE: exit.loopexit:
4285 ; INTERLEAVE-NEXT: br label [[EXIT]]
4287 ; INTERLEAVE-NEXT: ret void
4290 %st = zext i8 %t to i16
4291 %ext = zext i8 %t to i32
4292 %ext.mul = mul i32 %ext, 4
4294 %ecmp = icmp ult i16 %st, 42
4295 br i1 %ecmp, label %loop, label %exit
4299 %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
4300 %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop]
4301 %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
4303 %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
4304 store i32 %sphi, i32* %ptr
4306 %idx.inc = add i8 %idx, 1
4307 %idx.inc.ext = zext i8 %idx.inc to i32
4308 %mul = mul i32 %idx.inc.ext, 4
4309 %idx.b.inc = add nuw nsw i32 %idx.b, 1
4311 %c = icmp ult i32 %idx.b, %len
4312 br i1 %c, label %loop, label %exit
4318 ; Check that we generate vectorized IVs in the pre-header
4319 ; instead of widening the scalar IV inside the loop, when
4320 ; we know how to do that.
;
; The loop below stores each i32 IV value to a[iv], starting at 0 and leaving
; the loop once iv + 1 == %k, so the IV is needed as a vector value as well as
; a scalar address index. Every check prefix expects a vec.ind phi seeded with
; a constant <0, 1, ...> vector coming from vector.ph and advanced by adding a
; constant splat; the interleaved runs derive the second part via step.add.
; NOTE(review): %start is not referenced by any instruction visible in this
; function - confirm it is intentionally unused.
4321 define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
4322 ; CHECK-LABEL: @veciv(
4323 ; CHECK-NEXT: for.body.preheader:
4324 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 2
4325 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4327 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[K]], 2
4328 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[K]], [[N_MOD_VF]]
4329 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
4330 ; CHECK: vector.body:
4331 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4332 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4333 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
4334 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
4335 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
4336 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
4337 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
4338 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4339 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4340 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4341 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4342 ; CHECK: middle.block:
4343 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
4344 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4346 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4347 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
4349 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4350 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
4351 ; CHECK-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4352 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4353 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4354 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4356 ; CHECK-NEXT: ret void
4358 ; IND-LABEL: @veciv(
4359 ; IND-NEXT: for.body.preheader:
4360 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 2
4361 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4363 ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[K]], -2
4364 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
4366 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4367 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4368 ; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
4369 ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
4370 ; IND-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>*
4371 ; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4
4372 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4373 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4374 ; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4375 ; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4376 ; IND: middle.block:
4377 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]]
4378 ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4380 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4381 ; IND-NEXT: br label [[FOR_BODY:%.*]]
4383 ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4384 ; IND-NEXT: [[TMP4:%.*]] = sext i32 [[INDVARS_IV]] to i64
4385 ; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
4386 ; IND-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4387 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4388 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4389 ; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4391 ; IND-NEXT: ret void
4393 ; UNROLL-LABEL: @veciv(
4394 ; UNROLL-NEXT: for.body.preheader:
4395 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 4
4396 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4397 ; UNROLL: vector.ph:
4398 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[K]], -4
4399 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
4400 ; UNROLL: vector.body:
4401 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4402 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4403 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4404 ; UNROLL-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
4405 ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
4406 ; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>*
4407 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4
4408 ; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 2
4409 ; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
4410 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP4]], align 4
4411 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4412 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
4413 ; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4414 ; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4415 ; UNROLL: middle.block:
4416 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]]
4417 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4418 ; UNROLL: scalar.ph:
4419 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4420 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
4422 ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4423 ; UNROLL-NEXT: [[TMP6:%.*]] = sext i32 [[INDVARS_IV]] to i64
4424 ; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
4425 ; UNROLL-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4426 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4427 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4428 ; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4430 ; UNROLL-NEXT: ret void
4432 ; UNROLL-NO-IC-LABEL: @veciv(
4433 ; UNROLL-NO-IC-NEXT: for.body.preheader:
4434 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 4
4435 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4436 ; UNROLL-NO-IC: vector.ph:
4437 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[K]], 4
4438 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[K]], [[N_MOD_VF]]
4439 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
4440 ; UNROLL-NO-IC: vector.body:
4441 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4442 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4443 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4444 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
4445 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2
4446 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
4447 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP1]]
4448 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
4449 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
4450 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP5]], align 4
4451 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 2
4452 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
4453 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP7]], align 4
4454 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4455 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
4456 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4457 ; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4458 ; UNROLL-NO-IC: middle.block:
4459 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
4460 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4461 ; UNROLL-NO-IC: scalar.ph:
4462 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4463 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
4464 ; UNROLL-NO-IC: for.body:
4465 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4466 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
4467 ; UNROLL-NO-IC-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4468 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4469 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4470 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4471 ; UNROLL-NO-IC: exit:
4472 ; UNROLL-NO-IC-NEXT: ret void
4474 ; INTERLEAVE-LABEL: @veciv(
4475 ; INTERLEAVE-NEXT: for.body.preheader:
4476 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 8
4477 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4478 ; INTERLEAVE: vector.ph:
4479 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[K]], -8
4480 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
4481 ; INTERLEAVE: vector.body:
4482 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4483 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4484 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
4485 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
4486 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
4487 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
4488 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP2]], align 4
4489 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
4490 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
4491 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP4]], align 4
4492 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
4493 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
4494 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4495 ; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4496 ; INTERLEAVE: middle.block:
4497 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]]
4498 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4499 ; INTERLEAVE: scalar.ph:
4500 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4501 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
4502 ; INTERLEAVE: for.body:
4503 ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4504 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = sext i32 [[INDVARS_IV]] to i64
4505 ; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
4506 ; INTERLEAVE-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4507 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4508 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4509 ; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4511 ; INTERLEAVE-NEXT: ret void
; Input loop: the i32 IV starts at 0 and steps by 1.
4517 %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
4518 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
; The IV is the stored data as well as the address index.
4519 store i32 %indvars.iv, i32* %arrayidx, align 4
4520 %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
; Exit is tested on the incremented IV against %k.
4521 %exitcond = icmp eq i32 %indvars.iv.next, %k
4522 br i1 %exitcond, label %exit, label %for.body
; Same store-the-IV loop as @veciv, but the IV is i64 and both the address
; index and the stored value are its truncation to i32. Every check prefix
; expects the truncated value to be carried as an i32 vector induction
; (vec.ind is <N x i32>); no i64 vector induction or vector trunc appears in
; the expected vector body. The vector loop is guarded by a vector.scevcheck
; block that branches to the scalar loop unless %k - 1 fits in a non-negative
; i32.
; NOTE(review): %start is not referenced by any instruction visible in this
; function - confirm it is intentionally unused.
4528 define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
4529 ; CHECK-LABEL: @trunciv(
4530 ; CHECK-NEXT: for.body.preheader:
4531 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
4532 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4533 ; CHECK: vector.scevcheck:
4534 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
4535 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
4536 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
4537 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
4538 ; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
4539 ; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4541 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 2
4542 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
4543 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
4544 ; CHECK: vector.body:
4545 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4546 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4547 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32
4548 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0
4549 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]]
4550 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
4551 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <2 x i32>*
4552 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP9]], align 4
4553 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
4554 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4555 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4556 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
4557 ; CHECK: middle.block:
4558 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
4559 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4561 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4562 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
4564 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4565 ; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
4566 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TRUNC_IV]]
4567 ; CHECK-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
4568 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
4569 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
4570 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
4572 ; CHECK-NEXT: ret void
4574 ; IND-LABEL: @trunciv(
4575 ; IND-NEXT: for.body.preheader:
4576 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
4577 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4578 ; IND: vector.scevcheck:
4579 ; IND-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
4580 ; IND-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648
4581 ; IND-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
4583 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -2
4584 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
4586 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4587 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4588 ; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
4589 ; IND-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32
4590 ; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
4591 ; IND-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
4592 ; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
4593 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
4594 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4595 ; IND-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4596 ; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
4597 ; IND: middle.block:
4598 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
4599 ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4601 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4602 ; IND-NEXT: br label [[FOR_BODY:%.*]]
4604 ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4605 ; IND-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
4606 ; IND-NEXT: [[SEXT1:%.*]] = shl i64 [[INDVARS_IV]], 32
4607 ; IND-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT1]], 32
4608 ; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
4609 ; IND-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
4610 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
4611 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
4612 ; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
4614 ; IND-NEXT: ret void
4616 ; UNROLL-LABEL: @trunciv(
4617 ; UNROLL-NEXT: for.body.preheader:
4618 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
4619 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4620 ; UNROLL: vector.scevcheck:
4621 ; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
4622 ; UNROLL-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648
4623 ; UNROLL-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
4624 ; UNROLL: vector.ph:
4625 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -4
4626 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
4627 ; UNROLL: vector.body:
4628 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4629 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4630 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4631 ; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
4632 ; UNROLL-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32
4633 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
4634 ; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
4635 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
4636 ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2
4637 ; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
4638 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP5]], align 4
4639 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
4640 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
4641 ; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4642 ; UNROLL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
4643 ; UNROLL: middle.block:
4644 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
4645 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4646 ; UNROLL: scalar.ph:
4647 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4648 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
4650 ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4651 ; UNROLL-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
4652 ; UNROLL-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
4653 ; UNROLL-NEXT: [[TMP7:%.*]] = ashr exact i64 [[SEXT2]], 32
4654 ; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
4655 ; UNROLL-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
4656 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
4657 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
4658 ; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
4660 ; UNROLL-NEXT: ret void
4662 ; UNROLL-NO-IC-LABEL: @trunciv(
4663 ; UNROLL-NO-IC-NEXT: for.body.preheader:
4664 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
4665 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4666 ; UNROLL-NO-IC: vector.scevcheck:
4667 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
4668 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
4669 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
4670 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
4671 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
4672 ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4673 ; UNROLL-NO-IC: vector.ph:
4674 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
4675 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
4676 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
4677 ; UNROLL-NO-IC: vector.body:
4678 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4679 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4680 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4681 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32
4682 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0
4683 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[TMP5]], 2
4684 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]]
4685 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP7]]
4686 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
4687 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <2 x i32>*
4688 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP11]], align 4
4689 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 2
4690 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <2 x i32>*
4691 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP13]], align 4
4692 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
4693 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
4694 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4695 ; UNROLL-NO-IC-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
4696 ; UNROLL-NO-IC: middle.block:
4697 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
4698 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4699 ; UNROLL-NO-IC: scalar.ph:
4700 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4701 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
4702 ; UNROLL-NO-IC: for.body:
4703 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4704 ; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
4705 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TRUNC_IV]]
4706 ; UNROLL-NO-IC-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
4707 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
4708 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
4709 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
4710 ; UNROLL-NO-IC: exit:
4711 ; UNROLL-NO-IC-NEXT: ret void
4713 ; INTERLEAVE-LABEL: @trunciv(
4714 ; INTERLEAVE-NEXT: for.body.preheader:
4715 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8
4716 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4717 ; INTERLEAVE: vector.scevcheck:
4718 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
4719 ; INTERLEAVE-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648
4720 ; INTERLEAVE-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
4721 ; INTERLEAVE: vector.ph:
4722 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -8
4723 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
4724 ; INTERLEAVE: vector.body:
4725 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4726 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4727 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
4728 ; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
4729 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32
4730 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
4731 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
4732 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP3]], align 4
4733 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4
4734 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
4735 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP5]], align 4
4736 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
4737 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
4738 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4739 ; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
4740 ; INTERLEAVE: middle.block:
4741 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
4742 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4743 ; INTERLEAVE: scalar.ph:
4744 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4745 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
4746 ; INTERLEAVE: for.body:
4747 ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4748 ; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
4749 ; INTERLEAVE-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
4750 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = ashr exact i64 [[SEXT2]], 32
4751 ; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
4752 ; INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
4753 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
4754 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
4755 ; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
4757 ; INTERLEAVE-NEXT: ret void
; Input loop: the i64 IV starts at 0 and steps by 1.
4763 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
; Both the address index and the stored value come from this i32 truncation
; of the i64 IV.
4764 %trunc.iv = trunc i64 %indvars.iv to i32
4765 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
4766 store i32 %trunc.iv, i32* %arrayidx, align 4
4767 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Exit is tested on the incremented i64 IV against %k.
4768 %exitcond = icmp eq i64 %indvars.iv.next, %k
4769 br i1 %exitcond, label %exit, label %for.body
4777 define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
4778 ; CHECK-LABEL: @nonprimary(
4779 ; CHECK-NEXT: for.body.preheader:
4780 ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
4781 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
4782 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4784 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
4785 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
4786 ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]]
4787 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i32 0
4788 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4789 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
4790 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
4791 ; CHECK: vector.body:
4792 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4793 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4794 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
4795 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
4796 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP1]]
4797 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
4798 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
4799 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
4800 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4801 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4802 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4803 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
4804 ; CHECK: middle.block:
4805 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4806 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4808 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
4809 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
4811 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4812 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
4813 ; CHECK-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4814 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4815 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4816 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
4818 ; CHECK-NEXT: ret void
4820 ; IND-LABEL: @nonprimary(
4821 ; IND-NEXT: for.body.preheader:
4822 ; IND-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
4823 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
4824 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4826 ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2
4827 ; IND-NEXT: [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]]
4828 ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0
4829 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4830 ; IND-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
4831 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
4833 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4834 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4835 ; IND-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]]
4836 ; IND-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
4837 ; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
4838 ; IND-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
4839 ; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
4840 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4841 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4842 ; IND-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4843 ; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
4844 ; IND: middle.block:
4845 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4846 ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4848 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
4849 ; IND-NEXT: br label [[FOR_BODY:%.*]]
4851 ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4852 ; IND-NEXT: [[TMP5:%.*]] = sext i32 [[INDVARS_IV]] to i64
4853 ; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
4854 ; IND-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4855 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4856 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4857 ; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
4859 ; IND-NEXT: ret void
4861 ; UNROLL-LABEL: @nonprimary(
4862 ; UNROLL-NEXT: for.body.preheader:
4863 ; UNROLL-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
4864 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
4865 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4866 ; UNROLL: vector.ph:
4867 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4
4868 ; UNROLL-NEXT: [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]]
4869 ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0
4870 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4871 ; UNROLL-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
4872 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
4873 ; UNROLL: vector.body:
4874 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4875 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4876 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4877 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]]
4878 ; UNROLL-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
4879 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
4880 ; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
4881 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
4882 ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2
4883 ; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
4884 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP5]], align 4
4885 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4886 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
4887 ; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4888 ; UNROLL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
4889 ; UNROLL: middle.block:
4890 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4891 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4892 ; UNROLL: scalar.ph:
4893 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
4894 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
4896 ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4897 ; UNROLL-NEXT: [[TMP7:%.*]] = sext i32 [[INDVARS_IV]] to i64
4898 ; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
4899 ; UNROLL-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4900 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4901 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4902 ; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
4904 ; UNROLL-NEXT: ret void
4906 ; UNROLL-NO-IC-LABEL: @nonprimary(
4907 ; UNROLL-NO-IC-NEXT: for.body.preheader:
4908 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
4909 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
4910 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4911 ; UNROLL-NO-IC: vector.ph:
4912 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
4913 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
4914 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]]
4915 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i32 0
4916 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4917 ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
4918 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
4919 ; UNROLL-NO-IC: vector.body:
4920 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4921 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4922 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4923 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
4924 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
4925 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 2
4926 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP1]]
4927 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP2]]
4928 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0
4929 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
4930 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4
4931 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 2
4932 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <2 x i32>*
4933 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP8]], align 4
4934 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4935 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
4936 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4937 ; UNROLL-NO-IC-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
4938 ; UNROLL-NO-IC: middle.block:
4939 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4940 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4941 ; UNROLL-NO-IC: scalar.ph:
4942 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
4943 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
4944 ; UNROLL-NO-IC: for.body:
4945 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4946 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
4947 ; UNROLL-NO-IC-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4948 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4949 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4950 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
4951 ; UNROLL-NO-IC: exit:
4952 ; UNROLL-NO-IC-NEXT: ret void
4954 ; INTERLEAVE-LABEL: @nonprimary(
4955 ; INTERLEAVE-NEXT: for.body.preheader:
4956 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
4957 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
4958 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4959 ; INTERLEAVE: vector.ph:
4960 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8
4961 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]]
4962 ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i64 0
4963 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4964 ; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
4965 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
4966 ; INTERLEAVE: vector.body:
4967 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4968 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4969 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
4970 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]]
4971 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
4972 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
4973 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
4974 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP3]], align 4
4975 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4
4976 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
4977 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP5]], align 4
4978 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
4979 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
4980 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4981 ; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
4982 ; INTERLEAVE: middle.block:
4983 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4984 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4985 ; INTERLEAVE: scalar.ph:
4986 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
4987 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
4988 ; INTERLEAVE: for.body:
4989 ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4990 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = sext i32 [[INDVARS_IV]] to i64
4991 ; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
4992 ; INTERLEAVE-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4993 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4994 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4995 ; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
4997 ; INTERLEAVE-NEXT: ret void
5003 %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
5004 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
5005 store i32 %indvars.iv, i32* %arrayidx, align 4
5006 %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
5007 %exitcond = icmp eq i32 %indvars.iv.next, %k
5008 br i1 %exitcond, label %exit, label %for.body
; Make sure that a non-primary induction variable whose only use, besides its
; own increment, is a truncation gets vectorized directly in the narrow type.
; Here %j is an i64 induction with step 2 that is truncated to i32 before being
; stored. The expected output below uses a <VF x i32> vector induction starting
; at <0, 2, ...> and stores it as-is, with no trunc left in the vector body.
5014 define void @non_primary_iv_trunc(i32* %a, i64 %n) {
5015 ; CHECK-LABEL: @non_primary_iv_trunc(
5016 ; CHECK-NEXT: entry:
5017 ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5018 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
5019 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5021 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
5022 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
5023 ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
5024 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
5025 ; CHECK: vector.body:
5026 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5027 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5028 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
5029 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
5030 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
5031 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
5032 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
5033 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5034 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
5035 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5036 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5037 ; CHECK: middle.block:
5038 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5039 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5041 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5042 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5043 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
5045 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5046 ; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5047 ; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5048 ; CHECK-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32
5049 ; CHECK-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4
5050 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5051 ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5052 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5053 ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5055 ; CHECK-NEXT: ret void
5057 ; IND-LABEL: @non_primary_iv_trunc(
5059 ; IND-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5060 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
5061 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5063 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
5064 ; IND-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
5065 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
5067 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5068 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5069 ; IND-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
5070 ; IND-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <2 x i32>*
5071 ; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP1]], align 4
5072 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5073 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
5074 ; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5075 ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5076 ; IND: middle.block:
5077 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5078 ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5080 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5081 ; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5082 ; IND-NEXT: br label [[FOR_BODY:%.*]]
5084 ; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5085 ; IND-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5086 ; IND-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5087 ; IND-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32
5088 ; IND-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4
5089 ; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5090 ; IND-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5091 ; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5092 ; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5094 ; IND-NEXT: ret void
5096 ; UNROLL-LABEL: @non_primary_iv_trunc(
5097 ; UNROLL-NEXT: entry:
5098 ; UNROLL-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5099 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
5100 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5101 ; UNROLL: vector.ph:
5102 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
5103 ; UNROLL-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
5104 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
5105 ; UNROLL: vector.body:
5106 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5107 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5108 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
5109 ; UNROLL-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
5110 ; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <2 x i32>*
5111 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP1]], align 4
5112 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
5113 ; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
5114 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP3]], align 4
5115 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
5116 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
5117 ; UNROLL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5118 ; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5119 ; UNROLL: middle.block:
5120 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5121 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5122 ; UNROLL: scalar.ph:
5123 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5124 ; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5125 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
5127 ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5128 ; UNROLL-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5129 ; UNROLL-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5130 ; UNROLL-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32
5131 ; UNROLL-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4
5132 ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5133 ; UNROLL-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5134 ; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5135 ; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5137 ; UNROLL-NEXT: ret void
5139 ; UNROLL-NO-IC-LABEL: @non_primary_iv_trunc(
5140 ; UNROLL-NO-IC-NEXT: entry:
5141 ; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5142 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
5143 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5144 ; UNROLL-NO-IC: vector.ph:
5145 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4
5146 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
5147 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
5148 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
5149 ; UNROLL-NO-IC: vector.body:
5150 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5151 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5152 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
5153 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
5154 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
5155 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
5156 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
5157 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
5158 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
5159 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP5]], align 4
5160 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 2
5161 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
5162 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP7]], align 4
5163 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
5164 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 4, i32 4>
5165 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5166 ; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5167 ; UNROLL-NO-IC: middle.block:
5168 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5169 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5170 ; UNROLL-NO-IC: scalar.ph:
5171 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5172 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5173 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
5174 ; UNROLL-NO-IC: for.body:
5175 ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5176 ; UNROLL-NO-IC-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5177 ; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5178 ; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32
5179 ; UNROLL-NO-IC-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4
5180 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5181 ; UNROLL-NO-IC-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5182 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5183 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5184 ; UNROLL-NO-IC: for.end:
5185 ; UNROLL-NO-IC-NEXT: ret void
5187 ; INTERLEAVE-LABEL: @non_primary_iv_trunc(
5188 ; INTERLEAVE-NEXT: entry:
5189 ; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5190 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8
5191 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5192 ; INTERLEAVE: vector.ph:
5193 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800
5194 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
5195 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
5196 ; INTERLEAVE: vector.body:
5197 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5198 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5199 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
5200 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
5201 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
5202 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP1]], align 4
5203 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
5204 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
5205 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP3]], align 4
5206 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
5207 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16>
5208 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5209 ; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5210 ; INTERLEAVE: middle.block:
5211 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5212 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5213 ; INTERLEAVE: scalar.ph:
5214 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5215 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5216 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
5217 ; INTERLEAVE: for.body:
5218 ; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5219 ; INTERLEAVE-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5220 ; INTERLEAVE-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5221 ; INTERLEAVE-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32
5222 ; INTERLEAVE-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4
5223 ; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5224 ; INTERLEAVE-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5225 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5226 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5227 ; INTERLEAVE: for.end:
5228 ; INTERLEAVE-NEXT: ret void
; %i counts iterations: it starts at 0, advances by 1, and indexes into %a.
5234 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; %j is the second induction variable: it starts at 0 and advances by 2.
5235 %j = phi i64 [ %j.next, %for.body ], [ 0, %entry ]
5236 %var0 = getelementptr inbounds i32, i32* %a, i64 %i
; Only the low 32 bits of %j are stored to %a[%i]; this trunc is the use the
; vectorizer is expected to fold into a narrow vector induction.
5237 %var1 = trunc i64 %j to i32
5238 store i32 %var1, i32* %var0, align 4
5239 %i.next = add nuw nsw i64 %i, 1
5240 %j.next = add nuw nsw i64 %j, 2
; Signed compare: keep looping while %i.next < %n.
5241 %cond = icmp slt i64 %i.next, %n
5242 br i1 %cond, label %for.body, label %for.end
5248 ; PR32419. Ensure we transform truncated non-primary induction variables. In
5249 ; the test case below we replace %var1 with a new induction variable. Because
5250 ; the truncated value is non-primary, we must compute an offset from the
5251 ; primary induction variable.
5254 define i32 @PR32419(i32 %a, i16 %b) {
5255 ; CHECK-LABEL: @PR32419(
5256 ; CHECK-NEXT: entry:
5257 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5259 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[A:%.*]], i32 0
5260 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
5261 ; CHECK: vector.body:
5262 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE2:%.*]] ]
5263 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UREM_CONTINUE2]] ]
5264 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE2]] ]
5265 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]]
5266 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
5267 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer
5268 ; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
5269 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
5270 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5271 ; CHECK: pred.urem.if:
5272 ; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[TMP1]], 0
5273 ; CHECK-NEXT: [[TMP6:%.*]] = urem i16 [[B:%.*]], [[TMP5]]
5274 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0
5275 ; CHECK-NEXT: br label [[PRED_UREM_CONTINUE]]
5276 ; CHECK: pred.urem.continue:
5277 ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UREM_IF]] ]
5278 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
5279 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_UREM_IF1:%.*]], label [[PRED_UREM_CONTINUE2]]
5280 ; CHECK: pred.urem.if1:
5281 ; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[TMP1]], 1
5282 ; CHECK-NEXT: [[TMP11:%.*]] = urem i16 [[B]], [[TMP10]]
5283 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP11]], i32 1
5284 ; CHECK-NEXT: br label [[PRED_UREM_CONTINUE2]]
5285 ; CHECK: pred.urem.continue2:
5286 ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF1]] ]
5287 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]]
5288 ; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
5289 ; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]]
5290 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
5291 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], <i16 2, i16 2>
5292 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
5293 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5294 ; CHECK: middle.block:
5295 ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]])
5296 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 20, 20
5297 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5299 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ]
5300 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
5301 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
5303 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ]
5304 ; CHECK-NEXT: [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ]
5305 ; CHECK-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16
5306 ; CHECK-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0
5307 ; CHECK-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]]
5309 ; CHECK-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]]
5310 ; CHECK-NEXT: br label [[FOR_INC]]
5312 ; CHECK-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ]
5313 ; CHECK-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32
5314 ; CHECK-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]]
5315 ; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
5316 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
5317 ; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5319 ; CHECK-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
5320 ; CHECK-NEXT: ret i32 [[VAR7]]
5322 ; IND-LABEL: @PR32419(
5324 ; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5326 ; IND-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[A:%.*]], i64 0
5327 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
5329 ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE2:%.*]] ]
5330 ; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UREM_CONTINUE2]] ]
5331 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE2]] ]
5332 ; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
5333 ; IND-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer
5334 ; IND-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
5335 ; IND-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0
5336 ; IND-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5337 ; IND: pred.urem.if:
5338 ; IND-NEXT: [[TMP5:%.*]] = add i16 [[TMP1]], -20
5339 ; IND-NEXT: [[TMP6:%.*]] = urem i16 [[B:%.*]], [[TMP5]]
5340 ; IND-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i64 0
5341 ; IND-NEXT: br label [[PRED_UREM_CONTINUE]]
5342 ; IND: pred.urem.continue:
5343 ; IND-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UREM_IF]] ]
5344 ; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
5345 ; IND-NEXT: br i1 [[TMP9]], label [[PRED_UREM_IF1:%.*]], label [[PRED_UREM_CONTINUE2]]
5346 ; IND: pred.urem.if1:
5347 ; IND-NEXT: [[TMP10:%.*]] = add i16 [[TMP1]], -19
5348 ; IND-NEXT: [[TMP11:%.*]] = urem i16 [[B]], [[TMP10]]
5349 ; IND-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP11]], i64 1
5350 ; IND-NEXT: br label [[PRED_UREM_CONTINUE2]]
5351 ; IND: pred.urem.continue2:
5352 ; IND-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF1]] ]
5353 ; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]]
5354 ; IND-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
5355 ; IND-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]]
5356 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
5357 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], <i16 2, i16 2>
5358 ; IND-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
5359 ; IND-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5360 ; IND: middle.block:
5361 ; IND-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]])
5362 ; IND-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5364 ; IND-NEXT: br label [[FOR_BODY:%.*]]
5366 ; IND-NEXT: br i1 poison, label [[FOR_INC:%.*]], label [[FOR_COND:%.*]]
5368 ; IND-NEXT: br label [[FOR_INC]]
5370 ; IND-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5372 ; IND-NEXT: [[VAR7:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
5373 ; IND-NEXT: ret i32 [[VAR7]]
5375 ; UNROLL-LABEL: @PR32419(
5376 ; UNROLL-NEXT: entry:
5377 ; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5378 ; UNROLL: vector.ph:
5379 ; UNROLL-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[A:%.*]], i64 0
5380 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
5381 ; UNROLL: vector.body:
5382 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE8:%.*]] ]
5383 ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE8]] ]
5384 ; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE8]] ]
5385 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE8]] ]
5386 ; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
5387 ; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer
5388 ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[VEC_IND]], <i16 -2, i16 -2>
5389 ; UNROLL-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
5390 ; UNROLL-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
5391 ; UNROLL-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
5392 ; UNROLL-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5393 ; UNROLL: pred.urem.if:
5394 ; UNROLL-NEXT: [[TMP7:%.*]] = add i16 [[TMP1]], -20
5395 ; UNROLL-NEXT: [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]]
5396 ; UNROLL-NEXT: [[TMP9:%.*]] = insertelement <2 x i16> poison, i16 [[TMP8]], i64 0
5397 ; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE]]
5398 ; UNROLL: pred.urem.continue:
5399 ; UNROLL-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ]
5400 ; UNROLL-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
5401 ; UNROLL-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4:%.*]]
5402 ; UNROLL: pred.urem.if3:
5403 ; UNROLL-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], -19
5404 ; UNROLL-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]]
5405 ; UNROLL-NEXT: [[TMP14:%.*]] = insertelement <2 x i16> [[TMP10]], i16 [[TMP13]], i64 1
5406 ; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE4]]
5407 ; UNROLL: pred.urem.continue4:
5408 ; UNROLL-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF3]] ]
5409 ; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i64 0
5410 ; UNROLL-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF5:%.*]], label [[PRED_UREM_CONTINUE6:%.*]]
5411 ; UNROLL: pred.urem.if5:
5412 ; UNROLL-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], -18
5413 ; UNROLL-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]]
5414 ; UNROLL-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i64 0
5415 ; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE6]]
5416 ; UNROLL: pred.urem.continue6:
5417 ; UNROLL-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, [[PRED_UREM_CONTINUE4]] ], [ [[TMP19]], [[PRED_UREM_IF5]] ]
5418 ; UNROLL-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP5]], i64 1
5419 ; UNROLL-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8]]
5420 ; UNROLL: pred.urem.if7:
5421 ; UNROLL-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], -17
5422 ; UNROLL-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]]
5423 ; UNROLL-NEXT: [[TMP24:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP23]], i64 1
5424 ; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE8]]
5425 ; UNROLL: pred.urem.continue8:
5426 ; UNROLL-NEXT: [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE6]] ], [ [[TMP24]], [[PRED_UREM_IF7]] ]
5427 ; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]]
5428 ; UNROLL-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]]
5429 ; UNROLL-NEXT: [[TMP26:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
5430 ; UNROLL-NEXT: [[TMP27:%.*]] = sext <2 x i16> [[PREDPHI9]] to <2 x i32>
5431 ; UNROLL-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]]
5432 ; UNROLL-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI1]], [[TMP27]]
5433 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5434 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], <i16 4, i16 4>
5435 ; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
5436 ; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5437 ; UNROLL: middle.block:
5438 ; UNROLL-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]]
5439 ; UNROLL-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]])
5440 ; UNROLL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5441 ; UNROLL: scalar.ph:
5442 ; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
5444 ; UNROLL-NEXT: br i1 poison, label [[FOR_INC:%.*]], label [[FOR_COND:%.*]]
5446 ; UNROLL-NEXT: br label [[FOR_INC]]
5448 ; UNROLL-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5450 ; UNROLL-NEXT: [[VAR7:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
5451 ; UNROLL-NEXT: ret i32 [[VAR7]]
5453 ; UNROLL-NO-IC-LABEL: @PR32419(
5454 ; UNROLL-NO-IC-NEXT: entry:
5455 ; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5456 ; UNROLL-NO-IC: vector.ph:
5457 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[A:%.*]], i32 0
5458 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
5459 ; UNROLL-NO-IC: vector.body:
5460 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE8:%.*]] ]
5461 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE8]] ]
5462 ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE8]] ]
5463 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE8]] ]
5464 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], <i16 2, i16 2>
5465 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]]
5466 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
5467 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer
5468 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[STEP_ADD]], zeroinitializer
5469 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
5470 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
5471 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
5472 ; UNROLL-NO-IC-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5473 ; UNROLL-NO-IC: pred.urem.if:
5474 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i16 [[TMP1]], 0
5475 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]]
5476 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = insertelement <2 x i16> poison, i16 [[TMP8]], i32 0
5477 ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE]]
5478 ; UNROLL-NO-IC: pred.urem.continue:
5479 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ]
5480 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
5481 ; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4:%.*]]
5482 ; UNROLL-NO-IC: pred.urem.if3:
5483 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], 1
5484 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]]
5485 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <2 x i16> [[TMP10]], i16 [[TMP13]], i32 1
5486 ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE4]]
5487 ; UNROLL-NO-IC: pred.urem.continue4:
5488 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF3]] ]
5489 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
5490 ; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF5:%.*]], label [[PRED_UREM_CONTINUE6:%.*]]
5491 ; UNROLL-NO-IC: pred.urem.if5:
5492 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], 2
5493 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]]
5494 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i32 0
5495 ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE6]]
5496 ; UNROLL-NO-IC: pred.urem.continue6:
5497 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, [[PRED_UREM_CONTINUE4]] ], [ [[TMP19]], [[PRED_UREM_IF5]] ]
5498 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
5499 ; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8]]
5500 ; UNROLL-NO-IC: pred.urem.if7:
5501 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], 3
5502 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]]
5503 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP23]], i32 1
5504 ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE8]]
5505 ; UNROLL-NO-IC: pred.urem.continue8:
5506 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE6]] ], [ [[TMP24]], [[PRED_UREM_IF7]] ]
5507 ; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]]
5508 ; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]]
5509 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
5510 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = sext <2 x i16> [[PREDPHI9]] to <2 x i32>
5511 ; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]]
5512 ; UNROLL-NO-IC-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI1]], [[TMP27]]
5513 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5514 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], <i16 2, i16 2>
5515 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
5516 ; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5517 ; UNROLL-NO-IC: middle.block:
5518 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]]
5519 ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]])
5520 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 20, 20
5521 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5522 ; UNROLL-NO-IC: scalar.ph:
5523 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ]
5524 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
5525 ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
5526 ; UNROLL-NO-IC: for.body:
5527 ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ]
5528 ; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ]
5529 ; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16
5530 ; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0
5531 ; UNROLL-NO-IC-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]]
5532 ; UNROLL-NO-IC: for.cond:
5533 ; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]]
5534 ; UNROLL-NO-IC-NEXT: br label [[FOR_INC]]
5535 ; UNROLL-NO-IC: for.inc:
5536 ; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ]
5537 ; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32
5538 ; UNROLL-NO-IC-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]]
5539 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
5540 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
5541 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5542 ; UNROLL-NO-IC: for.end:
5543 ; UNROLL-NO-IC-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
5544 ; UNROLL-NO-IC-NEXT: ret i32 [[VAR7]]
5546 ; INTERLEAVE-LABEL: @PR32419(
5547 ; INTERLEAVE-NEXT: entry:
5548 ; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5549 ; INTERLEAVE: vector.ph:
5550 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[A:%.*]], i64 0
5551 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
5552 ; INTERLEAVE: vector.body:
5553 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE16:%.*]] ]
5554 ; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_UREM_CONTINUE16]] ]
5555 ; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_UREM_CONTINUE16]] ]
5556 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -20, i16 -19, i16 -18, i16 -17>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE16]] ]
5557 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
5558 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[VEC_IND]], zeroinitializer
5559 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq <4 x i16> [[VEC_IND]], <i16 -4, i16 -4, i16 -4, i16 -4>
5560 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP2]], <i1 true, i1 true, i1 true, i1 true>
5561 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
5562 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0
5563 ; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5564 ; INTERLEAVE: pred.urem.if:
5565 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = add i16 [[TMP1]], -20
5566 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]]
5567 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = insertelement <4 x i16> poison, i16 [[TMP8]], i64 0
5568 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE]]
5569 ; INTERLEAVE: pred.urem.continue:
5570 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = phi <4 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ]
5571 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
5572 ; INTERLEAVE-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4:%.*]]
5573 ; INTERLEAVE: pred.urem.if3:
5574 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], -19
5575 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]]
5576 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP10]], i16 [[TMP13]], i64 1
5577 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE4]]
5578 ; INTERLEAVE: pred.urem.continue4:
5579 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = phi <4 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF3]] ]
5580 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
5581 ; INTERLEAVE-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF5:%.*]], label [[PRED_UREM_CONTINUE6:%.*]]
5582 ; INTERLEAVE: pred.urem.if5:
5583 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], -18
5584 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]]
5585 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP18]], i64 2
5586 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE6]]
5587 ; INTERLEAVE: pred.urem.continue6:
5588 ; INTERLEAVE-NEXT: [[TMP20:%.*]] = phi <4 x i16> [ [[TMP15]], [[PRED_UREM_CONTINUE4]] ], [ [[TMP19]], [[PRED_UREM_IF5]] ]
5589 ; INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
5590 ; INTERLEAVE-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]]
5591 ; INTERLEAVE: pred.urem.if7:
5592 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], -17
5593 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]]
5594 ; INTERLEAVE-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> [[TMP20]], i16 [[TMP23]], i64 3
5595 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE8]]
5596 ; INTERLEAVE: pred.urem.continue8:
5597 ; INTERLEAVE-NEXT: [[TMP25:%.*]] = phi <4 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE6]] ], [ [[TMP24]], [[PRED_UREM_IF7]] ]
5598 ; INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0
5599 ; INTERLEAVE-NEXT: br i1 [[TMP26]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]]
5600 ; INTERLEAVE: pred.urem.if9:
5601 ; INTERLEAVE-NEXT: [[TMP27:%.*]] = add i16 [[TMP1]], -16
5602 ; INTERLEAVE-NEXT: [[TMP28:%.*]] = urem i16 [[B]], [[TMP27]]
5603 ; INTERLEAVE-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> poison, i16 [[TMP28]], i64 0
5604 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE10]]
5605 ; INTERLEAVE: pred.urem.continue10:
5606 ; INTERLEAVE-NEXT: [[TMP30:%.*]] = phi <4 x i16> [ poison, [[PRED_UREM_CONTINUE8]] ], [ [[TMP29]], [[PRED_UREM_IF9]] ]
5607 ; INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1
5608 ; INTERLEAVE-NEXT: br i1 [[TMP31]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12:%.*]]
5609 ; INTERLEAVE: pred.urem.if11:
5610 ; INTERLEAVE-NEXT: [[TMP32:%.*]] = add i16 [[TMP1]], -15
5611 ; INTERLEAVE-NEXT: [[TMP33:%.*]] = urem i16 [[B]], [[TMP32]]
5612 ; INTERLEAVE-NEXT: [[TMP34:%.*]] = insertelement <4 x i16> [[TMP30]], i16 [[TMP33]], i64 1
5613 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE12]]
5614 ; INTERLEAVE: pred.urem.continue12:
5615 ; INTERLEAVE-NEXT: [[TMP35:%.*]] = phi <4 x i16> [ [[TMP30]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP34]], [[PRED_UREM_IF11]] ]
5616 ; INTERLEAVE-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2
5617 ; INTERLEAVE-NEXT: br i1 [[TMP36]], label [[PRED_UREM_IF13:%.*]], label [[PRED_UREM_CONTINUE14:%.*]]
5618 ; INTERLEAVE: pred.urem.if13:
5619 ; INTERLEAVE-NEXT: [[TMP37:%.*]] = add i16 [[TMP1]], -14
5620 ; INTERLEAVE-NEXT: [[TMP38:%.*]] = urem i16 [[B]], [[TMP37]]
5621 ; INTERLEAVE-NEXT: [[TMP39:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP38]], i64 2
5622 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE14]]
5623 ; INTERLEAVE: pred.urem.continue14:
5624 ; INTERLEAVE-NEXT: [[TMP40:%.*]] = phi <4 x i16> [ [[TMP35]], [[PRED_UREM_CONTINUE12]] ], [ [[TMP39]], [[PRED_UREM_IF13]] ]
5625 ; INTERLEAVE-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3
5626 ; INTERLEAVE-NEXT: br i1 [[TMP41]], label [[PRED_UREM_IF15:%.*]], label [[PRED_UREM_CONTINUE16]]
5627 ; INTERLEAVE: pred.urem.if15:
5628 ; INTERLEAVE-NEXT: [[TMP42:%.*]] = add i16 [[TMP1]], -13
5629 ; INTERLEAVE-NEXT: [[TMP43:%.*]] = urem i16 [[B]], [[TMP42]]
5630 ; INTERLEAVE-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP40]], i16 [[TMP43]], i64 3
5631 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE16]]
5632 ; INTERLEAVE: pred.urem.continue16:
5633 ; INTERLEAVE-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP40]], [[PRED_UREM_CONTINUE14]] ], [ [[TMP44]], [[PRED_UREM_IF15]] ]
5634 ; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> zeroinitializer, <4 x i16> [[TMP25]]
5635 ; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> zeroinitializer, <4 x i16> [[TMP45]]
5636 ; INTERLEAVE-NEXT: [[TMP46:%.*]] = sext <4 x i16> [[PREDPHI]] to <4 x i32>
5637 ; INTERLEAVE-NEXT: [[TMP47:%.*]] = sext <4 x i16> [[PREDPHI17]] to <4 x i32>
5638 ; INTERLEAVE-NEXT: [[TMP48]] = or <4 x i32> [[VEC_PHI]], [[TMP46]]
5639 ; INTERLEAVE-NEXT: [[TMP49]] = or <4 x i32> [[VEC_PHI1]], [[TMP47]]
5640 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
5641 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 8, i16 8, i16 8, i16 8>
5642 ; INTERLEAVE-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5643 ; INTERLEAVE-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5644 ; INTERLEAVE: middle.block:
5645 ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP49]], [[TMP48]]
5646 ; INTERLEAVE-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[BIN_RDX]])
5647 ; INTERLEAVE-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5648 ; INTERLEAVE: scalar.ph:
5649 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -4, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ]
5650 ; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
5651 ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
5652 ; INTERLEAVE: for.body:
5653 ; INTERLEAVE-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ]
5654 ; INTERLEAVE-NEXT: [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ]
5655 ; INTERLEAVE-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16
5656 ; INTERLEAVE-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0
5657 ; INTERLEAVE-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]]
5658 ; INTERLEAVE: for.cond:
5659 ; INTERLEAVE-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]]
5660 ; INTERLEAVE-NEXT: br label [[FOR_INC]]
5661 ; INTERLEAVE: for.inc:
5662 ; INTERLEAVE-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ]
5663 ; INTERLEAVE-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32
5664 ; INTERLEAVE-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]]
5665 ; INTERLEAVE-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
5666 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
5667 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5668 ; INTERLEAVE: for.end:
5669 ; INTERLEAVE-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
5670 ; INTERLEAVE-NEXT: ret i32 [[VAR7]]
5676 %i = phi i32 [ -20, %entry ], [ %i.next, %for.inc ]
5677 %var0 = phi i32 [ %a, %entry ], [ %var6, %for.inc ]
5678 %var1 = trunc i32 %i to i16
5679 %var2 = icmp eq i16 %var1, 0
5680 br i1 %var2, label %for.inc, label %for.cond
5683 %var3 = urem i16 %b, %var1
5687 %var4 = phi i16 [ %var3, %for.cond ], [ 0, %for.body ]
5688 %var5 = sext i16 %var4 to i32
5689 %var6 = or i32 %var0, %var5
5690 %i.next = add nsw i32 %i, 1
5691 %cond = icmp eq i32 %i.next, 0
5692 br i1 %cond, label %for.end, label %for.body
5695 %var7 = phi i32 [ %var6, %for.inc ]
5699 ; Ensure that the shufflevector for a first-order recurrence is inserted
5700 ; correctly after all the phis, including the new phis. These new phis
5701 ; correspond to new IVs that are generated by optimizing non-free truncs of
5702 ; IVs to IVs themselves. This also ensures the first-order recurrence splice
5703 ; recipe is placed correctly if it is fed by an induction.
5704 define i64 @trunc_with_first_order_recurrence() {
5705 ; CHECK-LABEL: @trunc_with_first_order_recurrence(
5706 ; CHECK-NEXT: entry:
5707 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5709 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
5710 ; CHECK: vector.body:
5711 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5712 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
5713 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5714 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ]
5715 ; CHECK-NEXT: [[VEC_IND2]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
5716 ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
5717 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
5718 ; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]]
5719 ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 42, i32 42>
5720 ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]]
5721 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
5722 ; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
5723 ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]]
5724 ; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], <i32 1, i32 1>
5725 ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]]
5726 ; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
5727 ; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]]
5728 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5729 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
5730 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
5731 ; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
5732 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
5733 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
5734 ; CHECK: middle.block:
5735 ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]])
5736 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 113, 112
5737 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 1
5738 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 0
5739 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
5741 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5742 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
5743 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
5744 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
5745 ; CHECK-NEXT: br label [[LOOP:%.*]]
5747 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
5748 ; CHECK-NEXT: ret i64 [[DOTLCSSA]]
5750 ; CHECK-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5751 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5752 ; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5753 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5754 ; CHECK-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32
5755 ; CHECK-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]]
5756 ; CHECK-NEXT: [[C9:%.*]] = add i32 [[C8]], 42
5757 ; CHECK-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
5758 ; CHECK-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]]
5759 ; CHECK-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64
5760 ; CHECK-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]]
5761 ; CHECK-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
5762 ; CHECK-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
5763 ; CHECK-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]]
5764 ; CHECK-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64
5765 ; CHECK-NEXT: [[C23]] = add i64 [[C13]], [[C16]]
5766 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
5767 ; CHECK-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
5768 ; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
5769 ; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
5771 ; IND-LABEL: @trunc_with_first_order_recurrence(
5773 ; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5775 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
5777 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5778 ; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
5779 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5780 ; IND-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ]
5781 ; IND-NEXT: [[VEC_IND2]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
5782 ; IND-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
5783 ; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
5784 ; IND-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]]
5785 ; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 42, i32 42>
5786 ; IND-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]]
5787 ; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
5788 ; IND-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
5789 ; IND-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]]
5790 ; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], <i32 1, i32 1>
5791 ; IND-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]]
5792 ; IND-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
5793 ; IND-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]]
5794 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5795 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
5796 ; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
5797 ; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
5798 ; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
5799 ; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
5800 ; IND: middle.block:
5801 ; IND-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]])
5802 ; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i64 1
5803 ; IND-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
5805 ; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5806 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
5807 ; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
5808 ; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
5809 ; IND-NEXT: br label [[LOOP:%.*]]
5811 ; IND-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
5812 ; IND-NEXT: ret i64 [[DOTLCSSA]]
5814 ; IND-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5815 ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5816 ; IND-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5817 ; IND-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5818 ; IND-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32
5819 ; IND-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]]
5820 ; IND-NEXT: [[C9:%.*]] = add i32 [[C8]], 42
5821 ; IND-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
5822 ; IND-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]]
5823 ; IND-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64
5824 ; IND-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]]
5825 ; IND-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
5826 ; IND-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
5827 ; IND-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]]
5828 ; IND-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64
5829 ; IND-NEXT: [[C23]] = add i64 [[C13]], [[C16]]
5830 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
5831 ; IND-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
5832 ; IND-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
5833 ; IND-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
5835 ; UNROLL-LABEL: @trunc_with_first_order_recurrence(
5836 ; UNROLL-NEXT: entry:
5837 ; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5838 ; UNROLL: vector.ph:
5839 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
5840 ; UNROLL: vector.body:
5841 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5842 ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
5843 ; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
5844 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5845 ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD5:%.*]], [[VECTOR_BODY]] ]
5846 ; UNROLL-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
5847 ; UNROLL-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
5848 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
5849 ; UNROLL-NEXT: [[STEP_ADD5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
5850 ; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND4]], <2 x i32> <i32 1, i32 2>
5851 ; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND4]], <2 x i32> [[STEP_ADD5]], <2 x i32> <i32 1, i32 2>
5852 ; UNROLL-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND4]]
5853 ; UNROLL-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD5]]
5854 ; UNROLL-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], <i32 42, i32 42>
5855 ; UNROLL-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], <i32 42, i32 42>
5856 ; UNROLL-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND4]]
5857 ; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD5]]
5858 ; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
5859 ; UNROLL-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP5]]
5860 ; UNROLL-NEXT: [[TMP10:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
5861 ; UNROLL-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64>
5862 ; UNROLL-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]]
5863 ; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]]
5864 ; UNROLL-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND8]], <i32 1, i32 1>
5865 ; UNROLL-NEXT: [[STEP_ADD9:%.*]] = shl <2 x i32> [[VEC_IND8]], <i32 1, i32 1>
5866 ; UNROLL-NEXT: [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD9]], <i32 4, i32 4>
5867 ; UNROLL-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]]
5868 ; UNROLL-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]]
5869 ; UNROLL-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64>
5870 ; UNROLL-NEXT: [[TMP19:%.*]] = sext <2 x i32> [[TMP17]] to <2 x i64>
5871 ; UNROLL-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]]
5872 ; UNROLL-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]]
5873 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
5874 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
5875 ; UNROLL-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND4]], <i32 4, i32 4>
5876 ; UNROLL-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[VEC_IND8]], <i32 4, i32 4>
5877 ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
5878 ; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
5879 ; UNROLL: middle.block:
5880 ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]]
5881 ; UNROLL-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
5882 ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD5]], i64 1
5883 ; UNROLL-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
5884 ; UNROLL: scalar.ph:
5885 ; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5886 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
5887 ; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
5888 ; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
5889 ; UNROLL-NEXT: br label [[LOOP:%.*]]
5891 ; UNROLL-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
5892 ; UNROLL-NEXT: ret i64 [[DOTLCSSA]]
5894 ; UNROLL-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5895 ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5896 ; UNROLL-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5897 ; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5898 ; UNROLL-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32
5899 ; UNROLL-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]]
5900 ; UNROLL-NEXT: [[C9:%.*]] = add i32 [[C8]], 42
5901 ; UNROLL-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
5902 ; UNROLL-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]]
5903 ; UNROLL-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64
5904 ; UNROLL-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]]
5905 ; UNROLL-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
5906 ; UNROLL-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
5907 ; UNROLL-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]]
5908 ; UNROLL-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64
5909 ; UNROLL-NEXT: [[C23]] = add i64 [[C13]], [[C16]]
5910 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
5911 ; UNROLL-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
5912 ; UNROLL-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
5913 ; UNROLL-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
5915 ; UNROLL-NO-IC-LABEL: @trunc_with_first_order_recurrence(
5916 ; UNROLL-NO-IC-NEXT: entry:
5917 ; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5918 ; UNROLL-NO-IC: vector.ph:
5919 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
5920 ; UNROLL-NO-IC: vector.body:
5921 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5922 ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
5923 ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
5924 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5925 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD5:%.*]], [[VECTOR_BODY]] ]
5926 ; UNROLL-NO-IC-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
5927 ; UNROLL-NO-IC-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
5928 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
5929 ; UNROLL-NO-IC-NEXT: [[STEP_ADD5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
5930 ; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2>
5931 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND4]], <2 x i32> <i32 1, i32 2>
5932 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND4]], <2 x i32> [[STEP_ADD5]], <2 x i32> <i32 1, i32 2>
5933 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND4]]
5934 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD5]]
5935 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], <i32 42, i32 42>
5936 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], <i32 42, i32 42>
5937 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND4]]
5938 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD5]]
5939 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
5940 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP5]]
5941 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
5942 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64>
5943 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]]
5944 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]]
5945 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND8]], <i32 1, i32 1>
5946 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD9]], <i32 1, i32 1>
5947 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]]
5948 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]]
5949 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64>
5950 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = sext <2 x i32> [[TMP17]] to <2 x i64>
5951 ; UNROLL-NO-IC-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]]
5952 ; UNROLL-NO-IC-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]]
5953 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
5954 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
5955 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[STEP_ADD5]], <i32 2, i32 2>
5956 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], <i32 2, i32 2>
5957 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
5958 ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
5959 ; UNROLL-NO-IC: middle.block:
5960 ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]]
5961 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
5962 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 113, 112
5963 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD5]], i32 1
5964 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD5]], i32 0
5965 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
5966 ; UNROLL-NO-IC: scalar.ph:
5967 ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5968 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
5969 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
5970 ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
5971 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
5972 ; UNROLL-NO-IC: exit:
5973 ; UNROLL-NO-IC-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
5974 ; UNROLL-NO-IC-NEXT: ret i64 [[DOTLCSSA]]
5975 ; UNROLL-NO-IC: loop:
5976 ; UNROLL-NO-IC-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5977 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5978 ; UNROLL-NO-IC-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5979 ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5980 ; UNROLL-NO-IC-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32
5981 ; UNROLL-NO-IC-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]]
5982 ; UNROLL-NO-IC-NEXT: [[C9:%.*]] = add i32 [[C8]], 42
5983 ; UNROLL-NO-IC-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
5984 ; UNROLL-NO-IC-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]]
5985 ; UNROLL-NO-IC-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64
5986 ; UNROLL-NO-IC-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]]
5987 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
5988 ; UNROLL-NO-IC-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
5989 ; UNROLL-NO-IC-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]]
5990 ; UNROLL-NO-IC-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64
5991 ; UNROLL-NO-IC-NEXT: [[C23]] = add i64 [[C13]], [[C16]]
5992 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
5993 ; UNROLL-NO-IC-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
5994 ; UNROLL-NO-IC-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
5995 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
5997 ; INTERLEAVE-LABEL: @trunc_with_first_order_recurrence(
5998 ; INTERLEAVE-NEXT: entry:
5999 ; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6000 ; INTERLEAVE: vector.ph:
6001 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
6002 ; INTERLEAVE: vector.body:
6003 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6004 ; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
6005 ; INTERLEAVE-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
6006 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6007 ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD5:%.*]], [[VECTOR_BODY]] ]
6008 ; INTERLEAVE-NEXT: [[VEC_IND4:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
6009 ; INTERLEAVE-NEXT: [[VEC_IND8:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
6010 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
6011 ; INTERLEAVE-NEXT: [[STEP_ADD5]] = add <4 x i32> [[VEC_IND4]], <i32 4, i32 4, i32 4, i32 4>
6012 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6013 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND4]], <4 x i32> [[STEP_ADD5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6014 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[VEC_IND]], [[VEC_IND4]]
6015 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[STEP_ADD]], [[STEP_ADD5]]
6016 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], <i32 42, i32 42, i32 42, i32 42>
6017 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], <i32 42, i32 42, i32 42, i32 42>
6018 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP0]], [[VEC_IND4]]
6019 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP1]], [[STEP_ADD5]]
6020 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP4]]
6021 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP7]], [[TMP5]]
6022 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = sext <4 x i32> [[TMP8]] to <4 x i64>
6023 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64>
6024 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_PHI]], [[TMP10]]
6025 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = add <4 x i64> [[VEC_PHI2]], [[TMP11]]
6026 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND8]], <i32 1, i32 1, i32 1, i32 1>
6027 ; INTERLEAVE-NEXT: [[STEP_ADD9:%.*]] = shl <4 x i32> [[VEC_IND8]], <i32 1, i32 1, i32 1, i32 1>
6028 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD9]], <i32 8, i32 8, i32 8, i32 8>
6029 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP4]], [[TMP14]]
6030 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP5]], [[TMP15]]
6031 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = sext <4 x i32> [[TMP16]] to <4 x i64>
6032 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = sext <4 x i32> [[TMP17]] to <4 x i64>
6033 ; INTERLEAVE-NEXT: [[TMP20]] = add <4 x i64> [[TMP12]], [[TMP18]]
6034 ; INTERLEAVE-NEXT: [[TMP21]] = add <4 x i64> [[TMP13]], [[TMP19]]
6035 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
6036 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
6037 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND4]], <i32 8, i32 8, i32 8, i32 8>
6038 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND8]], <i32 8, i32 8, i32 8, i32 8>
6039 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
6040 ; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
6041 ; INTERLEAVE: middle.block:
6042 ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP21]], [[TMP20]]
6043 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]])
6044 ; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD5]], i64 3
6045 ; INTERLEAVE-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
6046 ; INTERLEAVE: scalar.ph:
6047 ; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6048 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
6049 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
6050 ; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
6051 ; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
6053 ; INTERLEAVE-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
6054 ; INTERLEAVE-NEXT: ret i64 [[DOTLCSSA]]
6056 ; INTERLEAVE-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
6057 ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
6058 ; INTERLEAVE-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
6059 ; INTERLEAVE-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
6060 ; INTERLEAVE-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32
6061 ; INTERLEAVE-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]]
6062 ; INTERLEAVE-NEXT: [[C9:%.*]] = add i32 [[C8]], 42
6063 ; INTERLEAVE-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
6064 ; INTERLEAVE-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]]
6065 ; INTERLEAVE-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64
6066 ; INTERLEAVE-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]]
6067 ; INTERLEAVE-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
6068 ; INTERLEAVE-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
6069 ; INTERLEAVE-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]]
6070 ; INTERLEAVE-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64
6071 ; INTERLEAVE-NEXT: [[C23]] = add i64 [[C13]], [[C16]]
6072 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
6073 ; INTERLEAVE-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
6074 ; INTERLEAVE-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
6075 ; INTERLEAVE-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
6080 exit: ; preds = %loop
6081 %.lcssa = phi i64 [ %c23, %loop ]
6084 loop: ; preds = %loop, %entry
6085 %c5 = phi i64 [ %c23, %loop ], [ 0, %entry ]
6086 %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 1, %entry ]
6087 %x = phi i32 [ %c24, %loop ], [ 1, %entry ]
6088 %y = phi i32 [ %c6, %loop ], [ 42, %entry ]
6089 %c6 = trunc i64 %indvars.iv to i32
6090 %c8 = mul i32 %x, %c6
6091 %c9 = add i32 %c8, 42
6092 %c10 = add i32 %y, %c6
6093 %c11 = add i32 %c10, %c9
6094 %c12 = sext i32 %c11 to i64
6095 %c13 = add i64 %c5, %c12
6096 %indvars.iv.tr = trunc i64 %indvars.iv to i32
6097 %c14 = shl i32 %indvars.iv.tr, 1
6098 %c15 = add i32 %c9, %c14
6099 %c16 = sext i32 %c15 to i64
6100 %c23 = add i64 %c13, %c16
6101 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
6102 %c24 = add nuw nsw i32 %x, 1
6103 %exitcond.i = icmp eq i64 %indvars.iv.next, 114
6104 br i1 %exitcond.i, label %exit, label %loop
; Test case for PR52460: a first-order recurrence whose recurring value is a truncated induction variable.
6109 define void @pr52460_first_order_recurrence_truncated_iv(i32* noalias %src, i32* %dst) {
6111 ; CHECK-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6112 ; CHECK-NEXT: entry:
6113 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6115 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
6116 ; CHECK: vector.body:
6117 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6118 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
6119 ; CHECK-NEXT: [[VEC_IND]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6120 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
6121 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
6122 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6123 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6124 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
6125 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6126 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP2]]
6127 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP1]]
6128 ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP4]]
6129 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 0
6130 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <2 x i32>*
6131 ; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP8]], align 4
6132 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6133 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
6134 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6135 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6136 ; CHECK: middle.block:
6137 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 100
6138 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1
6139 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 0
6140 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6142 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6143 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
6144 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
6145 ; CHECK-NEXT: br label [[LOOP:%.*]]
6147 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
6148 ; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
6149 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
6150 ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[SRC]], align 4
6151 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[SCALAR_RECUR]]
6152 ; CHECK-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1
6153 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
6154 ; CHECK-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32
6155 ; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i32 [[IV_TRUNC]]
6156 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]]
6157 ; CHECK-NEXT: store i32 [[ADD]], i32* [[DST_GEP]], align 4
6158 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
6159 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6161 ; CHECK-NEXT: ret void
6163 ; IND-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6165 ; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6167 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
6169 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6170 ; IND-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
6171 ; IND-NEXT: [[VEC_IND]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6172 ; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6173 ; IND-NEXT: [[TMP1:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6174 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
6175 ; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6176 ; IND-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
6177 ; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6178 ; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT]], 32
6179 ; IND-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP3]]
6180 ; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]]
6181 ; IND-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
6182 ; IND-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4
6183 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6184 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
6185 ; IND-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6186 ; IND-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6187 ; IND: middle.block:
6188 ; IND-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
6190 ; IND-NEXT: br label [[LOOP:%.*]]
6192 ; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6194 ; IND-NEXT: ret void
6196 ; UNROLL-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6197 ; UNROLL-NEXT: entry:
6198 ; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6199 ; UNROLL: vector.ph:
6200 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
6201 ; UNROLL: vector.body:
6202 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6203 ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6204 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6205 ; UNROLL-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
6206 ; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6207 ; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
6208 ; UNROLL-NEXT: [[TMP2:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6209 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
6210 ; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6211 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
6212 ; UNROLL-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT3]], <2 x i32> poison, <2 x i32> zeroinitializer
6213 ; UNROLL-NEXT: [[TMP3:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
6214 ; UNROLL-NEXT: [[TMP4:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP1]]
6215 ; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6216 ; UNROLL-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
6217 ; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP5]]
6218 ; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP3]]
6219 ; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[STEP_ADD]], [[TMP4]]
6220 ; UNROLL-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
6221 ; UNROLL-NEXT: store <2 x i32> [[TMP7]], <2 x i32>* [[TMP9]], align 4
6222 ; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP6]], i64 2
6223 ; UNROLL-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <2 x i32>*
6224 ; UNROLL-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP11]], align 4
6225 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6226 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
6227 ; UNROLL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6228 ; UNROLL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6229 ; UNROLL: middle.block:
6230 ; UNROLL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
6231 ; UNROLL: scalar.ph:
6232 ; UNROLL-NEXT: br label [[LOOP:%.*]]
6234 ; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6236 ; UNROLL-NEXT: ret void
6238 ; UNROLL-NO-IC-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6239 ; UNROLL-NO-IC-NEXT: entry:
6240 ; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6241 ; UNROLL-NO-IC: vector.ph:
6242 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
6243 ; UNROLL-NO-IC: vector.body:
6244 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6245 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6246 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6247 ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
6248 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
6249 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
6250 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 2
6251 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6252 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
6253 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6254 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
6255 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6256 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = load i32, i32* [[SRC]], align 4
6257 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i32 0
6258 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT3]], <2 x i32> poison, <2 x i32> zeroinitializer
6259 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP3]]
6260 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP4]]
6261 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP1]]
6262 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[DST]], i32 [[TMP2]]
6263 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP7]]
6264 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <2 x i32> [[STEP_ADD]], [[TMP8]]
6265 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP9]], i32 0
6266 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>*
6267 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP11]], <2 x i32>* [[TMP14]], align 4
6268 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2
6269 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>*
6270 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP12]], <2 x i32>* [[TMP16]], align 4
6271 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6272 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
6273 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6274 ; UNROLL-NO-IC-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6275 ; UNROLL-NO-IC: middle.block:
6276 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 100
6277 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1
6278 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 0
6279 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6280 ; UNROLL-NO-IC: scalar.ph:
6281 ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6282 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
6283 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
6284 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
6285 ; UNROLL-NO-IC: loop:
6286 ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
6287 ; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
6288 ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
6289 ; UNROLL-NO-IC-NEXT: [[LV:%.*]] = load i32, i32* [[SRC]], align 4
6290 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[SCALAR_RECUR]]
6291 ; UNROLL-NO-IC-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1
6292 ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
6293 ; UNROLL-NO-IC-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32
6294 ; UNROLL-NO-IC-NEXT: [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i32 [[IV_TRUNC]]
6295 ; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]]
6296 ; UNROLL-NO-IC-NEXT: store i32 [[ADD]], i32* [[DST_GEP]], align 4
6297 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
6298 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6299 ; UNROLL-NO-IC: exit:
6300 ; UNROLL-NO-IC-NEXT: ret void
6302 ; INTERLEAVE-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6303 ; INTERLEAVE-NEXT: entry:
6304 ; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6305 ; INTERLEAVE: vector.ph:
6306 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
6307 ; INTERLEAVE: vector.body:
6308 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6309 ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6310 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6311 ; INTERLEAVE-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
6312 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6313 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6314 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6315 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
6316 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
6317 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
6318 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
6319 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
6320 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT4]], [[TMP1]]
6321 ; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6322 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
6323 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP5]]
6324 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[VEC_IND]], [[TMP3]]
6325 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STEP_ADD]], [[TMP4]]
6326 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
6327 ; INTERLEAVE-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4
6328 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP6]], i64 4
6329 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
6330 ; INTERLEAVE-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4
6331 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
6332 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
6333 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
6334 ; INTERLEAVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6335 ; INTERLEAVE: middle.block:
6336 ; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3
6337 ; INTERLEAVE-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
6338 ; INTERLEAVE: scalar.ph:
6339 ; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6340 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 96, [[MIDDLE_BLOCK]] ]
6341 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ 96, [[MIDDLE_BLOCK]] ]
6342 ; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
6344 ; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
6345 ; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
6346 ; INTERLEAVE-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
6347 ; INTERLEAVE-NEXT: [[LV:%.*]] = load i32, i32* [[SRC]], align 4
6348 ; INTERLEAVE-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[SCALAR_RECUR]]
6349 ; INTERLEAVE-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1
6350 ; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
6351 ; INTERLEAVE-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32
6352 ; INTERLEAVE-NEXT: [[SEXT5:%.*]] = shl i64 [[IV]], 32
6353 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = ashr exact i64 [[SEXT5]], 32
6354 ; INTERLEAVE-NEXT: [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP13]]
6355 ; INTERLEAVE-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[IV_TRUNC]]
6356 ; INTERLEAVE-NEXT: store i32 [[ADD]], i32* [[DST_GEP]], align 4
6357 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
6358 ; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6360 ; INTERLEAVE-NEXT: ret void
6366 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
6367 %trunc.iv = phi i32 [ 0, %entry ], [ %trunc.iv.next, %loop ]
6368 %recur = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ]
6369 %lv = load i32, i32* %src, align 4
6370 %mul = mul nsw i32 %lv, %recur
6371 %trunc.iv.next = add i32 %trunc.iv, 1
6372 %iv.next = add nuw nsw i64 %iv, 1
6373 %iv.trunc = trunc i64 %iv to i32
6374 %dst.gep = getelementptr i32, i32* %dst, i32 %iv.trunc
6375 %add = add i32 %iv.trunc, %mul
6376 store i32 %add, i32* %dst.gep
6377 %exitcond = icmp eq i32 %trunc.iv.next, 100
6378 br i1 %exitcond, label %exit, label %loop
6384 ; Test case where %iv.2.ext and %iv.2.conv become redundant due to the SCEV
6385 ; predicates generated for the vector loop. They should be removed in the vector loop.
6387 define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n, i32 %step, i32* %ptr) {
6389 ; CHECK-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6390 ; CHECK-NEXT: entry:
6391 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
6392 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6393 ; CHECK: vector.scevcheck:
6394 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
6395 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6396 ; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]]
6397 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
6398 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
6399 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
6400 ; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
6401 ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6402 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6403 ; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
6404 ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
6405 ; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
6406 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP7]]
6407 ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
6408 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
6409 ; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
6410 ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
6411 ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
6412 ; CHECK-NEXT: [[TMP15:%.*]] = sext i8 [[TMP1]] to i32
6413 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP15]]
6414 ; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[IDENT_CHECK]]
6415 ; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6417 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
6418 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
6419 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
6420 ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_VTC]], [[STEP]]
6421 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i32 0
6422 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6423 ; CHECK-NEXT: [[TMP17:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
6424 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP17]]
6425 ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 2
6426 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
6427 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
6428 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
6429 ; CHECK: vector.body:
6430 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6431 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
6432 ; CHECK-NEXT: [[VEC_IND]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6433 ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 0
6434 ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6435 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP19]]
6436 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0
6437 ; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <2 x i32>*
6438 ; CHECK-NEXT: store <2 x i32> [[TMP20]], <2 x i32>* [[TMP23]], align 4
6439 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6440 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6441 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6442 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6443 ; CHECK: middle.block:
6444 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
6445 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1
6446 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 0
6447 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6449 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6450 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
6451 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
6452 ; CHECK-NEXT: br label [[LOOP:%.*]]
6454 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6455 ; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6456 ; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6457 ; CHECK-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6458 ; CHECK-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6459 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6460 ; CHECK-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6461 ; CHECK-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6462 ; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6463 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6464 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6466 ; CHECK-NEXT: ret void
6468 ; IND-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6470 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
6471 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6472 ; IND: vector.scevcheck:
6473 ; IND-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
6474 ; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6475 ; IND-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
6476 ; IND-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false)
6477 ; IND-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8
6478 ; IND-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]])
6479 ; IND-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6480 ; IND-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6481 ; IND-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
6482 ; IND-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
6483 ; IND-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]]
6484 ; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
6485 ; IND-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
6486 ; IND-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
6487 ; IND-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
6488 ; IND-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
6489 ; IND-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128
6490 ; IND-NEXT: [[TMP14:%.*]] = icmp ult i32 [[TMP13]], -256
6491 ; IND-NEXT: [[TMP15:%.*]] = or i1 [[TMP12]], [[TMP14]]
6492 ; IND-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6494 ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2
6495 ; IND-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
6496 ; IND-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_VTC]], [[STEP]]
6497 ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
6498 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6499 ; IND-NEXT: [[TMP16:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
6500 ; IND-NEXT: [[TMP17:%.*]] = shl i32 [[STEP]], 1
6501 ; IND-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i64 0
6502 ; IND-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
6503 ; IND-NEXT: br label [[VECTOR_BODY:%.*]]
6505 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6506 ; IND-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
6507 ; IND-NEXT: [[VEC_IND]] = phi <2 x i32> [ [[TMP16]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6508 ; IND-NEXT: [[TMP18:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6509 ; IND-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]]
6510 ; IND-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <2 x i32>*
6511 ; IND-NEXT: store <2 x i32> [[TMP18]], <2 x i32>* [[TMP20]], align 4
6512 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6513 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6514 ; IND-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6515 ; IND-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6516 ; IND: middle.block:
6517 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
6518 ; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i64 1
6519 ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6521 ; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6522 ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
6523 ; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
6524 ; IND-NEXT: br label [[LOOP:%.*]]
6526 ; IND-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6527 ; IND-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6528 ; IND-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6529 ; IND-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6530 ; IND-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6531 ; IND-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6532 ; IND-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6533 ; IND-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6534 ; IND-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6535 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6536 ; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6538 ; IND-NEXT: ret void
6540 ; UNROLL-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6541 ; UNROLL-NEXT: entry:
6542 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
6543 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6544 ; UNROLL: vector.scevcheck:
6545 ; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
6546 ; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6547 ; UNROLL-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
6548 ; UNROLL-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false)
6549 ; UNROLL-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8
6550 ; UNROLL-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]])
6551 ; UNROLL-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6552 ; UNROLL-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6553 ; UNROLL-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
6554 ; UNROLL-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
6555 ; UNROLL-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]]
6556 ; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
6557 ; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
6558 ; UNROLL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
6559 ; UNROLL-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
6560 ; UNROLL-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
6561 ; UNROLL-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128
6562 ; UNROLL-NEXT: [[TMP14:%.*]] = icmp ult i32 [[TMP13]], -256
6563 ; UNROLL-NEXT: [[TMP15:%.*]] = or i1 [[TMP12]], [[TMP14]]
6564 ; UNROLL-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6565 ; UNROLL: vector.ph:
6566 ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4
6567 ; UNROLL-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
6568 ; UNROLL-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_VTC]], [[STEP]]
6569 ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
6570 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6571 ; UNROLL-NEXT: [[TMP16:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
6572 ; UNROLL-NEXT: [[TMP17:%.*]] = shl i32 [[STEP]], 1
6573 ; UNROLL-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i64 0
6574 ; UNROLL-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
6575 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
6576 ; UNROLL: vector.body:
6577 ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6578 ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6579 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[TMP16]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6580 ; UNROLL-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6581 ; UNROLL-NEXT: [[TMP18:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6582 ; UNROLL-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
6583 ; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]]
6584 ; UNROLL-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>*
6585 ; UNROLL-NEXT: store <2 x i32> [[TMP18]], <2 x i32>* [[TMP21]], align 4
6586 ; UNROLL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 2
6587 ; UNROLL-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <2 x i32>*
6588 ; UNROLL-NEXT: store <2 x i32> [[TMP19]], <2 x i32>* [[TMP23]], align 4
6589 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6590 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[DOTSPLAT3]]
6591 ; UNROLL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6592 ; UNROLL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6593 ; UNROLL: middle.block:
6594 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
6595 ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i64 1
6596 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6597 ; UNROLL: scalar.ph:
6598 ; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6599 ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
6600 ; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
6601 ; UNROLL-NEXT: br label [[LOOP:%.*]]
6603 ; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6604 ; UNROLL-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6605 ; UNROLL-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6606 ; UNROLL-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6607 ; UNROLL-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6608 ; UNROLL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6609 ; UNROLL-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6610 ; UNROLL-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6611 ; UNROLL-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6612 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6613 ; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6615 ; UNROLL-NEXT: ret void
6617 ; UNROLL-NO-IC-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6618 ; UNROLL-NO-IC-NEXT: entry:
6619 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
6620 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6621 ; UNROLL-NO-IC: vector.scevcheck:
6622 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
6623 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6624 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]]
6625 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
6626 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
6627 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
6628 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
6629 ; UNROLL-NO-IC-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6630 ; UNROLL-NO-IC-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6631 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
6632 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
6633 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
6634 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP7]]
6635 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
6636 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
6637 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
6638 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
6639 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
6640 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = sext i8 [[TMP1]] to i32
6641 ; UNROLL-NO-IC-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP15]]
6642 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[IDENT_CHECK]]
6643 ; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6644 ; UNROLL-NO-IC: vector.ph:
6645 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
6646 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
6647 ; UNROLL-NO-IC-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
6648 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_VTC]], [[STEP]]
6649 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i32 0
6650 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6651 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
6652 ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP17]]
6653 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 2
6654 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
6655 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
6656 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
6657 ; UNROLL-NO-IC: vector.body:
6658 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6659 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6660 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6661 ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6662 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 0
6663 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 2
6664 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6665 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
6666 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP19]]
6667 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[TMP20]]
6668 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
6669 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <2 x i32>*
6670 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP21]], <2 x i32>* [[TMP26]], align 4
6671 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 2
6672 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP27]] to <2 x i32>*
6673 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP22]], <2 x i32>* [[TMP28]], align 4
6674 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6675 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[DOTSPLAT3]]
6676 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6677 ; UNROLL-NO-IC-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6678 ; UNROLL-NO-IC: middle.block:
6679 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
6680 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1
6681 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 0
6682 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6683 ; UNROLL-NO-IC: scalar.ph:
6684 ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6685 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
6686 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
6687 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
6688 ; UNROLL-NO-IC: loop:
6689 ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6690 ; UNROLL-NO-IC-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6691 ; UNROLL-NO-IC-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6692 ; UNROLL-NO-IC-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6693 ; UNROLL-NO-IC-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6694 ; UNROLL-NO-IC-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6695 ; UNROLL-NO-IC-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6696 ; UNROLL-NO-IC-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6697 ; UNROLL-NO-IC-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6698 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6699 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6700 ; UNROLL-NO-IC: exit:
6701 ; UNROLL-NO-IC-NEXT: ret void
6703 ; INTERLEAVE-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6704 ; INTERLEAVE-NEXT: entry:
6705 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
6706 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6707 ; INTERLEAVE: vector.scevcheck:
6708 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
6709 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6710 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
6711 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false)
6712 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8
6713 ; INTERLEAVE-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]])
6714 ; INTERLEAVE-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6715 ; INTERLEAVE-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6716 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
6717 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
6718 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]]
6719 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
6720 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
6721 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
6722 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
6723 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
6724 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128
6725 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = icmp ult i32 [[TMP13]], -256
6726 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = or i1 [[TMP12]], [[TMP14]]
6727 ; INTERLEAVE-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6728 ; INTERLEAVE: vector.ph:
6729 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
6730 ; INTERLEAVE-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
6731 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_VTC]], [[STEP]]
6732 ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
6733 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
6734 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = mul <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
6735 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = shl i32 [[STEP]], 2
6736 ; INTERLEAVE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0
6737 ; INTERLEAVE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
6738 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
6739 ; INTERLEAVE: vector.body:
6740 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6741 ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6742 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[TMP16]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6743 ; INTERLEAVE-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6744 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6745 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6746 ; INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]]
6747 ; INTERLEAVE-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
6748 ; INTERLEAVE-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP21]], align 4
6749 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 4
6750 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
6751 ; INTERLEAVE-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP23]], align 4
6752 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
6753 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], [[DOTSPLAT3]]
6754 ; INTERLEAVE-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6755 ; INTERLEAVE-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6756 ; INTERLEAVE: middle.block:
6757 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
6758 ; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3
6759 ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6760 ; INTERLEAVE: scalar.ph:
6761 ; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6762 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
6763 ; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
6764 ; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
6766 ; INTERLEAVE-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6767 ; INTERLEAVE-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6768 ; INTERLEAVE-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6769 ; INTERLEAVE-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6770 ; INTERLEAVE-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6771 ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6772 ; INTERLEAVE-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6773 ; INTERLEAVE-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6774 ; INTERLEAVE-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6775 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6776 ; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6778 ; INTERLEAVE-NEXT: ret void
6784 %for = phi i32 [ 0, %entry ], [ %iv.2.conv, %loop ]
6785 %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ]
6786 %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ]
6787 %iv.2.ext = shl i32 %iv.2, 24
6788 %iv.2.conv = ashr exact i32 %iv.2.ext, 24
6789 %gep = getelementptr inbounds i32, i32* %ptr, i64 %iv.1
6790 store i32 %for, i32* %gep, align 4
6791 %iv.2.next = add nsw i32 %iv.2.conv, %step
6792 %iv.1.next = add nuw nsw i64 %iv.1, 1
6793 %exitcond = icmp eq i64 %iv.1.next, %n
6794 br i1 %exitcond, label %exit, label %loop