1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 %s -S | FileCheck --check-prefix=VF2 %s
3 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 %s -S | FileCheck --check-prefix=VF4 %s
5 ; for (iv = 0, iv2 = 0 ; iv != 1000 ; iv += 1, iv2 += 1) B[iv] = A[iv/1 + iv2/1] + 42;
6 define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
7 ; VF2-LABEL: define void @ld_div1_step1_start0_ind2
8 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
10 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
12 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
14 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
15 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
16 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
17 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
18 ; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
19 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
20 ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
21 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
22 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
23 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
24 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
25 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
26 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
27 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
28 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
29 ; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
30 ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
31 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
32 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
33 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
34 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
35 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
36 ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
37 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
39 ; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
41 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
42 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
43 ; VF2-NEXT: br label [[LOOP:%.*]]
45 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
46 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
47 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
48 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
49 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
50 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
51 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
52 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
53 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
54 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
55 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
56 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
57 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
58 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
62 ; VF4-LABEL: define void @ld_div1_step1_start0_ind2
63 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
65 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
67 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
69 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
70 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
71 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
72 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
73 ; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
74 ; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
75 ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
76 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
77 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
78 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
79 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
80 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
81 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
82 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
83 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
84 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
85 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
86 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
87 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
88 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
89 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
90 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
91 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
92 ; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
93 ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
94 ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
95 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
96 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
97 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
98 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
99 ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
100 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
102 ; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
104 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
105 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
106 ; VF4-NEXT: br label [[LOOP:%.*]]
108 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
109 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
110 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
111 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
112 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
113 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
114 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
115 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
116 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
117 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
118 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
119 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
120 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
121 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; Scalar source loop: iv and iv2 both start at 0 and advance by 1 per iteration.
; The checks above expect one scalar load per lane at both VF=2 and VF=4.
128 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
129 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
130 %div1 = udiv i64 %iv, 1 ; iv / 1
131 %div2 = udiv i64 %iv2, 1 ; iv2 / 1
132 %add = add i64 %div1, %div2 ; load index = iv/1 + iv2/1
133 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add ; &A[load index]
134 %ld = load i64, ptr %gep_ld, align 8
135 %calc = add nsw i64 %ld, 42 ; value to store = A[load index] + 42
136 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv ; &B[iv]
137 store i64 %calc, ptr %gep_st, align 8
138 %iv2_next = add nsw i64 %iv2, 1
139 %iv_next = add nsw i64 %iv, 1
140 %cond = icmp eq i64 %iv_next, 1000 ; leave the loop once the incremented iv equals 1000
141 br i1 %cond, label %exit, label %loop
146 ; for (iv = 0, iv2 = 0 ; iv != 1000 ; iv += 1, iv2 += 1) B[iv] = A[iv/2 + iv2/2] + 42;
147 ; A[iv/2 + iv2/2] is uniform for VF=2 but not for VF=4.
148 define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
149 ; VF2-LABEL: define void @ld_div2_step1_start0_ind2
150 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
152 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
154 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
156 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
157 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
158 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
159 ; VF2-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 2
160 ; VF2-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 2
161 ; VF2-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
162 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
163 ; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
164 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0
165 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
166 ; VF2-NEXT: [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42>
167 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
168 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
169 ; VF2-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP9]], align 8
170 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
171 ; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
172 ; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
174 ; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
176 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
177 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
178 ; VF2-NEXT: br label [[LOOP:%.*]]
180 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
181 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
182 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
183 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
184 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
185 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
186 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
187 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
188 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
189 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
190 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
191 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
192 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
193 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
197 ; VF4-LABEL: define void @ld_div2_step1_start0_ind2
198 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
200 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
202 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
204 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
205 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
206 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
207 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
208 ; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
209 ; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
210 ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
211 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
212 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
213 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
214 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
215 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
216 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
217 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
218 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
219 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
220 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
221 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
222 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
223 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
224 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
225 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
226 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
227 ; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
228 ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
229 ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
230 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
231 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
232 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
233 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
234 ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
235 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
237 ; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
239 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
240 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
241 ; VF4-NEXT: br label [[LOOP:%.*]]
243 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
244 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
245 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
246 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
247 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
248 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
249 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
250 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
251 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
252 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
253 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
254 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
255 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
256 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; Scalar source loop: iv and iv2 both start at 0 and advance by 1 per iteration.
; The checks above expect a single scalar load splatted across lanes at VF=2,
; and one scalar load per lane at VF=4.
263 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
264 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
265 %div1 = udiv i64 %iv, 2 ; iv / 2
266 %div2 = udiv i64 %iv2, 2 ; iv2 / 2
267 %add = add i64 %div1, %div2 ; load index = iv/2 + iv2/2
268 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add ; &A[load index]
269 %ld = load i64, ptr %gep_ld, align 8
270 %calc = add nsw i64 %ld, 42 ; value to store = A[load index] + 42
271 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv ; &B[iv]
272 store i64 %calc, ptr %gep_st, align 8
273 %iv2_next = add nsw i64 %iv2, 1
274 %iv_next = add nsw i64 %iv, 1
275 %cond = icmp eq i64 %iv_next, 1000 ; leave the loop once the incremented iv equals 1000
276 br i1 %cond, label %exit, label %loop
281 ; for (iv = 0, iv2 = 0 ; iv != 1000 ; iv += 1, iv2 += 1) B[iv] = A[iv/3 + iv2/3] + 42;
282 define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
283 ; VF2-LABEL: define void @ld_div3_step1_start0_ind2
284 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
286 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
288 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
290 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
291 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
292 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
293 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
294 ; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
295 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
296 ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
297 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
298 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
299 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
300 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
301 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
302 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
303 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
304 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
305 ; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
306 ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
307 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
308 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
309 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
310 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
311 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
312 ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
313 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
315 ; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
317 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
318 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
319 ; VF2-NEXT: br label [[LOOP:%.*]]
321 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
322 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
323 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
324 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
325 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
326 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
327 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
328 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
329 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
330 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
331 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
332 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
333 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
334 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
338 ; VF4-LABEL: define void @ld_div3_step1_start0_ind2
339 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
341 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
343 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
345 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
346 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
347 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
348 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
349 ; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
350 ; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
351 ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
352 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
353 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
354 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
355 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
356 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
357 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
358 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
359 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
360 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
361 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
362 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
363 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
364 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
365 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
366 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
367 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
368 ; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
369 ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
370 ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
371 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
372 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
373 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
374 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
375 ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
376 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
378 ; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
380 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
381 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
382 ; VF4-NEXT: br label [[LOOP:%.*]]
384 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
385 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
386 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
387 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
388 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
389 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
390 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
391 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
392 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
393 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
394 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
395 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
396 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
397 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; Scalar source loop: iv and iv2 both start at 0 and advance by 1 per iteration.
; The checks above expect one scalar load per lane at both VF=2 and VF=4.
404 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
405 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
406 %div1 = udiv i64 %iv, 3 ; iv / 3
407 %div2 = udiv i64 %iv2, 3 ; iv2 / 3
408 %add = add i64 %div1, %div2 ; load index = iv/3 + iv2/3
409 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add ; &A[load index]
410 %ld = load i64, ptr %gep_ld, align 8
411 %calc = add nsw i64 %ld, 42 ; value to store = A[load index] + 42
412 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv ; &B[iv]
413 store i64 %calc, ptr %gep_st, align 8
414 %iv2_next = add nsw i64 %iv2, 1
415 %iv_next = add nsw i64 %iv, 1
416 %cond = icmp eq i64 %iv_next, 1000 ; leave the loop once the incremented iv equals 1000
417 br i1 %cond, label %exit, label %loop
422 ; for (iv = 0, iv2 = 0 ; iv != 1000 ; iv += 2, iv2 += 1) B[iv] = A[iv/1 + iv2/1] + 42;
423 define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
424 ; VF2-LABEL: define void @ld_div1_step2_start0_ind2
425 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
427 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
429 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
431 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
432 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
433 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
434 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
435 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
436 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
437 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
438 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
439 ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
440 ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
441 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
442 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
443 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
444 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP6]], align 8
445 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
446 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
447 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
448 ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
449 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
450 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
451 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
452 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8
453 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
454 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
455 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
456 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
457 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
458 ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
459 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
461 ; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
463 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
464 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
465 ; VF2-NEXT: br label [[LOOP:%.*]]
467 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
468 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
469 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
470 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
471 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
472 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
473 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
474 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
475 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
476 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
477 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
478 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
479 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
480 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
484 ; VF4-LABEL: define void @ld_div1_step2_start0_ind2
485 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
487 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
489 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
491 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
492 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
493 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
494 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
495 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
496 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
497 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
498 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
499 ; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
500 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
501 ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
502 ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
503 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
504 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
505 ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
506 ; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
507 ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
508 ; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
509 ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
510 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP8]], align 8
511 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP10]], align 8
512 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP12]], align 8
513 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
514 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
515 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
516 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
517 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
518 ; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
519 ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
520 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
521 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
522 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
523 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
524 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP24]], align 8
525 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
526 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
527 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
528 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
529 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
530 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
531 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
532 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
533 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
534 ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
535 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
537 ; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
539 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
540 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
541 ; VF4-NEXT: br label [[LOOP:%.*]]
543 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
544 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
545 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
546 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
547 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
548 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
549 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
550 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
551 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
552 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
553 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
554 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
555 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
556 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
563 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
564 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
565 %div1 = udiv i64 %iv, 1
566 %div2 = udiv i64 %iv2, 1
567 %add = add i64 %div1, %div2
568 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
569 %ld = load i64, ptr %gep_ld, align 8
570 %calc = add nsw i64 %ld, 42
571 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
572 store i64 %calc, ptr %gep_st, align 8
573 %iv2_next = add nsw i64 %iv2, 1
574 %iv_next = add nsw i64 %iv, 2
575 %cond = icmp eq i64 %iv_next, 1000
576 br i1 %cond, label %exit, label %loop
581 ; for (iv = 0, iv2 = 0 ; ; iv += 2, iv2 += 1) B[iv] = A[iv/2 + iv2/2] + 42;  (bottom-tested loop: exits once the incremented iv == 1000)
582 define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
583 ; VF2-LABEL: define void @ld_div2_step2_start0_ind2
584 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
586 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
588 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
590 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
591 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
592 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
593 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
594 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
595 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
596 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
597 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
598 ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
599 ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
600 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
601 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
602 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
603 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP6]], align 8
604 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
605 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
606 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
607 ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
608 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
609 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
610 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
611 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8
612 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
613 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
614 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
615 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
616 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
617 ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
618 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
620 ; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
622 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
623 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
624 ; VF2-NEXT: br label [[LOOP:%.*]]
626 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
627 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
628 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
629 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
630 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
631 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
632 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
633 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
634 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
635 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
636 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
637 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
638 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
639 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
643 ; VF4-LABEL: define void @ld_div2_step2_start0_ind2
644 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
646 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
648 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
650 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
651 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
652 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
653 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
654 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
655 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
656 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
657 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
658 ; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
659 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
660 ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
661 ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
662 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
663 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
664 ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
665 ; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
666 ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
667 ; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
668 ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
669 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP8]], align 8
670 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP10]], align 8
671 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP12]], align 8
672 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
673 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
674 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
675 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
676 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
677 ; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
678 ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
679 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
680 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
681 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
682 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
683 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP24]], align 8
684 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
685 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
686 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
687 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
688 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
689 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
690 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
691 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
692 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
693 ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
694 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
696 ; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
698 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
699 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
700 ; VF4-NEXT: br label [[LOOP:%.*]]
702 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
703 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
704 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
705 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
706 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
707 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
708 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
709 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
710 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
711 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
712 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
713 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
714 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
715 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
722 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
723 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
724 %div1 = udiv i64 %iv, 2
725 %div2 = udiv i64 %iv2, 2
726 %add = add i64 %div1, %div2
727 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
728 %ld = load i64, ptr %gep_ld, align 8
729 %calc = add nsw i64 %ld, 42
730 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
731 store i64 %calc, ptr %gep_st, align 8
732 %iv2_next = add nsw i64 %iv2, 1
733 %iv_next = add nsw i64 %iv, 2
734 %cond = icmp eq i64 %iv_next, 1000
735 br i1 %cond, label %exit, label %loop
740 ; for (iv = 0, iv2 = 0 ; ; iv += 2, iv2 += 1) B[iv] = A[iv/3 + iv2/3] + 42;  (bottom-tested loop: exits once the incremented iv == 1000)
741 define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
742 ; VF2-LABEL: define void @ld_div3_step2_start0_ind2
743 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
745 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
747 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
749 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
750 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
751 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
752 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
753 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
754 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
755 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
756 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
757 ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
758 ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
759 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
760 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
761 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
762 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP6]], align 8
763 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
764 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
765 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
766 ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
767 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
768 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
769 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
770 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8
771 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
772 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
773 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
774 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
775 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
776 ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
777 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
779 ; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
781 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
782 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
783 ; VF2-NEXT: br label [[LOOP:%.*]]
785 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
786 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
787 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
788 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
789 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
790 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
791 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
792 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
793 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
794 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
795 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
796 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
797 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
798 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
802 ; VF4-LABEL: define void @ld_div3_step2_start0_ind2
803 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
805 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
807 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
809 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
810 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
811 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
812 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
813 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
814 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
815 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
816 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
817 ; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
818 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
819 ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
820 ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
821 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
822 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
823 ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
824 ; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
825 ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
826 ; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
827 ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
828 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP8]], align 8
829 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP10]], align 8
830 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP12]], align 8
831 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
832 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
833 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
834 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
835 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
836 ; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
837 ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
838 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
839 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
840 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
841 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
842 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP24]], align 8
843 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
844 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
845 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
846 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
847 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
848 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
849 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
850 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
851 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
852 ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
853 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
855 ; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
857 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
858 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
859 ; VF4-NEXT: br label [[LOOP:%.*]]
861 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
862 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
863 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
864 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
865 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
866 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
867 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
868 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
869 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
870 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
871 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
872 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
873 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
874 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
881 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
882 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
883 %div1 = udiv i64 %iv, 3
884 %div2 = udiv i64 %iv2, 3
885 %add = add i64 %div1, %div2
886 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
887 %ld = load i64, ptr %gep_ld, align 8
888 %calc = add nsw i64 %ld, 42
889 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
890 store i64 %calc, ptr %gep_st, align 8
891 %iv2_next = add nsw i64 %iv2, 1
892 %iv_next = add nsw i64 %iv, 2
893 %cond = icmp eq i64 %iv_next, 1000
894 br i1 %cond, label %exit, label %loop
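899 ; for (iv = 0, iv2 = 0 ; ; iv += 3, iv2 += 1) B[iv] = A[iv/1 + iv2/1] + 42;  (bottom-tested loop: exits once the incremented iv == 1000. NOTE(review): 1000 is not a multiple of the step 3, so that value is never reached without signed overflow of the nsw add -- confirm the bound is intended)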
900 define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
901 ; VF2-LABEL: define void @ld_div1_step3_start0_ind2
902 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
904 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
906 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
908 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
909 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
910 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
911 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
912 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
913 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
914 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
915 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
916 ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
917 ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
918 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
919 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
920 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
921 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP6]], align 8
922 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
923 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
924 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
925 ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
926 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
927 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
928 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
929 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8
930 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
931 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
932 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
933 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
934 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
935 ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
936 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
938 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
940 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
941 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
942 ; VF2-NEXT: br label [[LOOP:%.*]]
944 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
945 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
946 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
947 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
948 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
949 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
950 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
951 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
952 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
953 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
954 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
955 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
956 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
957 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
961 ; VF4-LABEL: define void @ld_div1_step3_start0_ind2
962 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
964 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
966 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
968 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
969 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
970 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
971 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
972 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
973 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
974 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
975 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
976 ; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
977 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
978 ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
979 ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
980 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
981 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
982 ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
983 ; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
984 ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
985 ; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
986 ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
987 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP8]], align 8
988 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP10]], align 8
989 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP12]], align 8
990 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
991 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
992 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
993 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
994 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
995 ; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
996 ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
997 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
998 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
999 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
1000 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
1001 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP24]], align 8
1002 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
1003 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
1004 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
1005 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
1006 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
1007 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
1008 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1009 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
1010 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
1011 ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
1012 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
1013 ; VF4: middle.block:
1014 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1016 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1017 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1018 ; VF4-NEXT: br label [[LOOP:%.*]]
1020 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1021 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1022 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
1023 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
1024 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1025 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1026 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1027 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1028 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1029 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1030 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1031 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
1032 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1033 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
1035 ; VF4-NEXT: ret void
1040 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
1041 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
1042 %div1 = udiv i64 %iv, 1
1043 %div2 = udiv i64 %iv2, 1
1044 %add = add i64 %div1, %div2
1045 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
1046 %ld = load i64, ptr %gep_ld, align 8
1047 %calc = add nsw i64 %ld, 42
1048 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
1049 store i64 %calc, ptr %gep_st, align 8
1050 %iv2_next = add nsw i64 %iv2, 1
1051 %iv_next = add nsw i64 %iv, 3
1052 %cond = icmp eq i64 %iv_next, 1000
1053 br i1 %cond, label %exit, label %loop
1058 ; for (iv = 0, iv2 = 0 ; ; iv += 3, iv2 += 1) B[iv] = A[iv/2 + iv2/2] + 42;
;
; Two induction variables feed the load index: %iv (start 0, step 3) and %iv2
; (start 0, step 1). Each is divided by 2 with udiv and the quotients are
; added. The loaded value plus 42 is stored to B[%iv]; the loop exits when
; %iv_next compares equal to 1000.
;
; Expected output for both check prefixes: the index is computed with vector
; udiv/add on the widened induction variables, but the load itself is emitted
; per lane (extractelement + scalar getelementptr + scalar load, rebuilt with
; insertelement). The store to B is also emitted per lane, at offsets 0 and 3
; (plus 6 and 9 for width 4) from OFFSET_IDX = INDEX * 3. The vector loop
; exits when INDEX_NEXT reaches 332 and the scalar loop resumes at iv = 996,
; iv2 = 332.
;
; NOTE(review): counting up from 0 in steps of 3 without wrapping, %iv_next
; never equals 1000 exactly (1000 is not a multiple of 3). The bounds quoted
; above are what the assertions currently record -- confirm the exit bound is
; intentional before changing it.
1059 define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
1060 ; VF2-LABEL: define void @ld_div2_step3_start0_ind2
1061 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1063 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1065 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
1067 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1068 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1069 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1070 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
1071 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1072 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
1073 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1074 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1075 ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
1076 ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
1077 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
1078 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
1079 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
1080 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP6]], align 8
1081 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
1082 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
1083 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
1084 ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
1085 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1086 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
1087 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
1088 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8
1089 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
1090 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
1091 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1092 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
1093 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1094 ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
1095 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1096 ; VF2: middle.block:
1097 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1099 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1100 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1101 ; VF2-NEXT: br label [[LOOP:%.*]]
1103 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1104 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1105 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
1106 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
1107 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1108 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1109 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1110 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1111 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1112 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1113 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1114 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
1115 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1116 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
1118 ; VF2-NEXT: ret void
1120 ; VF4-LABEL: define void @ld_div2_step3_start0_ind2
1121 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1123 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1125 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
1127 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1128 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1129 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1130 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
1131 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1132 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
1133 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
1134 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
1135 ; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
1136 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
1137 ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
1138 ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
1139 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
1140 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
1141 ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
1142 ; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
1143 ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
1144 ; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
1145 ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
1146 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP8]], align 8
1147 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP10]], align 8
1148 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP12]], align 8
1149 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
1150 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
1151 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
1152 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
1153 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
1154 ; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
1155 ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1156 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
1157 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
1158 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
1159 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
1160 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP24]], align 8
1161 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
1162 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
1163 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
1164 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
1165 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
1166 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
1167 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1168 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
1169 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
1170 ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
1171 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1172 ; VF4: middle.block:
1173 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1175 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1176 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1177 ; VF4-NEXT: br label [[LOOP:%.*]]
1179 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1180 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1181 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
1182 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
1183 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1184 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1185 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1186 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1187 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1188 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1189 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1190 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
1191 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1192 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
1194 ; VF4-NEXT: ret void
; Input IR: the scalar loop body that the vectorizer is run on.
1199 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
1200 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
; Load index = iv/2 + iv2/2 (unsigned division).
1201 %div1 = udiv i64 %iv, 2
1202 %div2 = udiv i64 %iv2, 2
1203 %add = add i64 %div1, %div2
1204 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
1205 %ld = load i64, ptr %gep_ld, align 8
1206 %calc = add nsw i64 %ld, 42
; The store is indexed by %iv itself, i.e. with a stride of 3 elements.
1207 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
1208 store i64 %calc, ptr %gep_st, align 8
1209 %iv2_next = add nsw i64 %iv2, 1
1210 %iv_next = add nsw i64 %iv, 3
; Leave the loop once the incremented %iv compares equal to 1000.
1211 %cond = icmp eq i64 %iv_next, 1000
1212 br i1 %cond, label %exit, label %loop
1217 ; for (iv = 0, iv2 = 0 ; ; iv += 3, iv2 += 1) B[iv] = A[iv/3 + iv2/3] + 42;
;
; Same loop shape as the divisor-2 variant, but both induction variables are
; divided by 3: %iv (start 0, step 3) and %iv2 (start 0, step 1) are each
; passed through udiv by 3 and the quotients are added to form the load
; index. The loaded value plus 42 is stored to B[%iv]; the loop exits when
; %iv_next compares equal to 1000.
;
; Expected output for both check prefixes: the index is computed with vector
; udiv/add on the widened induction variables, while the load is emitted per
; lane (extractelement + scalar getelementptr + scalar load, rebuilt with
; insertelement). The store to B is emitted per lane as well, at offsets 0
; and 3 (plus 6 and 9 for width 4) from OFFSET_IDX = INDEX * 3. The vector
; loop exits when INDEX_NEXT reaches 332 and the scalar loop resumes at
; iv = 996, iv2 = 332.
;
; NOTE(review): counting up from 0 in steps of 3 without wrapping, %iv_next
; never equals 1000 exactly (1000 is not a multiple of 3). The bounds quoted
; above are what the assertions currently record -- confirm the exit bound is
; intentional before changing it.
1218 define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
1219 ; VF2-LABEL: define void @ld_div3_step3_start0_ind2
1220 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1222 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1224 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
1226 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1227 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1228 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1229 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
1230 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1231 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
1232 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
1233 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
1234 ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
1235 ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
1236 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
1237 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
1238 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
1239 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP6]], align 8
1240 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
1241 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
1242 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
1243 ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
1244 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1245 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
1246 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
1247 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8
1248 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
1249 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
1250 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1251 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
1252 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1253 ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
1254 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1255 ; VF2: middle.block:
1256 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1258 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1259 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1260 ; VF2-NEXT: br label [[LOOP:%.*]]
1262 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1263 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1264 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
1265 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
1266 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1267 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1268 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1269 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1270 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1271 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1272 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1273 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
1274 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1275 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
1277 ; VF2-NEXT: ret void
1279 ; VF4-LABEL: define void @ld_div3_step3_start0_ind2
1280 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1282 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1284 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
1286 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1287 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1288 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1289 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
1290 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1291 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
1292 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
1293 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
1294 ; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
1295 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
1296 ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
1297 ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
1298 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
1299 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
1300 ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
1301 ; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
1302 ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
1303 ; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
1304 ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
1305 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP8]], align 8
1306 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP10]], align 8
1307 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP12]], align 8
1308 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
1309 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
1310 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
1311 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
1312 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
1313 ; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
1314 ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1315 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
1316 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
1317 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
1318 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
1319 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP24]], align 8
1320 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
1321 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
1322 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
1323 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
1324 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
1325 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
1326 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1327 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
1328 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
1329 ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
1330 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1331 ; VF4: middle.block:
1332 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1334 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1335 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1336 ; VF4-NEXT: br label [[LOOP:%.*]]
1338 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1339 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1340 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
1341 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
1342 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1343 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1344 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1345 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1346 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1347 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1348 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1349 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
1350 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1351 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
1353 ; VF4-NEXT: ret void
; Input IR: the scalar loop body that the vectorizer is run on.
1358 %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop ]
1359 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
; Load index = iv/3 + iv2/3 (unsigned division).
1360 %div1 = udiv i64 %iv, 3
1361 %div2 = udiv i64 %iv2, 3
1362 %add = add i64 %div1, %div2
1363 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
1364 %ld = load i64, ptr %gep_ld, align 8
1365 %calc = add nsw i64 %ld, 42
; The store is indexed by %iv itself, i.e. with a stride of 3 elements.
1366 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
1367 store i64 %calc, ptr %gep_st, align 8
1368 %iv2_next = add nsw i64 %iv2, 1
1369 %iv_next = add nsw i64 %iv, 3
; Leave the loop once the incremented %iv compares equal to 1000.
1370 %cond = icmp eq i64 %iv_next, 1000
1371 br i1 %cond, label %exit, label %loop
1376 ; for (iv = 1, iv2 = 0 ; ; iv += 1, iv2 += 1) B[iv] = A[iv/1 + iv2/1] + 42;
;
; Variant where the first induction variable starts at 1 instead of 0: %iv
; (start 1, step 1) and %iv2 (start 0, step 1) are each passed through udiv
; by 1 and then added to form the load index. The loaded value plus 42 is
; stored to B[%iv]; the loop exits when %iv_next compares equal to 1000.
;
; Expected output for both check prefixes: the index is computed with vector
; udiv/add on the widened induction variables and the load is emitted per
; lane (extractelement + scalar getelementptr + scalar load, rebuilt with
; insertelement). Unlike the step-3 tests, the store to B is a single wide
; store (<2 x i64> or <4 x i64>) addressed from OFFSET_IDX = 1 + INDEX.
; The vector loop exits when INDEX_NEXT reaches 998 (width 2) or 996
; (width 4); the scalar loop then resumes at iv = 999, iv2 = 998 (width 2)
; or iv = 997, iv2 = 996 (width 4).
1377 define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
1378 ; VF2-LABEL: define void @ld_div1_step1_start1_ind2
1379 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1381 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1383 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
1385 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1386 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1387 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1388 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1389 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1390 ; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
1391 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
1392 ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
1393 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
1394 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
1395 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
1396 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
1397 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
1398 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
1399 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
1400 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
1401 ; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
1402 ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1403 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
1404 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
1405 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1406 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1407 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1408 ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
1409 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1410 ; VF2: middle.block:
1411 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1413 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 999, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1414 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1415 ; VF2-NEXT: br label [[LOOP:%.*]]
1417 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1418 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1419 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
1420 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
1421 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1422 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1423 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1424 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1425 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1426 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1427 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1428 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1429 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1430 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
1432 ; VF2-NEXT: ret void
1434 ; VF4-LABEL: define void @ld_div1_step1_start1_ind2
1435 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1437 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1439 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
1441 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1442 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 2, i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1443 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1444 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1445 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1446 ; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
1447 ; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
1448 ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
1449 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
1450 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
1451 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
1452 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
1453 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
1454 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
1455 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
1456 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
1457 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
1458 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
1459 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
1460 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
1461 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
1462 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
1463 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
1464 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
1465 ; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
1466 ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1467 ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
1468 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
1469 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1470 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1471 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
1472 ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
1473 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1474 ; VF4: middle.block:
1475 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1477 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1478 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1479 ; VF4-NEXT: br label [[LOOP:%.*]]
1481 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1482 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1483 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
1484 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
1485 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1486 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1487 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1488 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1489 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1490 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1491 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1492 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1493 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1494 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
1496 ; VF4-NEXT: ret void
; Input IR: the scalar loop body that the vectorizer is run on.
; %iv enters the loop at 1, %iv2 at 0.
1501 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ]
1502 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
; Load index = iv/1 + iv2/1 (unsigned division).
1503 %div1 = udiv i64 %iv, 1
1504 %div2 = udiv i64 %iv2, 1
1505 %add = add i64 %div1, %div2
1506 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
1507 %ld = load i64, ptr %gep_ld, align 8
1508 %calc = add nsw i64 %ld, 42
; The store is indexed by %iv itself, i.e. consecutive elements of B.
1509 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
1510 store i64 %calc, ptr %gep_st, align 8
1511 %iv2_next = add nsw i64 %iv2, 1
1512 %iv_next = add nsw i64 %iv, 1
; Leave the loop once the incremented %iv compares equal to 1000.
1513 %cond = icmp eq i64 %iv_next, 1000
1514 br i1 %cond, label %exit, label %loop
1519 ; for (iv = 1, iv2 = 0 ; ; iv += 1, iv2 += 1) B[iv] = A[iv/2 + iv2/2] + 42;
1520 define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
1521 ; VF2-LABEL: define void @ld_div2_step1_start1_ind2
1522 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1524 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1526 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
1528 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1529 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1530 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1531 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1532 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1533 ; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1534 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1535 ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
1536 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
1537 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
1538 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
1539 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
1540 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
1541 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
1542 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
1543 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
1544 ; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
1545 ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1546 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
1547 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
1548 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1549 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1550 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1551 ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
1552 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1553 ; VF2: middle.block:
1554 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1556 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 999, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1557 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1558 ; VF2-NEXT: br label [[LOOP:%.*]]
1560 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1561 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1562 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
1563 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
1564 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1565 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1566 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1567 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1568 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1569 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1570 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1571 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1572 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1573 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
1575 ; VF2-NEXT: ret void
1577 ; VF4-LABEL: define void @ld_div2_step1_start1_ind2
1578 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1580 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1582 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
1584 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1585 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 2, i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1586 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1587 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1588 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1589 ; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
1590 ; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
1591 ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
1592 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
1593 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
1594 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
1595 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
1596 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
1597 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
1598 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
1599 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
1600 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
1601 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
1602 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
1603 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
1604 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
1605 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
1606 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
1607 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
1608 ; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
1609 ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1610 ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
1611 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
1612 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1613 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1614 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
1615 ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
1616 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1617 ; VF4: middle.block:
1618 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1620 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1621 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1622 ; VF4-NEXT: br label [[LOOP:%.*]]
1624 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1625 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1626 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
1627 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
1628 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1629 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1630 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1631 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1632 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1633 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1634 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1635 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1636 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1637 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
1639 ; VF4-NEXT: ret void
1644 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ]
1645 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
1646 %div1 = udiv i64 %iv, 2
1647 %div2 = udiv i64 %iv2, 2
1648 %add = add i64 %div1, %div2
1649 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
1650 %ld = load i64, ptr %gep_ld, align 8
1651 %calc = add nsw i64 %ld, 42
1652 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
1653 store i64 %calc, ptr %gep_st, align 8
1654 %iv2_next = add nsw i64 %iv2, 1
1655 %iv_next = add nsw i64 %iv, 1
1656 %cond = icmp eq i64 %iv_next, 1000
1657 br i1 %cond, label %exit, label %loop
1662 ; for (iv = 1, iv2 = 0 ; ; iv += 1, iv2 += 1) B[iv] = A[iv/3 + iv2/3] + 42;
; Two inductions advance in lock-step (iv starts at 1, iv2 at 0, both step by 1).
; The load index into A is the sum of their unsigned divisions by 3; the result
; plus 42 is stored to B[iv]. The loop exits once iv + 1 == 1000.
; Expected output below: both inductions stay as vector phis, the A loads are
; done one lane at a time (extractelement + scalar load + insertelement), and B
; is written with a single wide store per vector iteration.
; The VF2/VF4 assertions are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_test_checks.py instead of editing by hand.
1663 define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
1664 ; VF2-LABEL: define void @ld_div3_step1_start1_ind2
1665 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1667 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1669 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
1671 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1672 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1673 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1674 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1675 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1676 ; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
1677 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
1678 ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
1679 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
1680 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
1681 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
1682 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
1683 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
1684 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
1685 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
1686 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
1687 ; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
1688 ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1689 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
1690 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
1691 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1692 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1693 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1694 ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
1695 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
1696 ; VF2: middle.block:
1697 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1699 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 999, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1700 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1701 ; VF2-NEXT: br label [[LOOP:%.*]]
1703 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1704 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1705 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
1706 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
1707 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1708 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1709 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1710 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1711 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1712 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1713 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1714 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1715 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1716 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
1718 ; VF2-NEXT: ret void
1720 ; VF4-LABEL: define void @ld_div3_step1_start1_ind2
1721 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1723 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1725 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
1727 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1728 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 2, i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1729 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1730 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1731 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1732 ; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
1733 ; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
1734 ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
1735 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
1736 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
1737 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
1738 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
1739 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
1740 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
1741 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
1742 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
1743 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
1744 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
1745 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
1746 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
1747 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
1748 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
1749 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
1750 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
1751 ; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
1752 ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1753 ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
1754 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
1755 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1756 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1757 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
1758 ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
1759 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
1760 ; VF4: middle.block:
1761 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1763 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1764 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1765 ; VF4-NEXT: br label [[LOOP:%.*]]
1767 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1768 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1769 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
1770 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
1771 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1772 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1773 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1774 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1775 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1776 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1777 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1778 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1779 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1780 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
1782 ; VF4-NEXT: ret void
1787 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ] ; first induction, starts at 1; also the store index into B
1788 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ] ; second induction, starts at 0
1789 %div1 = udiv i64 %iv, 3
1790 %div2 = udiv i64 %iv2, 3
1791 %add = add i64 %div1, %div2 ; load index: iv/3 + iv2/3
1792 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
1793 %ld = load i64, ptr %gep_ld, align 8
1794 %calc = add nsw i64 %ld, 42
1795 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
1796 store i64 %calc, ptr %gep_st, align 8
1797 %iv2_next = add nsw i64 %iv2, 1
1798 %iv_next = add nsw i64 %iv, 1
1799 %cond = icmp eq i64 %iv_next, 1000 ; exit is keyed on the incremented iv only; iv2 plays no part
1800 br i1 %cond, label %exit, label %loop
1805 ; for (iv = 1, iv2 = 0 ; ; iv += 2, iv2 += 1) B[iv] = A[iv/1 + iv2/1] + 42;
; Two inductions with different strides: iv starts at 1 and steps by 2, iv2
; starts at 0 and steps by 1. The load index into A is iv/1 + iv2/1 (unsigned
; divisions by 1 are kept in the IR); the result plus 42 is stored to B[iv].
; Expected output below: both inductions stay as vector phis, the A loads are
; done one lane at a time, and the B stores are also emitted per lane
; (B[iv], B[iv+2], ...) rather than as one wide store.
; NOTE(review): iv only takes odd values, so %iv_next never equals 1000 exactly;
; the expected output still uses a finite vector trip count (498 for VF2, 496
; for VF4). Presumably this is derived from the nsw flag on the increment --
; confirm this is the intended setup.
; The VF2/VF4 assertions are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_test_checks.py instead of editing by hand.
1806 define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
1807 ; VF2-LABEL: define void @ld_div1_step2_start1_ind2
1808 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1810 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1812 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
1814 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1815 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1816 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1817 ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
1818 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
1819 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
1820 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
1821 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
1822 ; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
1823 ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
1824 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
1825 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
1826 ; VF2-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
1827 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
1828 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP7]], align 8
1829 ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
1830 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
1831 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
1832 ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
1833 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
1834 ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
1835 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
1836 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
1837 ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
1838 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8
1839 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1840 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
1841 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1842 ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
1843 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1844 ; VF2: middle.block:
1845 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1847 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1848 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 498, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1849 ; VF2-NEXT: br label [[LOOP:%.*]]
1851 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1852 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1853 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
1854 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
1855 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1856 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1857 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1858 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1859 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1860 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1861 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1862 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
1863 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1864 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
1866 ; VF2-NEXT: ret void
1868 ; VF4-LABEL: define void @ld_div1_step2_start1_ind2
1869 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1871 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1873 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
1875 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1876 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 3, i64 5, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1877 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1878 ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
1879 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
1880 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
1881 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
1882 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
1883 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
1884 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
1885 ; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
1886 ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
1887 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
1888 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
1889 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
1890 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
1891 ; VF4-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
1892 ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
1893 ; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
1894 ; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP14]]
1895 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP9]], align 8
1896 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
1897 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP13]], align 8
1898 ; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
1899 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
1900 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
1901 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
1902 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
1903 ; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
1904 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
1905 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
1906 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
1907 ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
1908 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
1909 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
1910 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
1911 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
1912 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
1913 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
1914 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
1915 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8
1916 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1917 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
1918 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
1919 ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
1920 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1921 ; VF4: middle.block:
1922 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
1924 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 993, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1925 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 496, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1926 ; VF4-NEXT: br label [[LOOP:%.*]]
1928 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1929 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
1930 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
1931 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
1932 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
1933 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
1934 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
1935 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
1936 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
1937 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
1938 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
1939 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
1940 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
1941 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
1943 ; VF4-NEXT: ret void
1948 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ] ; first induction, starts at 1; also the store index into B
1949 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ] ; second induction, starts at 0
1950 %div1 = udiv i64 %iv, 1
1951 %div2 = udiv i64 %iv2, 1
1952 %add = add i64 %div1, %div2 ; load index: iv/1 + iv2/1
1953 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
1954 %ld = load i64, ptr %gep_ld, align 8
1955 %calc = add nsw i64 %ld, 42
1956 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
1957 store i64 %calc, ptr %gep_st, align 8
1958 %iv2_next = add nsw i64 %iv2, 1
1959 %iv_next = add nsw i64 %iv, 2 ; stride 2: iv stays odd
1960 %cond = icmp eq i64 %iv_next, 1000 ; exit is keyed on the incremented iv only; iv2 plays no part
1961 br i1 %cond, label %exit, label %loop
1966 ; for (iv = 1, iv2 = 0 ; ; iv += 2, iv2 += 1) B[iv] = A[iv/2 + iv2/2] + 42;
; Two inductions with different strides: iv starts at 1 and steps by 2, iv2
; starts at 0 and steps by 1. The load index into A is the sum of their
; unsigned divisions by 2; the result plus 42 is stored to B[iv].
; Expected output below: both inductions stay as vector phis, the A loads are
; done one lane at a time, and the B stores are also emitted per lane
; (B[iv], B[iv+2], ...) rather than as one wide store.
; NOTE(review): iv only takes odd values, so %iv_next never equals 1000 exactly;
; the expected output still uses a finite vector trip count (498 for VF2, 496
; for VF4). Presumably this is derived from the nsw flag on the increment --
; confirm this is the intended setup.
; The VF2/VF4 assertions are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_test_checks.py instead of editing by hand.
1967 define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
1968 ; VF2-LABEL: define void @ld_div2_step2_start1_ind2
1969 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
1971 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1973 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
1975 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1976 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1977 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
1978 ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
1979 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
1980 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
1981 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
1982 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1983 ; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
1984 ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
1985 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
1986 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
1987 ; VF2-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
1988 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
1989 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP7]], align 8
1990 ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
1991 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
1992 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
1993 ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
1994 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
1995 ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
1996 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
1997 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
1998 ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
1999 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8
2000 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2001 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
2002 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
2003 ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
2004 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2005 ; VF2: middle.block:
2006 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2008 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2009 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 498, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2010 ; VF2-NEXT: br label [[LOOP:%.*]]
2012 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2013 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2014 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
2015 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
2016 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2017 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2018 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2019 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2020 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2021 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2022 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2023 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
2024 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2025 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
2027 ; VF2-NEXT: ret void
2029 ; VF4-LABEL: define void @ld_div2_step2_start1_ind2
2030 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2032 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2034 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
2036 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2037 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 3, i64 5, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2038 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2039 ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
2040 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2041 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2042 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
2043 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
2044 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
2045 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
2046 ; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
2047 ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
2048 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
2049 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2050 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
2051 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
2052 ; VF4-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
2053 ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
2054 ; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
2055 ; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP14]]
2056 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP9]], align 8
2057 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
2058 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP13]], align 8
2059 ; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
2060 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
2061 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
2062 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
2063 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
2064 ; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
2065 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2066 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2067 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
2068 ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
2069 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
2070 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
2071 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
2072 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
2073 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
2074 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
2075 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
2076 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8
2077 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2078 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
2079 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
2080 ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
2081 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2082 ; VF4: middle.block:
2083 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2085 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 993, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2086 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 496, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2087 ; VF4-NEXT: br label [[LOOP:%.*]]
2089 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2090 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2091 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
2092 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
2093 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2094 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2095 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2096 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2097 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2098 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2099 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2100 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
2101 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2102 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
2104 ; VF4-NEXT: ret void
2109 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ] ; first induction, starts at 1; also the store index into B
2110 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ] ; second induction, starts at 0
2111 %div1 = udiv i64 %iv, 2
2112 %div2 = udiv i64 %iv2, 2
2113 %add = add i64 %div1, %div2 ; load index: iv/2 + iv2/2
2114 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
2115 %ld = load i64, ptr %gep_ld, align 8
2116 %calc = add nsw i64 %ld, 42
2117 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
2118 store i64 %calc, ptr %gep_st, align 8
2119 %iv2_next = add nsw i64 %iv2, 1
2120 %iv_next = add nsw i64 %iv, 2 ; stride 2: iv stays odd
2121 %cond = icmp eq i64 %iv_next, 1000 ; exit is keyed on the incremented iv only; iv2 plays no part
2122 br i1 %cond, label %exit, label %loop
2127 ; for (iv = 1, iv2 = 0 ; ; iv += 2, iv2 += 1) B[iv] = A[iv/3 + iv2/3] + 42;
;
; Input loop: %iv starts at 1 and advances by 2, %iv2 starts at 0 and advances
; by 1. Each is divided by 3 (udiv), the quotients are added, and the sum
; indexes an i64 load from %A; the loaded value plus 42 is stored to B[%iv].
; The loop leaves when %iv_next == 1000.
;
; Expected output for both vector widths: the two udivs and the add are done
; on the widened induction vectors, the load from %A is emitted as one scalar
; load per lane (extractelement, getelementptr, load, insertelement), and the
; stores to %B are emitted per lane at OFFSET_IDX + 2 * lane, where
; OFFSET_IDX = 1 + 2 * INDEX.
; The vector loop is expected to stop at INDEX_NEXT == 498 (width 2) or 496
; (width 4); the scalar remainder loop then resumes at iv = 997 / iv2 = 498
; (width 2) or iv = 993 / iv2 = 496 (width 4).
;
; NOTE(review): %iv_next only takes odd values (3, 5, 7, ...), so the
; `icmp eq ... 1000` exit test cannot become true before the nsw add wraps;
; presumably the expected trip counts rely on the nsw flag. Confirm this is
; intentional for the test.
2128 define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
2129 ; VF2-LABEL: define void @ld_div3_step2_start1_ind2
2130 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2132 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2134 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
2136 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2137 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2138 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2139 ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
2140 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2141 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2142 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
2143 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
2144 ; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
2145 ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
2146 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
2147 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
2148 ; VF2-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
2149 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2150 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP7]], align 8
2151 ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
2152 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
2153 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
2154 ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
2155 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2156 ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2157 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
2158 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
2159 ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
2160 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8
2161 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2162 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
2163 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
2164 ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
2165 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
2166 ; VF2: middle.block:
2167 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2169 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2170 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 498, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2171 ; VF2-NEXT: br label [[LOOP:%.*]]
2173 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2174 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2175 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
2176 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
2177 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2178 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2179 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2180 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2181 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2182 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2183 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2184 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
2185 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2186 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
2188 ; VF2-NEXT: ret void
2190 ; VF4-LABEL: define void @ld_div3_step2_start1_ind2
2191 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2193 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2195 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
2197 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2198 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 3, i64 5, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2199 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2200 ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
2201 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2202 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2203 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
2204 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
2205 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
2206 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
2207 ; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
2208 ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
2209 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
2210 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2211 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
2212 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
2213 ; VF4-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
2214 ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
2215 ; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
2216 ; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP14]]
2217 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP9]], align 8
2218 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
2219 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP13]], align 8
2220 ; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
2221 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
2222 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
2223 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
2224 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
2225 ; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
2226 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2227 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2228 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
2229 ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
2230 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
2231 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
2232 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
2233 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
2234 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
2235 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
2236 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
2237 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8
2238 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2239 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
2240 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
2241 ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
2242 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
2243 ; VF4: middle.block:
2244 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2246 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 993, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2247 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 496, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2248 ; VF4-NEXT: br label [[LOOP:%.*]]
2250 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2251 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2252 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
2253 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
2254 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2255 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2256 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2257 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2258 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2259 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2260 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2261 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2
2262 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2263 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
2265 ; VF4-NEXT: ret void
2270 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ]
2271 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
2272 %div1 = udiv i64 %iv, 3
2273 %div2 = udiv i64 %iv2, 3
2274 %add = add i64 %div1, %div2
2275 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
2276 %ld = load i64, ptr %gep_ld, align 8
2277 %calc = add nsw i64 %ld, 42
2278 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
2279 store i64 %calc, ptr %gep_st, align 8
2280 %iv2_next = add nsw i64 %iv2, 1
2281 %iv_next = add nsw i64 %iv, 2
2282 %cond = icmp eq i64 %iv_next, 1000
2283 br i1 %cond, label %exit, label %loop
2288 ; for (iv = 1, iv2 = 0 ; ; iv += 3, iv2 += 1) B[iv] = A[iv/1 + iv2/1] + 42;
;
; Input loop: %iv starts at 1 and advances by 3, %iv2 starts at 0 and advances
; by 1. Each is divided by 1 (udiv), the quotients are added, and the sum
; indexes an i64 load from %A; the loaded value plus 42 is stored to B[%iv].
; The loop leaves when %iv_next == 1000, i.e. after 333 iterations
; (1 + 3 * 333 == 1000).
;
; Expected output for both vector widths: the udivs by 1 remain in the output
; and are done, together with the add, on the widened induction vectors; the
; load from %A is emitted as one scalar load per lane (extractelement,
; getelementptr, load, insertelement); the stores to %B are emitted per lane
; at OFFSET_IDX + 3 * lane, where OFFSET_IDX = 1 + 3 * INDEX.
; For both widths the vector loop is expected to stop at INDEX_NEXT == 332,
; and the scalar remainder loop then resumes at iv = 997 (1 + 3 * 332) and
; iv2 = 332.
2289 define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
2290 ; VF2-LABEL: define void @ld_div1_step3_start1_ind2
2291 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2293 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2295 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
2297 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2298 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2299 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2300 ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
2301 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2302 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2303 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
2304 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
2305 ; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
2306 ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
2307 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
2308 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
2309 ; VF2-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
2310 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2311 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP7]], align 8
2312 ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
2313 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
2314 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
2315 ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
2316 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2317 ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2318 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
2319 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
2320 ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
2321 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8
2322 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2323 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
2324 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
2325 ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
2326 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
2327 ; VF2: middle.block:
2328 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2330 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2331 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2332 ; VF2-NEXT: br label [[LOOP:%.*]]
2334 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2335 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2336 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
2337 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
2338 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2339 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2340 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2341 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2342 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2343 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2344 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2345 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
2346 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2347 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
2349 ; VF2-NEXT: ret void
2351 ; VF4-LABEL: define void @ld_div1_step3_start1_ind2
2352 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2354 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2356 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
2358 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2359 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 4, i64 7, i64 10>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2360 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2361 ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
2362 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2363 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2364 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
2365 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
2366 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
2367 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
2368 ; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
2369 ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
2370 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
2371 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2372 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
2373 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
2374 ; VF4-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
2375 ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
2376 ; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
2377 ; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP14]]
2378 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP9]], align 8
2379 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
2380 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP13]], align 8
2381 ; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
2382 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
2383 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
2384 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
2385 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
2386 ; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
2387 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2388 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2389 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
2390 ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
2391 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
2392 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
2393 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
2394 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
2395 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
2396 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
2397 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
2398 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8
2399 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2400 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
2401 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
2402 ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
2403 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
2404 ; VF4: middle.block:
2405 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2407 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2408 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2409 ; VF4-NEXT: br label [[LOOP:%.*]]
2411 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2412 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2413 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1
2414 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1
2415 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2416 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2417 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2418 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2419 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2420 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2421 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2422 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
2423 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2424 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
2426 ; VF4-NEXT: ret void
2431 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ]
2432 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
2433 %div1 = udiv i64 %iv, 1
2434 %div2 = udiv i64 %iv2, 1
2435 %add = add i64 %div1, %div2
2436 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
2437 %ld = load i64, ptr %gep_ld, align 8
2438 %calc = add nsw i64 %ld, 42
2439 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
2440 store i64 %calc, ptr %gep_st, align 8
2441 %iv2_next = add nsw i64 %iv2, 1
2442 %iv_next = add nsw i64 %iv, 3
2443 %cond = icmp eq i64 %iv_next, 1000
2444 br i1 %cond, label %exit, label %loop
2449 ; for (iv = 1, iv2 = 0 ; ; iv += 3, iv2 += 1) B[iv] = A[iv/2 + iv2/2] + 42;
;
; Input loop: %iv starts at 1 and advances by 3, %iv2 starts at 0 and advances
; by 1. Each is divided by 2 (udiv), the quotients are added, and the sum
; indexes an i64 load from %A; the loaded value plus 42 is stored to B[%iv].
; The loop leaves when %iv_next == 1000, i.e. after 333 iterations
; (1 + 3 * 333 == 1000).
;
; Expected output for both vector widths: the two udivs and the add are done
; on the widened induction vectors, the load from %A is emitted as one scalar
; load per lane (extractelement, getelementptr, load, insertelement), and the
; stores to %B are emitted per lane at OFFSET_IDX + 3 * lane, where
; OFFSET_IDX = 1 + 3 * INDEX.
; For both widths the vector loop is expected to stop at INDEX_NEXT == 332,
; and the scalar remainder loop then resumes at iv = 997 (1 + 3 * 332) and
; iv2 = 332.
2450 define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
2451 ; VF2-LABEL: define void @ld_div2_step3_start1_ind2
2452 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2454 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2456 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
2458 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2459 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2460 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2461 ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
2462 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2463 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2464 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
2465 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
2466 ; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
2467 ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
2468 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
2469 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
2470 ; VF2-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
2471 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2472 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP7]], align 8
2473 ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
2474 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
2475 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
2476 ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
2477 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2478 ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2479 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
2480 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
2481 ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
2482 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8
2483 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2484 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
2485 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
2486 ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
2487 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
2488 ; VF2: middle.block:
2489 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2491 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2492 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2493 ; VF2-NEXT: br label [[LOOP:%.*]]
2495 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2496 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2497 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
2498 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
2499 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2500 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2501 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2502 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2503 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2504 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2505 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2506 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
2507 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2508 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]]
2510 ; VF2-NEXT: ret void
2512 ; VF4-LABEL: define void @ld_div2_step3_start1_ind2
2513 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2515 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2517 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
2519 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2520 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 4, i64 7, i64 10>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2521 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2522 ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
2523 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2524 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2525 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
2526 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
2527 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
2528 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
2529 ; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
2530 ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
2531 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
2532 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2533 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
2534 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
2535 ; VF4-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
2536 ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
2537 ; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
2538 ; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP14]]
2539 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP9]], align 8
2540 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
2541 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP13]], align 8
2542 ; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
2543 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
2544 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
2545 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
2546 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
2547 ; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
2548 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2549 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2550 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
2551 ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
2552 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
2553 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
2554 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
2555 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
2556 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
2557 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
2558 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
2559 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8
2560 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2561 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
2562 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
2563 ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
2564 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
2565 ; VF4: middle.block:
2566 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2568 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2569 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2570 ; VF4-NEXT: br label [[LOOP:%.*]]
2572 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2573 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2574 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2
2575 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2
2576 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2577 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2578 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2579 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2580 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2581 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2582 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2583 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
2584 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2585 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]]
2587 ; VF4-NEXT: ret void
2592 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ]
2593 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
2594 %div1 = udiv i64 %iv, 2
2595 %div2 = udiv i64 %iv2, 2
2596 %add = add i64 %div1, %div2
2597 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
2598 %ld = load i64, ptr %gep_ld, align 8
2599 %calc = add nsw i64 %ld, 42
2600 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
2601 store i64 %calc, ptr %gep_st, align 8
2602 %iv2_next = add nsw i64 %iv2, 1
2603 %iv_next = add nsw i64 %iv, 3
2604 %cond = icmp eq i64 %iv_next, 1000
2605 br i1 %cond, label %exit, label %loop
2610 ; for (iv = 1, iv2 = 0 ; ; iv += 3, iv2 += 1) B[iv] = A[iv/3 + iv2/3] + 42;
2611 define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
2612 ; VF2-LABEL: define void @ld_div3_step3_start1_ind2
2613 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2615 ; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2617 ; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
2619 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2620 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2621 ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2622 ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
2623 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2624 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2625 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
2626 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
2627 ; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
2628 ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
2629 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
2630 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
2631 ; VF2-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
2632 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2633 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP7]], align 8
2634 ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
2635 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
2636 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
2637 ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
2638 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2639 ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2640 ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
2641 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8
2642 ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
2643 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8
2644 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2645 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
2646 ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
2647 ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
2648 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
2649 ; VF2: middle.block:
2650 ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2652 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2653 ; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2654 ; VF2-NEXT: br label [[LOOP:%.*]]
2656 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2657 ; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2658 ; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
2659 ; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
2660 ; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2661 ; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2662 ; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2663 ; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2664 ; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2665 ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2666 ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2667 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
2668 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2669 ; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP37:![0-9]+]]
2671 ; VF2-NEXT: ret void
2673 ; VF4-LABEL: define void @ld_div3_step3_start1_ind2
2674 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
2676 ; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2678 ; VF4-NEXT: br label [[VECTOR_BODY:%.*]]
2680 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2681 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 4, i64 7, i64 10>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2682 ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
2683 ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
2684 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
2685 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2686 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
2687 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
2688 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
2689 ; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
2690 ; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
2691 ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
2692 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
2693 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
2694 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
2695 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
2696 ; VF4-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
2697 ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
2698 ; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
2699 ; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP14]]
2700 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP9]], align 8
2701 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
2702 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP13]], align 8
2703 ; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
2704 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
2705 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
2706 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
2707 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
2708 ; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
2709 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
2710 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
2711 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
2712 ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
2713 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
2714 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8
2715 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
2716 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8
2717 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
2718 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8
2719 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
2720 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8
2721 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2722 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
2723 ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
2724 ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
2725 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
2726 ; VF4: middle.block:
2727 ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
2729 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
2730 ; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 332, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2731 ; VF4-NEXT: br label [[LOOP:%.*]]
2733 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2734 ; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ]
2735 ; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3
2736 ; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3
2737 ; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]]
2738 ; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]]
2739 ; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8
2740 ; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42
2741 ; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
2742 ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8
2743 ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1
2744 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
2745 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
2746 ; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP37:![0-9]+]]
2748 ; VF4-NEXT: ret void
2753 %iv = phi i64 [ 1, %entry ], [ %iv_next, %loop ]
2754 %iv2 = phi i64 [ 0, %entry ], [ %iv2_next, %loop ]
2755 %div1 = udiv i64 %iv, 3
2756 %div2 = udiv i64 %iv2, 3
2757 %add = add i64 %div1, %div2
2758 %gep_ld = getelementptr inbounds i64, ptr %A, i64 %add
2759 %ld = load i64, ptr %gep_ld, align 8
2760 %calc = add nsw i64 %ld, 42
2761 %gep_st = getelementptr inbounds i64, ptr %B, i64 %iv
2762 store i64 %calc, ptr %gep_st, align 8
2763 %iv2_next = add nsw i64 %iv2, 1
2764 %iv_next = add nsw i64 %iv, 3
2765 %cond = icmp eq i64 %iv_next, 1000
2766 br i1 %cond, label %exit, label %loop