; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -passes=loop-vectorize,simplifycfg -verify-loop-info -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=UNROLL
; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -passes=loop-vectorize -verify-loop-info < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -passes=loop-vectorize,simplifycfg -verify-loop-info -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=VEC
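; The three RUN lines cover the same input in different configurations: UNROLL
; and UNROLL-NOSIMPLIFY force VF=1/IC=2 (scalar interleaving of the predicated
; stores, with and without a simplifycfg cleanup), while VEC forces VF=2/IC=1
; (vectorization with per-lane predication).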
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Test predication of stores.
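; The scalar loop in @test is roughly equivalent to this C sketch (an
; illustration only, not part of the test input):
;
;   for (long i = 0; i < 128; i++)
;     if (f[i] > 100)
;       f[i] += 20;
;
; The store is guarded by the compare, so the vectorizer has to emit it in
; predicated pred.store.if/pred.store.continue blocks, as the checks below show.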
define i32 @test(ptr nocapture %f) #0 {
; UNROLL-LABEL: @test(
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP1]]
; UNROLL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; UNROLL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
; UNROLL-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP4]], 100
; UNROLL-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], 100
; UNROLL-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL: pred.store.if:
; UNROLL-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP4]], 20
; UNROLL-NEXT: store i32 [[TMP8]], ptr [[TMP2]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL: pred.store.continue:
; UNROLL-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; UNROLL: pred.store.if1:
; UNROLL-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP5]], 20
; UNROLL-NEXT: store i32 [[TMP9]], ptr [[TMP3]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE2]]
; UNROLL: pred.store.continue2:
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; UNROLL-NEXT: br i1 [[TMP10]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL-NEXT: ret i32 0
; UNROLL-NOSIMPLIFY-LABEL: @test(
; UNROLL-NOSIMPLIFY-NEXT: entry:
; UNROLL-NOSIMPLIFY-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY: vector.ph:
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP1]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP4]], 100
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], 100
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP4]], 20
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP8]], ptr [[TMP2]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; UNROLL-NOSIMPLIFY: pred.store.if1:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP5]], 20
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP9]], ptr [[TMP3]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE2]]
; UNROLL-NOSIMPLIFY: pred.store.continue2:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: for.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[INDVARS_IV]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 100
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; UNROLL-NOSIMPLIFY: if.then:
; UNROLL-NOSIMPLIFY-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 20
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC]]
; UNROLL-NOSIMPLIFY: for.inc:
; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; UNROLL-NOSIMPLIFY: for.end:
; UNROLL-NOSIMPLIFY-NEXT: ret i32 0
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; VEC-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; VEC-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC: pred.store.if:
; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP0]]
; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; VEC-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP6]], 20
; VEC-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC: pred.store.continue:
; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if1:
; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP9]]
; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; VEC-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], 20
; VEC-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.continue2:
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; VEC-NEXT: br i1 [[TMP13]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VEC-NEXT: ret i32 0
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, ptr %f, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:
  %add = add nsw i32 %0, 20
  store i32 %add, ptr %arrayidx, align 4
  br label %for.inc

for.inc:
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 128
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 0
}

; Track basic blocks when unrolling conditional blocks. This code used to assert
; because we did not update the phi nodes with the proper predecessor in the
; vectorized loop body.
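; The scalar loop in @bug18724 (reached only when %cond is false) is, very
; roughly, the following C-level sketch (illustration only; the conditional
; store plus the iNewChunks reduction are the interesting parts):
;
;   long i = v.1; int chunks = v.2;
;   do {
;     int t = ptr[i];
;     if (cond.2) { ptr[i] = t; chunks++; }
;   } while ((int)i++ < 0);   // the exit test uses the pre-increment value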
define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; UNROLL-LABEL: @bug18724(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP0]])
; UNROLL-NEXT: [[TMP1:%.*]] = trunc i64 [[V_1:%.*]] to i32
; UNROLL-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
; UNROLL-NEXT: [[TMP2:%.*]] = sub i32 [[SMAX]], [[TMP1]]
; UNROLL-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; UNROLL-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 2
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2
; UNROLL-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
; UNROLL-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]]
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[V_2:%.*]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE3]] ]
; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI4:%.*]], [[PRED_STORE_CONTINUE3]] ]
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]]
; UNROLL-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1
; UNROLL-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP5]]
; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP6]]
; UNROLL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
; UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
; UNROLL-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.if:
; UNROLL-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
; UNROLL-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.continue3:
; UNROLL-NEXT: [[TMP11:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NEXT: [[TMP12:%.*]] = add i32 [[VEC_PHI1]], 1
; UNROLL-NEXT: [[TMP13:%.*]] = xor i1 [[COND_2]], true
; UNROLL-NEXT: [[TMP14:%.*]] = xor i1 [[COND_2]], true
; UNROLL-NEXT: [[PREDPHI]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]]
; UNROLL-NEXT: [[PREDPHI4]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI4]], [[PREDPHI]]
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; UNROLL-NEXT: [[TMP16:%.*]] = xor i1 [[CMP_N]], true
; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP16]])
; UNROLL-NEXT: br label [[SCALAR_PH]]
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[V_1]], [[ENTRY:%.*]] ]
; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[V_2]], [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[FOR_BODY14:%.*]]
; UNROLL: for.body14:
; UNROLL-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]]
; UNROLL-NEXT: [[TMP:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4
; UNROLL-NEXT: br i1 [[COND_2]], label [[IF_THEN18:%.*]], label [[FOR_INC23]]
; UNROLL-NEXT: store i32 [[TMP]], ptr [[ARRAYIDX16]], align 4
; UNROLL-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
; UNROLL-NEXT: br label [[FOR_INC23]]
; UNROLL-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
; UNROLL-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
; UNROLL-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
; UNROLL-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
; UNROLL-NEXT: call void @llvm.assume(i1 [[CMP13]])
; UNROLL-NEXT: br label [[FOR_BODY14]]
; UNROLL-NOSIMPLIFY-LABEL: @bug18724(
; UNROLL-NOSIMPLIFY-NEXT: entry:
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY9:%.*]]
; UNROLL-NOSIMPLIFY: for.body9:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND:%.*]], label [[FOR_INC26:%.*]], label [[FOR_BODY14_PREHEADER:%.*]]
; UNROLL-NOSIMPLIFY: for.body14.preheader:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = trunc i64 [[V_1:%.*]] to i32
; UNROLL-NOSIMPLIFY-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 0)
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[TMP0]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY: vector.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; UNROLL-NOSIMPLIFY-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[V_2:%.*]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE3]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI4:%.*]], [[PRED_STORE_CONTINUE3]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP4]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP5]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.if2:
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.continue3:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP11:%.*]] = add i32 [[VEC_PHI1]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP12:%.*]] = xor i1 [[COND_2]], true
; UNROLL-NOSIMPLIFY-NEXT: [[TMP13:%.*]] = xor i1 [[COND_2]], true
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 [[TMP12]], i32 [[VEC_PHI]], i32 [[TMP10]]
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI4]] = select i1 [[TMP13]], i32 [[VEC_PHI1]], i32 [[TMP11]]
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI4]], [[PREDPHI]]
; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_INC26_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[V_1]], [[FOR_BODY14_PREHEADER]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[V_2]], [[FOR_BODY14_PREHEADER]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY14:%.*]]
; UNROLL-NOSIMPLIFY: for.body14:
; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2]], label [[IF_THEN18:%.*]], label [[FOR_INC23]]
; UNROLL-NOSIMPLIFY: if.then18:
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP]], ptr [[ARRAYIDX16]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC23]]
; UNROLL-NOSIMPLIFY: for.inc23:
; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
; UNROLL-NOSIMPLIFY-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; UNROLL-NOSIMPLIFY: for.inc26.loopexit:
; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC26]]
; UNROLL-NOSIMPLIFY: for.inc26:
; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY9]] ], [ [[INEWCHUNKS_2_LCSSA]], [[FOR_INC26_LOOPEXIT]] ]
; UNROLL-NOSIMPLIFY-NEXT: unreachable
; VEC-LABEL: @bug18724(
; VEC-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
; VEC-NEXT: call void @llvm.assume(i1 [[TMP0]])
; VEC-NEXT: [[TMP1:%.*]] = trunc i64 [[V_1:%.*]] to i32
; VEC-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
; VEC-NEXT: [[TMP2:%.*]] = sub i32 [[SMAX]], [[TMP1]]
; VEC-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; VEC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 2
; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
; VEC-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]]
; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i64 0
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ]
; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]]
; VEC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP6]]
; VEC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
; VEC-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC: pred.store.if:
; VEC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP6]]
; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; VEC-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC: pred.store.continue:
; VEC-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
; VEC-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if1:
; VEC-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 1
; VEC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP13]]
; VEC-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; VEC-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.continue2:
; VEC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], <i32 1, i32 1>
; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
; VEC-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP17]], <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP16]]
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; VEC-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI]])
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; VEC-NEXT: [[TMP20:%.*]] = xor i1 [[CMP_N]], true
; VEC-NEXT: call void @llvm.assume(i1 [[TMP20]])
; VEC-NEXT: br label [[SCALAR_PH]]
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[V_1]], [[ENTRY:%.*]] ]
; VEC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[V_2]], [[ENTRY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
; VEC-NEXT: br label [[FOR_BODY14:%.*]]
; VEC-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]]
; VEC-NEXT: [[TMP:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4
; VEC-NEXT: br i1 [[COND_2]], label [[IF_THEN18:%.*]], label [[FOR_INC23]]
; VEC-NEXT: store i32 [[TMP]], ptr [[ARRAYIDX16]], align 4
; VEC-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
; VEC-NEXT: br label [[FOR_INC23]]
; VEC-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
; VEC-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
; VEC-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
; VEC-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
; VEC-NEXT: call void @llvm.assume(i1 [[CMP13]])
; VEC-NEXT: br label [[FOR_BODY14]]
entry:
  br label %for.body9

for.body9:
  br i1 %cond, label %for.inc26, label %for.body14

for.body14:
  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc23 ], [ %v.1, %for.body9 ]
  %iNewChunks.120 = phi i32 [ %iNewChunks.2, %for.inc23 ], [ %v.2, %for.body9 ]
  %arrayidx16 = getelementptr inbounds [768 x i32], ptr %ptr, i64 0, i64 %indvars.iv3
  %tmp = load i32, ptr %arrayidx16, align 4
  br i1 %cond.2, label %if.then18, label %for.inc23

if.then18:
  store i32 %tmp, ptr %arrayidx16, align 4
  %inc21 = add nsw i32 %iNewChunks.120, 1
  br label %for.inc23

for.inc23:
  %iNewChunks.2 = phi i32 [ %inc21, %if.then18 ], [ %iNewChunks.120, %for.body14 ]
  %indvars.iv.next4 = add nsw i64 %indvars.iv3, 1
  %tmp1 = trunc i64 %indvars.iv3 to i32
  %cmp13 = icmp slt i32 %tmp1, 0
  br i1 %cmp13, label %for.body14, label %for.inc26

for.inc26:
  %iNewChunks.1.lcssa = phi i32 [ undef, %for.body9 ], [ %iNewChunks.2, %for.inc23 ]
  unreachable
}

; In the test below, it's more profitable for the expression feeding the
; conditional store to remain scalar. Since we can only type-shrink vector
; types, we shouldn't try to represent the expression in a smaller type.
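; Concretely, the value stored under the predicate is (i8)(i32)x, and that
; zext/trunc chain only exists in the scalar, predicated block. A rough C
; sketch of @minimal_bit_widths (illustration only, not part of the test):
;
;   for (long i = 0, n = 1000; n != 0; i++, n--) {
;     unsigned char t = p[i];
;     if (c)
;       p[i] = (unsigned char)(unsigned int)t;  // feeds only the conditional store
;   }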
define void @minimal_bit_widths(i1 %c) {
; UNROLL-LABEL: @minimal_bit_widths(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr undef, i64 [[TMP1]]
; UNROLL-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1
; UNROLL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.if:
; UNROLL-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
; UNROLL-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.continue3:
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; UNROLL-NEXT: br i1 [[TMP6]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NEXT: ret void
; UNROLL-NOSIMPLIFY-LABEL: @minimal_bit_widths(
; UNROLL-NOSIMPLIFY-NEXT: entry:
; UNROLL-NOSIMPLIFY-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY: vector.ph:
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr undef, i64 [[TMP1]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.if2:
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.continue3:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: for.body:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; UNROLL-NOSIMPLIFY: if.then:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC]]
; UNROLL-NOSIMPLIFY: for.inc:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; UNROLL-NOSIMPLIFY: for.end:
; UNROLL-NOSIMPLIFY-NEXT: ret void
; VEC-LABEL: @minimal_bit_widths(
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VEC-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1
; VEC-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
; VEC-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC: pred.store.if:
; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC: pred.store.continue:
; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
; VEC-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; VEC: pred.store.if2:
; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr undef, i64 [[TMP7]]
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
; VEC-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
; VEC-NEXT: br label [[PRED_STORE_CONTINUE3]]
; VEC: pred.store.continue3:
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VEC-NEXT: br i1 [[TMP10]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
entry:
  br label %for.body

for.body:
  %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
  %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 1000, %entry ]
  %tmp2 = getelementptr i8, ptr undef, i64 %tmp0
  %tmp3 = load i8, ptr %tmp2, align 1
  br i1 %c, label %if.then, label %for.inc

if.then:
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = trunc i32 %tmp4 to i8
  store i8 %tmp5, ptr %tmp2, align 1
  br label %for.inc

for.inc:
  %tmp6 = add nuw nsw i64 %tmp0, 1
  %tmp7 = add i64 %tmp1, -1
  %tmp8 = icmp eq i64 %tmp7, 0
  br i1 %tmp8, label %for.end, label %for.body

for.end:
  ret void
}

define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
; UNROLL-LABEL: @minimal_bit_widths_with_aliasing_store(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; UNROLL-NEXT: store i8 0, ptr [[TMP2]], align 1
; UNROLL-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; UNROLL-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; UNROLL-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; UNROLL-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1
; UNROLL-NEXT: br label [[FOR_INC]]
; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1
; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; UNROLL-NEXT: ret void
; UNROLL-NOSIMPLIFY-LABEL: @minimal_bit_widths_with_aliasing_store(
; UNROLL-NOSIMPLIFY-NEXT: entry:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY: vector.ph:
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP1]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.if2:
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.continue3:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: for.body:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; UNROLL-NOSIMPLIFY: if.then:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC]]
; UNROLL-NOSIMPLIFY: for.inc:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; UNROLL-NOSIMPLIFY: for.end:
; UNROLL-NOSIMPLIFY-NEXT: ret void
; VEC-LABEL: @minimal_bit_widths_with_aliasing_store(
; VEC-NEXT: br label [[FOR_BODY:%.*]]
; VEC-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; VEC-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]]
; VEC-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; VEC-NEXT: store i8 0, ptr [[TMP2]], align 1
; VEC-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; VEC-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; VEC-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1
; VEC-NEXT: br label [[FOR_INC]]
; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1
; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
entry:
  br label %for.body

for.body:
  %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
  %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ]
  %tmp2 = getelementptr i8, ptr %ptr, i64 %tmp0
  %tmp3 = load i8, ptr %tmp2, align 1
  store i8 0, ptr %tmp2
  br i1 %c, label %if.then, label %for.inc

if.then:
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = trunc i32 %tmp4 to i8
  store i8 %tmp5, ptr %tmp2, align 1
  br label %for.inc

for.inc:
  %tmp6 = add nuw nsw i64 %tmp0, 1
  %tmp7 = add i64 %tmp1, -1
  %tmp8 = icmp eq i64 %tmp7, 0
  br i1 %tmp8, label %for.end, label %for.body