1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2 ; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s
4 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
5 target triple = "aarch64"
7 define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
8 ; CHECK-LABEL: define void @add_a
9 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) {
11 ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[LEN]], 0
12 ; CHECK-NEXT: br i1 [[CMP8]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
14 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
15 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
17 ; CHECK: vector.main.loop.iter.check:
18 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
19 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
21 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
22 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
23 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
25 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
26 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
27 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
28 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
29 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
30 ; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 2)
31 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
32 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
33 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
34 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
35 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
36 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
37 ; CHECK: middle.block:
38 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
39 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
40 ; CHECK: vec.epilog.iter.check:
41 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
42 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
43 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
44 ; CHECK: vec.epilog.ph:
45 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
46 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
47 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
48 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
49 ; CHECK: vec.epilog.vector.body:
50 ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
51 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0
52 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]]
53 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
54 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
55 ; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i8> [[WIDE_LOAD5]], splat (i8 2)
56 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]]
57 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
58 ; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP13]], align 1
59 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
60 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
61 ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
62 ; CHECK: vec.epilog.middle.block:
63 ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
64 ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
65 ; CHECK: vec.epilog.scalar.ph:
66 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
67 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
68 ; CHECK: for.cond.cleanup.loopexit:
69 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
70 ; CHECK: for.cond.cleanup:
71 ; CHECK-NEXT: ret void
73 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
74 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
75 ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
76 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP15]] to i32
77 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
78 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
79 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
80 ; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1
81 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
82 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
83 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
84 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
87 %cmp8 = icmp sgt i32 %len, 0
88 br i1 %cmp8, label %for.body, label %for.cond.cleanup
90 for.cond.cleanup: ; preds = %for.body, %entry
93 for.body: ; preds = %entry, %for.body
94 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
95 %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
96 %0 = load i8, ptr %arrayidx
97 %conv = zext i8 %0 to i32
98 %add = add nuw nsw i32 %conv, 2
99 %conv1 = trunc i32 %add to i8
100 %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
101 store i8 %conv1, ptr %arrayidx3
102 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
103 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
104 %exitcond = icmp eq i32 %lftr.wideiv, %len
105 br i1 %exitcond, label %for.cond.cleanup, label %for.body
108 ; Ensure that we preserve nuw/nsw if we're not shrinking the values we're
; working with.
110 define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
111 ; CHECK-LABEL: define void @add_a1
112 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) {
114 ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[LEN]], 0
115 ; CHECK-NEXT: br i1 [[CMP8]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
117 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
118 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
119 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
120 ; CHECK: vector.main.loop.iter.check:
121 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
122 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
124 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
125 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
126 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
127 ; CHECK: vector.body:
128 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
129 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
130 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
131 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
132 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
133 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], splat (i8 2)
134 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
135 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
136 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
137 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
138 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
139 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
140 ; CHECK: middle.block:
141 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
142 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
143 ; CHECK: vec.epilog.iter.check:
144 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
145 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
146 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
147 ; CHECK: vec.epilog.ph:
148 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
149 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
150 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
151 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
152 ; CHECK: vec.epilog.vector.body:
153 ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
154 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0
155 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]]
156 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
157 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
158 ; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <4 x i8> [[WIDE_LOAD5]], splat (i8 2)
159 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]]
160 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
161 ; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP13]], align 1
162 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
163 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
164 ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
165 ; CHECK: vec.epilog.middle.block:
166 ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
167 ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
168 ; CHECK: vec.epilog.scalar.ph:
169 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
170 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
171 ; CHECK: for.cond.cleanup.loopexit:
172 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
173 ; CHECK: for.cond.cleanup:
174 ; CHECK-NEXT: ret void
176 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
177 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
178 ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
179 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[TMP15]], 2
180 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
181 ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX3]], align 1
182 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
183 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
184 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
185 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
188 %cmp8 = icmp sgt i32 %len, 0
189 br i1 %cmp8, label %for.body, label %for.cond.cleanup
191 for.cond.cleanup: ; preds = %for.body, %entry
194 for.body: ; preds = %entry, %for.body
195 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
196 %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
197 %0 = load i8, ptr %arrayidx
198 %add = add nuw nsw i8 %0, 2
199 %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
200 store i8 %add, ptr %arrayidx3
201 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
202 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
203 %exitcond = icmp eq i32 %lftr.wideiv, %len
204 br i1 %exitcond, label %for.cond.cleanup, label %for.body
207 define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
208 ; CHECK-LABEL: define void @add_b
209 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) {
211 ; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[LEN]], 0
212 ; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
213 ; CHECK: for.body.preheader:
214 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
215 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
216 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
218 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
219 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
220 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
221 ; CHECK: vector.body:
222 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
223 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
224 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1]]
225 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
226 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
227 ; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], splat (i16 2)
228 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]]
229 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
230 ; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2
231 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
232 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
233 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
234 ; CHECK: middle.block:
235 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
236 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
238 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
239 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
240 ; CHECK: for.cond.cleanup.loopexit:
241 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
242 ; CHECK: for.cond.cleanup:
243 ; CHECK-NEXT: ret void
245 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
246 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]]
247 ; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
248 ; CHECK-NEXT: [[CONV8:%.*]] = zext i16 [[TMP8]] to i32
249 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV8]], 2
250 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
251 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]
252 ; CHECK-NEXT: store i16 [[CONV1]], ptr [[ARRAYIDX3]], align 2
253 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
254 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
255 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
259 %cmp9 = icmp sgt i32 %len, 0
260 br i1 %cmp9, label %for.body, label %for.cond.cleanup
262 for.cond.cleanup: ; preds = %for.body, %entry
265 for.body: ; preds = %entry, %for.body
266 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
267 %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
268 %0 = load i16, ptr %arrayidx
269 %conv8 = zext i16 %0 to i32
270 %add = add nuw nsw i32 %conv8, 2
271 %conv1 = trunc i32 %add to i16
272 %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv
273 store i16 %conv1, ptr %arrayidx3
274 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
275 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
276 %exitcond = icmp eq i32 %lftr.wideiv, %len
277 br i1 %exitcond, label %for.cond.cleanup, label %for.body
280 define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
281 ; CHECK-LABEL: define void @add_c
282 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) {
284 ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[LEN]], 0
285 ; CHECK-NEXT: br i1 [[CMP8]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
287 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
288 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
289 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
290 ; CHECK: vector.main.loop.iter.check:
291 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
292 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
294 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
295 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
296 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
297 ; CHECK: vector.body:
298 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
299 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
300 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
301 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
302 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
303 ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
304 ; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], splat (i16 2)
305 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]]
306 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
307 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2
308 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
309 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
310 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
311 ; CHECK: middle.block:
312 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
313 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
314 ; CHECK: vec.epilog.iter.check:
315 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
316 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
317 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
318 ; CHECK: vec.epilog.ph:
319 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
320 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
321 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
322 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
323 ; CHECK: vec.epilog.vector.body:
324 ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
325 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX4]], 0
326 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
327 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
328 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
329 ; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i8> [[WIDE_LOAD5]] to <4 x i16>
330 ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i16> [[TMP12]], splat (i16 2)
331 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP9]]
332 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP14]], i32 0
333 ; CHECK-NEXT: store <4 x i16> [[TMP13]], ptr [[TMP15]], align 2
334 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
335 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
336 ; CHECK-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
337 ; CHECK: vec.epilog.middle.block:
338 ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
339 ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
340 ; CHECK: vec.epilog.scalar.ph:
341 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
342 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
343 ; CHECK: for.cond.cleanup.loopexit:
344 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
345 ; CHECK: for.cond.cleanup:
346 ; CHECK-NEXT: ret void
348 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
349 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
350 ; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
351 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP17]] to i32
352 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
353 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
354 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]
355 ; CHECK-NEXT: store i16 [[CONV1]], ptr [[ARRAYIDX3]], align 2
356 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
357 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
358 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
359 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
362 %cmp8 = icmp sgt i32 %len, 0
363 br i1 %cmp8, label %for.body, label %for.cond.cleanup
365 for.cond.cleanup: ; preds = %for.body, %entry
368 for.body: ; preds = %entry, %for.body
369 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
370 %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
371 %0 = load i8, ptr %arrayidx
372 %conv = zext i8 %0 to i32
373 %add = add nuw nsw i32 %conv, 2
374 %conv1 = trunc i32 %add to i16
375 %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv
376 store i16 %conv1, ptr %arrayidx3
377 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
378 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
379 %exitcond = icmp eq i32 %lftr.wideiv, %len
380 br i1 %exitcond, label %for.cond.cleanup, label %for.body
383 define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
384 ; CHECK-LABEL: define void @add_d
385 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) {
387 ; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[LEN]], 0
388 ; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
389 ; CHECK: for.body.preheader:
390 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
391 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
392 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
394 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
395 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
396 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
397 ; CHECK: vector.body:
398 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
399 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
400 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1]]
401 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
402 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
403 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
404 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], splat (i32 2)
405 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[TMP1]]
406 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
407 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP7]], align 4
408 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
409 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
410 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
411 ; CHECK: middle.block:
412 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
413 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
415 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
416 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
417 ; CHECK: for.cond.cleanup.loopexit:
418 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
419 ; CHECK: for.cond.cleanup:
420 ; CHECK-NEXT: ret void
422 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
423 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]]
424 ; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
425 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
426 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
427 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[INDVARS_IV]]
428 ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
429 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
430 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
431 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
432 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
435 %cmp7 = icmp sgt i32 %len, 0
436 br i1 %cmp7, label %for.body, label %for.cond.cleanup
438 for.cond.cleanup: ; preds = %for.body, %entry
441 for.body: ; preds = %entry, %for.body
442 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
443 %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
444 %0 = load i16, ptr %arrayidx
445 %conv = sext i16 %0 to i32
446 %add = add nsw i32 %conv, 2
447 %arrayidx2 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv
448 store i32 %add, ptr %arrayidx2
449 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
450 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
451 %exitcond = icmp eq i32 %lftr.wideiv, %len
452 br i1 %exitcond, label %for.cond.cleanup, label %for.body
455 define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
456 ; CHECK-LABEL: define void @add_e
457 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i8 [[ARG1:%.*]], i8 [[ARG2:%.*]], i32 [[LEN:%.*]]) {
459 ; CHECK-NEXT: [[CMP_32:%.*]] = icmp sgt i32 [[LEN]], 0
460 ; CHECK-NEXT: br i1 [[CMP_32]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
462 ; CHECK-NEXT: [[CONV11:%.*]] = zext i8 [[ARG2]] to i32
463 ; CHECK-NEXT: [[CONV13:%.*]] = zext i8 [[ARG1]] to i32
464 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
465 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
466 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
467 ; CHECK: vector.main.loop.iter.check:
468 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
469 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
471 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
472 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
473 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV13]], i64 0
474 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
475 ; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT]] to <16 x i8>
476 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[CONV11]], i64 0
477 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer
478 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT3]] to <16 x i8>
479 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
480 ; CHECK: vector.body:
481 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
482 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
483 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP3]]
484 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
485 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
486 ; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 4)
487 ; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP6]], splat (i8 32)
488 ; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], splat (i8 51)
489 ; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], splat (i8 60)
490 ; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i8> [[TMP7]], [[TMP1]]
491 ; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP9]], splat (i8 -4)
492 ; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i8> [[TMP11]], [[TMP2]]
493 ; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i8> [[TMP12]], [[TMP10]]
494 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
495 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0
496 ; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP15]], align 1
497 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
498 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
499 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
500 ; CHECK: middle.block:
501 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
502 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
503 ; CHECK: vec.epilog.iter.check:
504 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
505 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
506 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
507 ; CHECK: vec.epilog.ph:
508 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
509 ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 4
510 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
511 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[CONV13]], i64 0
512 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
513 ; CHECK-NEXT: [[TMP17:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT7]] to <4 x i8>
514 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV11]], i64 0
515 ; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
516 ; CHECK-NEXT: [[TMP18:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT9]] to <4 x i8>
517 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
518 ; CHECK: vec.epilog.vector.body:
519 ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
520 ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX10]], 0
521 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP19]]
522 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0
523 ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1
524 ; CHECK-NEXT: [[TMP22:%.*]] = shl <4 x i8> [[WIDE_LOAD11]], splat (i8 4)
525 ; CHECK-NEXT: [[TMP23:%.*]] = add <4 x i8> [[TMP22]], splat (i8 32)
526 ; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i8> [[WIDE_LOAD11]], splat (i8 51)
527 ; CHECK-NEXT: [[TMP25:%.*]] = mul <4 x i8> [[TMP24]], splat (i8 60)
528 ; CHECK-NEXT: [[TMP26:%.*]] = and <4 x i8> [[TMP23]], [[TMP17]]
529 ; CHECK-NEXT: [[TMP27:%.*]] = and <4 x i8> [[TMP25]], splat (i8 -4)
530 ; CHECK-NEXT: [[TMP28:%.*]] = xor <4 x i8> [[TMP27]], [[TMP18]]
531 ; CHECK-NEXT: [[TMP29:%.*]] = mul <4 x i8> [[TMP28]], [[TMP26]]
532 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP19]]
533 ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i32 0
534 ; CHECK-NEXT: store <4 x i8> [[TMP29]], ptr [[TMP31]], align 1
535 ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 4
536 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]]
537 ; CHECK-NEXT: br i1 [[TMP32]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
538 ; CHECK: vec.epilog.middle.block:
539 ; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
540 ; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
541 ; CHECK: vec.epilog.scalar.ph:
542 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
543 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
544 ; CHECK: for.cond.cleanup.loopexit:
545 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
546 ; CHECK: for.cond.cleanup:
547 ; CHECK-NEXT: ret void
549 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
550 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
551 ; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
552 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP33]] to i32
553 ; CHECK-NEXT: [[ADD:%.*]] = shl i32 [[CONV]], 4
554 ; CHECK-NEXT: [[CONV2:%.*]] = add nuw nsw i32 [[ADD]], 32
555 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[CONV]], 51
556 ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[OR]], 60
557 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[CONV2]], [[CONV13]]
558 ; CHECK-NEXT: [[MUL_MASKED:%.*]] = and i32 [[MUL]], 252
559 ; CHECK-NEXT: [[CONV17:%.*]] = xor i32 [[MUL_MASKED]], [[CONV11]]
560 ; CHECK-NEXT: [[MUL18:%.*]] = mul nuw nsw i32 [[CONV17]], [[AND]]
561 ; CHECK-NEXT: [[CONV19:%.*]] = trunc i32 [[MUL18]] to i8
562 ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
563 ; CHECK-NEXT: store i8 [[CONV19]], ptr [[ARRAYIDX21]], align 1
564 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
565 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
566 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
567 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
570 %cmp.32 = icmp sgt i32 %len, 0
571 br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup
573 for.body.lr.ph: ; preds = %entry
574 %conv11 = zext i8 %arg2 to i32
575 %conv13 = zext i8 %arg1 to i32
578 for.cond.cleanup: ; preds = %for.body, %entry
581 for.body: ; preds = %for.body, %for.body.lr.ph
582 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
583 %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
584 %0 = load i8, ptr %arrayidx
585 %conv = zext i8 %0 to i32
586 %add = shl i32 %conv, 4
587 %conv2 = add nuw nsw i32 %add, 32
588 %or = or i32 %conv, 51
589 %mul = mul nuw nsw i32 %or, 60
590 %and = and i32 %conv2, %conv13
591 %mul.masked = and i32 %mul, 252
592 %conv17 = xor i32 %mul.masked, %conv11
593 %mul18 = mul nuw nsw i32 %conv17, %and
594 %conv19 = trunc i32 %mul18 to i8
595 %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
596 store i8 %conv19, ptr %arrayidx21
597 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
598 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
599 %exitcond = icmp eq i32 %lftr.wideiv, %len
600 br i1 %exitcond, label %for.cond.cleanup, label %for.body
; @add_f: narrowing-vectorization test with an epilogue loop.  The scalar
; loop loads i16 from %p, sign-extends to i32, combines the value with the
; zero-extended i8 arguments %arg1/%arg2 (shl/add/and/or/mul/xor), then
; truncates the result to i8 and stores it to %q.  The autogenerated CHECK
; lines expect a VF=16 main vector loop plus a VF=4 vector epilogue in
; which all the arithmetic is performed on truncated <16 x i8>/<4 x i8>
; values rather than on widened i32 vectors.
; NOTE(review): the CHECK lines below were produced by
; utils/update_test_checks.py (see the file header); regenerate them with
; that script instead of editing them by hand.
603 define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
604 ; CHECK-LABEL: define void @add_f
605 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i8 [[ARG1:%.*]], i8 [[ARG2:%.*]], i32 [[LEN:%.*]]) {
607 ; CHECK-NEXT: [[CMP_32:%.*]] = icmp sgt i32 [[LEN]], 0
608 ; CHECK-NEXT: br i1 [[CMP_32]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
610 ; CHECK-NEXT: [[CONV11:%.*]] = zext i8 [[ARG2]] to i32
611 ; CHECK-NEXT: [[CONV13:%.*]] = zext i8 [[ARG1]] to i32
612 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
613 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
614 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
615 ; CHECK: vector.main.loop.iter.check:
616 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
617 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
619 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
620 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
621 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV13]], i64 0
622 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
623 ; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT]] to <16 x i8>
624 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[CONV11]], i64 0
625 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer
626 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT3]] to <16 x i8>
627 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
628 ; CHECK: vector.body:
629 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
630 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
631 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP3]]
632 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
633 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2
634 ; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[WIDE_LOAD]] to <16 x i8>
635 ; CHECK-NEXT: [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 4)
636 ; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i8> [[TMP7]], splat (i8 32)
637 ; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP6]], splat (i8 -52)
638 ; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], splat (i8 51)
639 ; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], splat (i8 60)
640 ; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i8> [[TMP8]], [[TMP1]]
641 ; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i8> [[TMP11]], splat (i8 -4)
642 ; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i8> [[TMP13]], [[TMP2]]
643 ; CHECK-NEXT: [[TMP15:%.*]] = mul <16 x i8> [[TMP14]], [[TMP12]]
644 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
645 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
646 ; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr [[TMP17]], align 1
647 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
648 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
649 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
650 ; CHECK: middle.block:
651 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
652 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
653 ; CHECK: vec.epilog.iter.check:
654 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
655 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
656 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
657 ; CHECK: vec.epilog.ph:
658 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
659 ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 4
660 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
661 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[CONV13]], i64 0
662 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
663 ; CHECK-NEXT: [[TMP19:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT7]] to <4 x i8>
664 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV11]], i64 0
665 ; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
666 ; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT9]] to <4 x i8>
667 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
668 ; CHECK: vec.epilog.vector.body:
669 ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
670 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX10]], 0
671 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP21]]
672 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0
673 ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i16>, ptr [[TMP23]], align 2
674 ; CHECK-NEXT: [[TMP24:%.*]] = trunc <4 x i16> [[WIDE_LOAD11]] to <4 x i8>
675 ; CHECK-NEXT: [[TMP25:%.*]] = shl <4 x i8> [[TMP24]], splat (i8 4)
676 ; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i8> [[TMP25]], splat (i8 32)
677 ; CHECK-NEXT: [[TMP27:%.*]] = and <4 x i8> [[TMP24]], splat (i8 -52)
678 ; CHECK-NEXT: [[TMP28:%.*]] = or <4 x i8> [[TMP27]], splat (i8 51)
679 ; CHECK-NEXT: [[TMP29:%.*]] = mul <4 x i8> [[TMP28]], splat (i8 60)
680 ; CHECK-NEXT: [[TMP30:%.*]] = and <4 x i8> [[TMP26]], [[TMP19]]
681 ; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i8> [[TMP29]], splat (i8 -4)
682 ; CHECK-NEXT: [[TMP32:%.*]] = xor <4 x i8> [[TMP31]], [[TMP20]]
683 ; CHECK-NEXT: [[TMP33:%.*]] = mul <4 x i8> [[TMP32]], [[TMP30]]
684 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP21]]
685 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i32 0
686 ; CHECK-NEXT: store <4 x i8> [[TMP33]], ptr [[TMP35]], align 1
687 ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 4
688 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]]
689 ; CHECK-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
690 ; CHECK: vec.epilog.middle.block:
691 ; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
692 ; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
693 ; CHECK: vec.epilog.scalar.ph:
694 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
695 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
696 ; CHECK: for.cond.cleanup.loopexit:
697 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
698 ; CHECK: for.cond.cleanup:
699 ; CHECK-NEXT: ret void
701 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
702 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]]
703 ; CHECK-NEXT: [[TMP37:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
704 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP37]] to i32
705 ; CHECK-NEXT: [[ADD:%.*]] = shl i32 [[CONV]], 4
706 ; CHECK-NEXT: [[CONV2:%.*]] = add nsw i32 [[ADD]], 32
707 ; CHECK-NEXT: [[OR:%.*]] = and i32 [[CONV]], 204
708 ; CHECK-NEXT: [[CONV8:%.*]] = or i32 [[OR]], 51
709 ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV8]], 60
710 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[CONV2]], [[CONV13]]
711 ; CHECK-NEXT: [[MUL_MASKED:%.*]] = and i32 [[MUL]], 252
712 ; CHECK-NEXT: [[CONV17:%.*]] = xor i32 [[MUL_MASKED]], [[CONV11]]
713 ; CHECK-NEXT: [[MUL18:%.*]] = mul nuw nsw i32 [[CONV17]], [[AND]]
714 ; CHECK-NEXT: [[CONV19:%.*]] = trunc i32 [[MUL18]] to i8
715 ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
716 ; CHECK-NEXT: store i8 [[CONV19]], ptr [[ARRAYIDX21]], align 1
717 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
718 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
719 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
720 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; Scalar input IR: the hand-written loop the vectorizer transforms.
723 %cmp.32 = icmp sgt i32 %len, 0
724 br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup
726 for.body.lr.ph: ; preds = %entry
727 %conv11 = zext i8 %arg2 to i32
728 %conv13 = zext i8 %arg1 to i32
731 for.cond.cleanup: ; preds = %for.body, %entry
734 for.body: ; preds = %for.body, %for.body.lr.ph
735 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
736 %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
737 %0 = load i16, ptr %arrayidx
738 %conv = sext i16 %0 to i32
739 %add = shl i32 %conv, 4
740 %conv2 = add nsw i32 %add, 32
741 %or = and i32 %conv, 204
742 %conv8 = or i32 %or, 51
743 %mul = mul nuw nsw i32 %conv8, 60
744 %and = and i32 %conv2, %conv13
745 %mul.masked = and i32 %mul, 252
746 %conv17 = xor i32 %mul.masked, %conv11
747 %mul18 = mul nuw nsw i32 %conv17, %and
748 %conv19 = trunc i32 %mul18 to i8
749 %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
750 store i8 %conv19, ptr %arrayidx21
751 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
752 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
753 %exitcond = icmp eq i32 %lftr.wideiv, %len
754 br i1 %exitcond, label %for.cond.cleanup, label %for.body
; @add_phifail: the scalar loop carries a first-order recurrence (%a_phi
; receives the previous iteration's %conv) whose value is never used inside
; or after the loop, so the loop can still be vectorized.  The autogenerated
; CHECK lines expect a VF=16 vector body with a <16 x i32> recurrence phi
; ([[VECTOR_RECUR]]) and a lane-15 extract in middle.block to feed the
; scalar remainder loop's resume value.
; NOTE(review): CHECK lines are autogenerated by utils/update_test_checks.py;
; regenerate rather than hand-edit.
757 define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
758 ; CHECK-LABEL: define void @add_phifail
759 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) {
761 ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[LEN]], 0
762 ; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
763 ; CHECK: for.body.preheader:
764 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
765 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
766 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
768 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
769 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
770 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
771 ; CHECK: vector.body:
772 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
773 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
774 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
775 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
776 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
777 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
778 ; CHECK-NEXT: [[TMP4]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
779 ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <16 x i32> [[TMP4]], splat (i32 2)
780 ; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i8>
781 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
782 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
783 ; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
784 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
785 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
786 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
787 ; CHECK: middle.block:
788 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
789 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
790 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
792 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
793 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
794 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
795 ; CHECK: for.cond.cleanup.loopexit:
796 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
797 ; CHECK: for.cond.cleanup:
798 ; CHECK-NEXT: ret void
800 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
801 ; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
802 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
803 ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
804 ; CHECK-NEXT: [[CONV]] = zext i8 [[TMP10]] to i32
805 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
806 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
807 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
808 ; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1
809 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
810 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
811 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
812 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; Scalar input IR: %a_phi below is the (otherwise unused) recurrence.
815 %cmp8 = icmp sgt i32 %len, 0
816 br i1 %cmp8, label %for.body, label %for.cond.cleanup
818 for.cond.cleanup: ; preds = %for.body, %entry
821 for.body: ; preds = %entry, %for.body
822 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
823 %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]
824 %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
825 %0 = load i8, ptr %arrayidx
826 %conv = zext i8 %0 to i32
827 %add = add nuw nsw i32 %conv, 2
828 %conv1 = trunc i32 %add to i8
829 %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
830 store i8 %conv1, ptr %arrayidx3
831 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
832 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
833 %exitcond = icmp eq i32 %lftr.wideiv, %len
834 br i1 %exitcond, label %for.cond.cleanup, label %for.body
837 ; When we vectorize this loop, we generate correct code
838 ; even when %len exactly divides VF (since we extract from the second-to-last
839 ; index and pass this to the for.cond.cleanup block). The vectorized loop
840 ; returns the correct value a_phi = p[len - 2].
; @add_phifail2: like @add_phifail, but the recurrence value %a_phi is live
; out of the loop (it is truncated and returned).  The autogenerated CHECK
; lines therefore expect middle.block to extract lane 14 (the second-to-last
; element) of the recurrence vector for the exit phi, and lane 15 for the
; scalar loop's resume value.
; NOTE(review): CHECK lines are autogenerated by utils/update_test_checks.py;
; regenerate rather than hand-edit.
841 define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
842 ; CHECK-LABEL: define i8 @add_phifail2
843 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) {
845 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -1
846 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
847 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
848 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
849 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
851 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
852 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
853 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
854 ; CHECK: vector.body:
855 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
856 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
857 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
858 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP3]]
859 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
860 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
861 ; CHECK-NEXT: [[TMP6]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
862 ; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <16 x i32> [[TMP6]], splat (i32 2)
863 ; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i8>
864 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
865 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
866 ; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP10]], align 1
867 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
868 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
869 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
870 ; CHECK: middle.block:
871 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
872 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
873 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
874 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
876 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
877 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
878 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
879 ; CHECK: for.cond.cleanup:
880 ; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[A_PHI:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
881 ; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[A_PHI_LCSSA]] to i8
882 ; CHECK-NEXT: ret i8 [[RET]]
884 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
885 ; CHECK-NEXT: [[A_PHI]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
886 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
887 ; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
888 ; CHECK-NEXT: [[CONV]] = zext i8 [[TMP12]] to i32
889 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
890 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
891 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
892 ; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1
893 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
894 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
895 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
896 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; Scalar input IR: %a_phi is live-out via the truncated return value.
901 for.cond.cleanup: ; preds = %for.body, %entry
902 %ret = trunc i32 %a_phi to i8
905 for.body: ; preds = %entry, %for.body
906 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
907 %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]
908 %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
909 %0 = load i8, ptr %arrayidx
910 %conv = zext i8 %0 to i32
911 %add = add nuw nsw i32 %conv, 2
912 %conv1 = trunc i32 %add to i8
913 %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
914 store i8 %conv1, ptr %arrayidx3
915 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
916 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
917 %exitcond = icmp eq i32 %lftr.wideiv, %len
918 br i1 %exitcond, label %for.cond.cleanup, label %for.body