1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2 ; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -S %s | FileCheck %s
4 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
; The scalar loop below counts an i64 induction variable up from 0 in steps of
; 1 and exits once the IV equals %N. Every iteration uses the IV twice: as the
; wide (i64) index of a getelementptr over { i32, [8 x i32] } elements, and,
; truncated to i32, as the value that is stored to that address.
; The autogenerated assertions expect a main vector loop carrying both a
; <8 x i64> and a <8 x i32> vector induction (index advancing by 32 per
; iteration, stores emitted as masked scatters), followed by an epilogue
; vector loop that uses <4 x i64> / <4 x i32> inductions.
6 define i32 @iv_used_widened_and_truncated(ptr %dst, i64 %N) #0 {
7 ; CHECK-LABEL: define i32 @iv_used_widened_and_truncated(
8 ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
9 ; CHECK-NEXT: iter.check:
10 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
11 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4
12 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[ENTRY:%.*]]
13 ; CHECK: vector.main.loop.iter.check:
14 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
15 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
17 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
18 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
19 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
21 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
22 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
23 ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ]
24 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
25 ; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
26 ; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8)
27 ; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], splat (i32 8)
28 ; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], splat (i32 8)
29 ; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], splat (i32 8)
30 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[VEC_IND]]
31 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD]]
32 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD1]]
33 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD2]]
34 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VEC_IND4]], <8 x ptr> [[TMP1]], i32 8, <8 x i1> splat (i1 true))
35 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD5]], <8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true))
36 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD6]], <8 x ptr> [[TMP3]], i32 8, <8 x i1> splat (i1 true))
37 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD7]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> splat (i1 true))
38 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
39 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8)
40 ; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], splat (i32 8)
41 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
42 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
43 ; CHECK: middle.block:
44 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
45 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
46 ; CHECK: vec.epilog.iter.check:
47 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
48 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
49 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]]
50 ; CHECK: vec.epilog.ph:
51 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
52 ; CHECK-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[TMP0]], 4
53 ; CHECK-NEXT: [[N_VEC9:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF8]]
54 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
55 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
56 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
57 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i32
58 ; CHECK-NEXT: [[DOTSPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
59 ; CHECK-NEXT: [[DOTSPLAT15:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT14]], <4 x i32> poison, <4 x i32> zeroinitializer
60 ; CHECK-NEXT: [[INDUCTION16:%.*]] = add <4 x i32> [[DOTSPLAT15]], <i32 0, i32 1, i32 2, i32 3>
61 ; CHECK-NEXT: br label [[LOOP:%.*]]
62 ; CHECK: vec.epilog.vector.body:
63 ; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT19:%.*]], [[LOOP]] ]
64 ; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <4 x i64> [ [[INDUCTION]], [[SCALAR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[LOOP]] ]
65 ; CHECK-NEXT: [[VEC_IND17:%.*]] = phi <4 x i32> [ [[INDUCTION16]], [[SCALAR_PH]] ], [ [[VEC_IND_NEXT18:%.*]], [[LOOP]] ]
66 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <4 x i64> [[VEC_IND12]]
67 ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VEC_IND17]], <4 x ptr> [[TMP7]], i32 8, <4 x i1> splat (i1 true))
68 ; CHECK-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX11]], 4
69 ; CHECK-NEXT: [[VEC_IND_NEXT13]] = add <4 x i64> [[VEC_IND12]], splat (i64 4)
70 ; CHECK-NEXT: [[VEC_IND_NEXT18]] = add <4 x i32> [[VEC_IND17]], splat (i32 4)
71 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC9]]
72 ; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
73 ; CHECK: vec.epilog.middle.block:
74 ; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC9]]
75 ; CHECK-NEXT: br i1 [[CMP_N20]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
76 ; CHECK: vec.epilog.scalar.ph:
77 ; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ]
78 ; CHECK-NEXT: br label [[LOOP1:%.*]]
80 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP1]] ]
81 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], i64 [[IV]]
82 ; CHECK-NEXT: [[T:%.*]] = trunc i64 [[IV]] to i32
83 ; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 8
84 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
85 ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], [[N]]
86 ; CHECK-NEXT: br i1 [[C]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP4:![0-9]+]]
88 ; CHECK-NEXT: ret i32 0
94 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
95 %gep = getelementptr { i32, [ 8 x i32 ]}, ptr %dst, i64 %iv
96 %t = trunc i64 %iv to i32
97 store i32 %t, ptr %gep, align 8
98 %iv.next = add i64 %iv, 1
99 %c = icmp eq i64 %iv, %N
100 br i1 %c, label %exit, label %loop
; The scalar loop below runs an i64 induction variable from 0 and exits once
; it equals 64. The IV is truncated twice: to i16, which feeds a select on the
; loop-invariant %c (the other arm is the constant 10) whose result is stored
; to %A[iv], and to i32, which is stored directly to %B[iv].
; The autogenerated assertions expect a runtime overlap check between %A and
; %B, and a vector loop that carries separate <4 x i16> and <4 x i32> vector
; inductions (two parts per iteration, index advancing by 8) with only a
; scalar i64 index.
106 define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) {
107 ; CHECK-LABEL: define void @multiple_truncated_ivs_with_wide_uses(
108 ; CHECK-SAME: i1 [[C:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
110 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
111 ; CHECK: vector.memcheck:
112 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 130
113 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i64 260
114 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
115 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
116 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
117 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
119 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
120 ; CHECK: vector.body:
121 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
122 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
123 ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
124 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
125 ; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], splat (i32 4)
126 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
127 ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> splat (i16 10)
128 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> splat (i16 10)
129 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[TMP0]]
130 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
131 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4
132 ; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP6]], align 2, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]]
133 ; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP7]], align 2, !alias.scope [[META5]], !noalias [[META8]]
134 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]]
135 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
136 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4
137 ; CHECK-NEXT: store <4 x i32> [[VEC_IND3]], ptr [[TMP10]], align 4, !alias.scope [[META8]]
138 ; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META8]]
139 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
140 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
141 ; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], splat (i32 4)
142 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
143 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
144 ; CHECK: middle.block:
145 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
147 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
148 ; CHECK-NEXT: br label [[LOOP:%.*]]
150 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
151 ; CHECK-NEXT: [[IV_16:%.*]] = trunc i64 [[IV]] to i16
152 ; CHECK-NEXT: [[IV_32:%.*]] = trunc i64 [[IV]] to i32
153 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i16 [[IV_16]], i16 10
154 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i16, ptr [[A]], i64 [[IV]]
155 ; CHECK-NEXT: store i16 [[SEL]], ptr [[GEP_A]], align 2
156 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
157 ; CHECK-NEXT: store i32 [[IV_32]], ptr [[GEP_B]], align 4
158 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
159 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64
160 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
162 ; CHECK-NEXT: ret void
168 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
169 %iv.16 = trunc i64 %iv to i16
170 %iv.32 = trunc i64 %iv to i32
171 %sel = select i1 %c, i16 %iv.16, i16 10
172 %gep.A = getelementptr i16, ptr %A, i64 %iv
173 store i16 %sel, ptr %gep.A
174 %gep.B = getelementptr i32, ptr %B, i64 %iv
175 store i32 %iv.32, ptr %gep.B
176 %iv.next = add i64 %iv, 1
177 %ec = icmp eq i64 %iv, 64
178 br i1 %ec, label %exit, label %loop
; The scalar loop below runs an i64 induction variable from 0 and exits once
; it equals 64. The IV is truncated to i32, which is only used as the index of
; a getelementptr into %dst, and to i16, which feeds a select on the
; loop-invariant %c (the other arm is the constant 10) whose result is stored.
; The autogenerated assertions expect the i16 truncation to become a
; <8 x i16> vector induction, while the i32 index is recomputed as a scalar by
; truncating the canonical i64 index (two parts per iteration, index advancing
; by 16).
184 define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) {
185 ; CHECK-LABEL: define void @truncated_ivs_with_wide_and_scalar_uses(
186 ; CHECK-SAME: i1 [[C:%.*]], ptr [[DST:%.*]]) {
188 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
190 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
191 ; CHECK: vector.body:
192 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
193 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
194 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], splat (i16 8)
195 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
196 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
197 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP1]]
198 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> splat (i16 10)
199 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> splat (i16 10)
200 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP3]], i32 0
201 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8
202 ; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2
203 ; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2
204 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
205 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], splat (i16 8)
206 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
207 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
208 ; CHECK: middle.block:
209 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
211 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
212 ; CHECK-NEXT: br label [[LOOP:%.*]]
214 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
215 ; CHECK-NEXT: [[IV_16:%.*]] = trunc i64 [[IV]] to i16
216 ; CHECK-NEXT: [[IV_32:%.*]] = trunc i64 [[IV]] to i32
217 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i32 [[IV_32]]
218 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i16 [[IV_16]], i16 10
219 ; CHECK-NEXT: store i16 [[SEL]], ptr [[GEP]], align 2
220 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
221 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64
222 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
224 ; CHECK-NEXT: ret void
230 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
231 %iv.16 = trunc i64 %iv to i16
232 %iv.32 = trunc i64 %iv to i32
233 %gep = getelementptr i16, ptr %dst, i32 %iv.32
234 %sel = select i1 %c, i16 %iv.16, i16 10
235 store i16 %sel, ptr %gep
236 %iv.next = add i64 %iv, 1
237 %ec = icmp eq i64 %iv, 64
238 br i1 %ec, label %exit, label %loop
; The scalar loop below is driven by an i32 counter %iv.1 that starts at 100,
; is incremented by 1 each iteration, and exits once the incremented value is
; 0. It also carries three pointer inductions: %ptr.iv.1 walks %A in steps of
; 2 bytes, while %ptr.iv.2 and %ptr.iv.3 both walk %B in steps of 1 byte.
; Each iteration loads one byte through each of the two %B pointers,
; zero-extends and adds them into %add38, and stores to %ptr.iv.1 the value
; of %iv.2 (the %add38 from the previous iteration, initially 2048) shifted
; right by 1 and truncated to i8.
; The autogenerated assertions expect a runtime overlap check between %A and
; %B, wide <16 x i8> loads from %B, a <16 x i32> recurrence phi for %iv.2, and
; sixteen per-lane scalar i8 stores to %A.
244 define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
245 ; CHECK-LABEL: define void @multiple_pointer_ivs_with_scalar_uses_only(
246 ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
248 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
249 ; CHECK: vector.memcheck:
250 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 8589934391
251 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i64 4294967196
252 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
253 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
254 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
255 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
257 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A]], i64 8589934368
258 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
259 ; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
260 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
261 ; CHECK: vector.body:
262 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
263 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2048>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
264 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
265 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
266 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
267 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
268 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
269 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
270 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
271 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
272 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
273 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16
274 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18
275 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20
276 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22
277 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24
278 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
279 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
280 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
281 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
282 ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
283 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
284 ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
285 ; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
286 ; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
287 ; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
288 ; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
289 ; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
290 ; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]]
291 ; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP10]]
292 ; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP11]]
293 ; CHECK-NEXT: [[NEXT_GEP18:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]]
294 ; CHECK-NEXT: [[NEXT_GEP19:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP13]]
295 ; CHECK-NEXT: [[NEXT_GEP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP14]]
296 ; CHECK-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP15]]
297 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
298 ; CHECK-NEXT: [[NEXT_GEP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]]
299 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0
300 ; CHECK-NEXT: [[NEXT_GEP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]]
301 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP23]], i32 0
302 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1, !alias.scope [[META14:![0-9]+]]
303 ; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
304 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[NEXT_GEP22]], i32 0
305 ; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1, !alias.scope [[META14]]
306 ; CHECK-NEXT: [[TMP21:%.*]] = zext <16 x i8> [[WIDE_LOAD24]] to <16 x i32>
307 ; CHECK-NEXT: [[TMP22]] = add <16 x i32> [[TMP19]], [[TMP21]]
308 ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP22]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
309 ; CHECK-NEXT: [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], splat (i32 1)
310 ; CHECK-NEXT: [[TMP25:%.*]] = trunc <16 x i32> [[TMP24]] to <16 x i8>
311 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP25]], i32 0
312 ; CHECK-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP]], align 1, !alias.scope [[META17:![0-9]+]], !noalias [[META14]]
313 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP25]], i32 1
314 ; CHECK-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP7]], align 1, !alias.scope [[META17]], !noalias [[META14]]
315 ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP25]], i32 2
316 ; CHECK-NEXT: store i8 [[TMP28]], ptr [[NEXT_GEP8]], align 1, !alias.scope [[META17]], !noalias [[META14]]
317 ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i8> [[TMP25]], i32 3
318 ; CHECK-NEXT: store i8 [[TMP29]], ptr [[NEXT_GEP9]], align 1, !alias.scope [[META17]], !noalias [[META14]]
319 ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP25]], i32 4
320 ; CHECK-NEXT: store i8 [[TMP30]], ptr [[NEXT_GEP10]], align 1, !alias.scope [[META17]], !noalias [[META14]]
321 ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP25]], i32 5
322 ; CHECK-NEXT: store i8 [[TMP31]], ptr [[NEXT_GEP11]], align 1, !alias.scope [[META17]], !noalias [[META14]]
323 ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP25]], i32 6
324 ; CHECK-NEXT: store i8 [[TMP32]], ptr [[NEXT_GEP12]], align 1, !alias.scope [[META17]], !noalias [[META14]]
325 ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i8> [[TMP25]], i32 7
326 ; CHECK-NEXT: store i8 [[TMP33]], ptr [[NEXT_GEP13]], align 1, !alias.scope [[META17]], !noalias [[META14]]
327 ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP25]], i32 8
328 ; CHECK-NEXT: store i8 [[TMP34]], ptr [[NEXT_GEP14]], align 1, !alias.scope [[META17]], !noalias [[META14]]
329 ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP25]], i32 9
330 ; CHECK-NEXT: store i8 [[TMP35]], ptr [[NEXT_GEP15]], align 1, !alias.scope [[META17]], !noalias [[META14]]
331 ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP25]], i32 10
332 ; CHECK-NEXT: store i8 [[TMP36]], ptr [[NEXT_GEP16]], align 1, !alias.scope [[META17]], !noalias [[META14]]
333 ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i8> [[TMP25]], i32 11
334 ; CHECK-NEXT: store i8 [[TMP37]], ptr [[NEXT_GEP17]], align 1, !alias.scope [[META17]], !noalias [[META14]]
335 ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP25]], i32 12
336 ; CHECK-NEXT: store i8 [[TMP38]], ptr [[NEXT_GEP18]], align 1, !alias.scope [[META17]], !noalias [[META14]]
337 ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP25]], i32 13
338 ; CHECK-NEXT: store i8 [[TMP39]], ptr [[NEXT_GEP19]], align 1, !alias.scope [[META17]], !noalias [[META14]]
339 ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP25]], i32 14
340 ; CHECK-NEXT: store i8 [[TMP40]], ptr [[NEXT_GEP20]], align 1, !alias.scope [[META17]], !noalias [[META14]]
341 ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i8> [[TMP25]], i32 15
342 ; CHECK-NEXT: store i8 [[TMP41]], ptr [[NEXT_GEP21]], align 1, !alias.scope [[META17]], !noalias [[META14]]
343 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
344 ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967184
345 ; CHECK-NEXT: br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
346 ; CHECK: middle.block:
347 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP22]], i32 15
348 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
350 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY:%.*]] ], [ 100, [[VECTOR_MEMCHECK]] ]
351 ; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ]
352 ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
353 ; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
354 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[ENTRY]] ], [ 2048, [[VECTOR_MEMCHECK]] ]
355 ; CHECK-NEXT: br label [[LOOP:%.*]]
357 ; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LOOP]] ]
358 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD38:%.*]], [[LOOP]] ]
359 ; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[OUTPTR_0:%.*]], [[LOOP]] ]
360 ; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[INCDEC_PTR36:%.*]], [[LOOP]] ]
361 ; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[INCDEC_PTR33:%.*]], [[LOOP]] ]
362 ; CHECK-NEXT: [[INCDEC_PTR33]] = getelementptr i8, ptr [[PTR_IV_3]], i64 1
363 ; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[PTR_IV_3]], align 1
364 ; CHECK-NEXT: [[CONV34:%.*]] = zext i8 [[TMP43]] to i32
365 ; CHECK-NEXT: [[INCDEC_PTR36]] = getelementptr i8, ptr [[PTR_IV_2]], i64 1
366 ; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[PTR_IV_2]], align 1
367 ; CHECK-NEXT: [[CONV37:%.*]] = zext i8 [[TMP44]] to i32
368 ; CHECK-NEXT: [[ADD38]] = add i32 [[CONV34]], [[CONV37]]
369 ; CHECK-NEXT: [[SHR42:%.*]] = lshr i32 [[SCALAR_RECUR]], 1
370 ; CHECK-NEXT: [[CONV43:%.*]] = trunc i32 [[SHR42]] to i8
371 ; CHECK-NEXT: store i8 [[CONV43]], ptr [[PTR_IV_1]], align 1
372 ; CHECK-NEXT: [[DEC]] = add i32 [[IV_1]], 1
373 ; CHECK-NEXT: [[OUTPTR_0]] = getelementptr i8, ptr [[PTR_IV_1]], i64 2
374 ; CHECK-NEXT: [[CMP30_NOT:%.*]] = icmp eq i32 [[DEC]], 0
375 ; CHECK-NEXT: br i1 [[CMP30_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
377 ; CHECK-NEXT: ret void
383 %iv.1 = phi i32 [ 100, %entry ], [ %dec, %loop ]
384 %iv.2 = phi i32 [ 2048, %entry ], [ %add38, %loop ]
385 %ptr.iv.1 = phi ptr [ %A, %entry ], [ %outptr.0, %loop ]
386 %ptr.iv.2 = phi ptr [ %B, %entry ], [ %incdec.ptr36, %loop ]
387 %ptr.iv.3 = phi ptr [ %B, %entry ], [ %incdec.ptr33, %loop ]
388 %incdec.ptr33 = getelementptr i8, ptr %ptr.iv.3, i64 1
389 %0 = load i8, ptr %ptr.iv.3, align 1
390 %conv34 = zext i8 %0 to i32
391 %incdec.ptr36 = getelementptr i8, ptr %ptr.iv.2, i64 1
392 %1 = load i8, ptr %ptr.iv.2, align 1
393 %conv37 = zext i8 %1 to i32
394 %add38 = add i32 %conv34, %conv37
395 %shr42 = lshr i32 %iv.2, 1
396 %conv43 = trunc i32 %shr42 to i8
397 store i8 %conv43, ptr %ptr.iv.1, align 1
398 %dec = add i32 %iv.1, 1
399 %outptr.0 = getelementptr i8, ptr %ptr.iv.1, i64 2
400 %cmp30.not = icmp eq i32 %dec, 0
401 br i1 %cmp30.not, label %exit, label %loop
; The scalar loop below runs an i64 induction variable from 0 and exits once
; it equals 1. Both the IV and its increment (%iv.next) are truncated to i16
; and multiplied; the product feeds the phi %rec (initially 0), so %rec always
; holds the product computed in the previous iteration.
; The autogenerated assertions expect a <2 x i64> vector induction, a separate
; <2 x i16> vector induction for the truncated IV, and a <2 x i16> recurrence
; phi. The increment is recomputed as the wide induction plus 1 and then
; truncated, and the function returns the value of the recurrence phi on exit.
407 define i16 @iv_and_step_trunc() {
408 ; CHECK-LABEL: define i16 @iv_and_step_trunc() {
410 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
412 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
413 ; CHECK: vector.body:
414 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
415 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
416 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i16> [ <i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
417 ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
418 ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1)
419 ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16>
420 ; CHECK-NEXT: [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]]
421 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
422 ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
423 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
424 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
425 ; CHECK: middle.block:
426 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
427 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
428 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
430 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
431 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
432 ; CHECK-NEXT: br label [[LOOP:%.*]]
434 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
435 ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[LOOP]] ]
436 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
437 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[IV]] to i16
438 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_NEXT]] to i16
439 ; CHECK-NEXT: [[REC_NEXT]] = mul i16 [[TMP3]], [[TMP4]]
440 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
441 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
443 ; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
444 ; CHECK-NEXT: ret i16 [[REC_LCSSA]]
450 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
451 %rec = phi i16 [ 0, %entry ], [ %rec.next, %loop ]
452 %iv.next = add i64 %iv, 1
453 %0 = trunc i64 %iv to i16
454 %1 = trunc i64 %iv.next to i16
455 %rec.next = mul i16 %0, %1
456 %ec = icmp eq i64 %iv, 1
457 br i1 %ec, label %exit, label %loop
463 define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
464 ; CHECK-LABEL: define i32 @test_scalar_predicated_cost(
465 ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
466 ; CHECK-NEXT: iter.check:
467 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
468 ; CHECK: vector.main.loop.iter.check:
469 ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
471 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[Y]], i64 0
472 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
473 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i64> poison, i64 [[X]], i64 0
474 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT4]], <8 x i64> poison, <8 x i32> zeroinitializer
475 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
476 ; CHECK: vector.body:
477 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
478 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
479 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
480 ; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
481 ; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8)
482 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
483 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
484 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
485 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]]
486 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <8 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]]
487 ; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], splat (i1 true)
488 ; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], splat (i1 true)
489 ; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], splat (i1 true)
490 ; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], splat (i1 true)
491 ; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[VEC_IND]]
492 ; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD]]
493 ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD1]]
494 ; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD2]]
495 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
496 ; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i64> [[TMP12]] to <8 x i32>
497 ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32>
498 ; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32>
499 ; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32>
500 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0
501 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8
502 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16
503 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24
504 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr [[TMP24]], i32 4, <8 x i1> [[TMP8]])
505 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr [[TMP25]], i32 4, <8 x i1> [[TMP9]])
506 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]])
507 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]])
508 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
509 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8)
510 ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
511 ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
512 ; CHECK: middle.block:
513 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
514 ; CHECK: vec.epilog.iter.check:
515 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]]
516 ; CHECK: vec.epilog.ph:
517 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
518 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
519 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
520 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
521 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[Y]], i64 0
522 ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer
523 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
524 ; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer
525 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
526 ; CHECK: vec.epilog.vector.body:
527 ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[LOOP_HEADER]] ]
528 ; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[LOOP_HEADER]] ]
529 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX4]], 0
530 ; CHECK-NEXT: [[TMP32:%.*]] = icmp ule <4 x i64> [[VEC_IND5]], [[BROADCAST_SPLAT8]]
531 ; CHECK-NEXT: [[TMP33:%.*]] = xor <4 x i1> [[TMP32]], splat (i1 true)
532 ; CHECK-NEXT: [[TMP34:%.*]] = or <4 x i64> [[BROADCAST_SPLAT10]], [[VEC_IND5]]
533 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP31]]
534 ; CHECK-NEXT: [[TMP36:%.*]] = trunc <4 x i64> [[TMP34]] to <4 x i32>
535 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP35]], i32 0
536 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP36]], ptr [[TMP29]], i32 4, <4 x i1> [[TMP33]])
537 ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX4]], 4
538 ; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i64> [[VEC_IND5]], splat (i64 4)
539 ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT11]], 100
540 ; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP24:![0-9]+]]
541 ; CHECK: vec.epilog.middle.block:
542 ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[SCALAR_PH]]
543 ; CHECK: vec.epilog.scalar.ph:
544 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
545 ; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]]
546 ; CHECK: loop.header:
547 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
548 ; CHECK-NEXT: [[CMP9_NOT:%.*]] = icmp ule i64 [[IV]], [[Y]]
549 ; CHECK-NEXT: br i1 [[CMP9_NOT]], label [[LOOP_LATCH]], label [[IF_THEN:%.*]]
551 ; CHECK-NEXT: [[OR:%.*]] = or i64 [[X]], [[IV]]
552 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
553 ; CHECK-NEXT: [[T:%.*]] = trunc i64 [[OR]] to i32
554 ; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 4
555 ; CHECK-NEXT: br label [[LOOP_LATCH]]
557 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
558 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100
559 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP25:![0-9]+]]
561 ; CHECK-NEXT: ret i32 0
564 br label %loop.header
567 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
568 %cmp9.not = icmp ule i64 %iv, %y
569 br i1 %cmp9.not, label %loop.latch, label %if.then
573 %gep = getelementptr i32, ptr %A, i64 %iv
574 %t = trunc i64 %or to i32
575 store i32 %t, ptr %gep, align 4
579 %iv.next = add i64 %iv, 1
580 %ec = icmp eq i64 %iv, 100
581 br i1 %ec, label %exit, label %loop.header
; Input: a loop, entered only when %N < 8, that stores the i32 truncation of a
; 64-bit induction variable to the same address %dst on every iteration and
; exits once %iv == %N.
; The autogenerated assertions expect a tail-folded vector loop with a
; <4 x i64> wide induction used only for the lane mask. The stored value is
; derived from one scalar trunc of the canonical index plus a per-lane constant
; (0..3), and each lane's scalar store is guarded by its mask bit.
587 define void @wide_iv_trunc(ptr %dst, i64 %N) {
588 ; CHECK-LABEL: define void @wide_iv_trunc(
589 ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
591 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[N]], 8
592 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
593 ; CHECK: loop.preheader:
594 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
595 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
597 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3
598 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
599 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
600 ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
601 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
602 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
603 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
604 ; CHECK: vector.body:
605 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
606 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
607 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
608 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
609 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
610 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
611 ; CHECK: pred.store.if:
612 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 0
613 ; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
614 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
615 ; CHECK: pred.store.continue:
616 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
617 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
618 ; CHECK: pred.store.if1:
619 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP1]], 1
620 ; CHECK-NEXT: store i32 [[TMP6]], ptr [[DST]], align 4
621 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
622 ; CHECK: pred.store.continue2:
623 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
624 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
625 ; CHECK: pred.store.if3:
626 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP1]], 2
627 ; CHECK-NEXT: store i32 [[TMP8]], ptr [[DST]], align 4
628 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
629 ; CHECK: pred.store.continue4:
630 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
631 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
632 ; CHECK: pred.store.if5:
633 ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP1]], 3
634 ; CHECK-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4
635 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
636 ; CHECK: pred.store.continue6:
637 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
638 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
639 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
640 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
641 ; CHECK: middle.block:
642 ; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
644 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
645 ; CHECK-NEXT: br label [[LOOP:%.*]]
647 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
648 ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
649 ; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[DST]], align 4
650 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
651 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
652 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
653 ; CHECK: exit.loopexit:
654 ; CHECK-NEXT: br label [[EXIT]]
656 ; CHECK-NEXT: ret void
659 %cmp = icmp ult i64 %N, 8
660 br i1 %cmp, label %loop, label %exit
663 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
664 %iv.trunc = trunc i64 %iv to i32
665 store i32 %iv.trunc, ptr %dst, align 4
666 %iv.next = add i64 %iv, 1
667 %ec = icmp eq i64 %iv, %N
668 br i1 %ec, label %exit, label %loop
; Input: a loop whose 64-bit induction variable starts at 1 and whose exit
; condition is %iv == 0. A second i32 phi (%iv.2) starts at 0 and takes the
; truncated value of %iv from the previous iteration; that phi is what gets
; stored to %dst.
; The autogenerated assertions contain no vector blocks: the loop is expected
; to come out of the pass in its original scalar form.
674 define void @wide_iv_trunc_reuse(ptr %dst) {
675 ; CHECK-LABEL: define void @wide_iv_trunc_reuse(
676 ; CHECK-SAME: ptr [[DST:%.*]]) {
678 ; CHECK-NEXT: br label [[LOOP:%.*]]
680 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
681 ; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
682 ; CHECK-NEXT: store i32 [[IV_2]], ptr [[DST]], align 4
683 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
684 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0
685 ; CHECK-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32
686 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
688 ; CHECK-NEXT: ret void
694 %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
695 %iv.2 = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ]
696 store i32 %iv.2, ptr %dst, align 4
697 %iv.next = add i64 %iv, 1
698 %ec = icmp eq i64 %iv, 0
699 %iv.trunc = trunc i64 %iv to i32
700 br i1 %ec, label %exit, label %loop
; Input: a loop over a 64-bit counter %phi that starts at 4 and exits once
; %phi > 65 (unsigned). A second i32 phi (%phi2) starts at %arg * 3 and is
; updated each iteration with trunc(%phi * zext(%arg)); the loop stores
; (%phi2 & 12) to %dst[%phi].
; The autogenerated assertions expect an 8-lane vector loop in which %phi2 is
; a <8 x i32> induction: start splat(%arg * 3) + <0..7> * splat(%arg), stepped
; by splat(%arg * 8). The vector loop runs until the index reaches 56, and the
; scalar remainder resumes at 60.
706 define void @wombat(i32 %arg, ptr %dst) #1 {
707 ; CHECK-LABEL: define void @wombat(
708 ; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] {
710 ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ARG]], 3
711 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[ARG]] to i64
712 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
714 ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]]
715 ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
716 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
717 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
718 ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
719 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
720 ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
721 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
722 ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
723 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
724 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
725 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
726 ; CHECK: vector.body:
727 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
728 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
729 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]]
730 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
731 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
732 ; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12)
733 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
734 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4
735 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
736 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT4]]
737 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 56
738 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
739 ; CHECK: middle.block:
740 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
742 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY:%.*]] ]
743 ; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[MUL]], [[ENTRY]] ]
744 ; CHECK-NEXT: br label [[LOOP:%.*]]
746 ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
747 ; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], [[SCALAR_PH]] ], [ [[TRUNC:%.*]], [[LOOP]] ]
748 ; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr [[DST]], i64 [[PHI]]
749 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[PHI2]], 12
750 ; CHECK-NEXT: store i32 [[AND]], ptr [[GETELEMENTPTR]], align 4
751 ; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[PHI]], [[ZEXT]]
752 ; CHECK-NEXT: [[ADD]] = add i64 [[PHI]], 1
753 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[PHI]], 65
754 ; CHECK-NEXT: [[TRUNC]] = trunc i64 [[MUL3]] to i32
755 ; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
757 ; CHECK-NEXT: ret void
760 %mul = mul i32 %arg, 3
761 %zext = zext i32 %arg to i64
765 %phi = phi i64 [ 4, %entry ], [ %add, %loop ]
766 %phi2 = phi i32 [ %mul, %entry ], [ %trunc, %loop ]
767 %getelementptr = getelementptr i32, ptr %dst, i64 %phi
768 %and = and i32 %phi2, 12
769 store i32 %and, ptr %getelementptr, align 4
770 %mul3 = mul i64 %phi, %zext
771 %add = add i64 %phi, 1
772 %icmp = icmp ugt i64 %phi, 65
773 %trunc = trunc i64 %mul3 to i32
774 br i1 %icmp, label %exit, label %loop
; Variant of @wombat in which the i64 product %phi * zext(%arg) is narrowed to
; i32 through two chained truncs (i64 -> i60 -> i32) before feeding the i32
; phi %phi2. The rest of the input loop is the same: counter from 4, exit once
; %phi > 65 (unsigned), store (%phi2 & 12) to %dst[%phi].
; The autogenerated assertions expect the same 8-lane vector loop as for
; @wombat, with %phi2 as a <8 x i32> induction; the two-step trunc appears
; only in the scalar remainder loop.
780 define void @wombat2(i32 %arg, ptr %dst) #1 {
781 ; CHECK-LABEL: define void @wombat2(
782 ; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) #[[ATTR1]] {
784 ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ARG]], 3
785 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[ARG]] to i64
786 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
788 ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]]
789 ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
790 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
791 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
792 ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
793 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
794 ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
795 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
796 ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
797 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
798 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
799 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
800 ; CHECK: vector.body:
801 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
802 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
803 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]]
804 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
805 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
806 ; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12)
807 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
808 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4
809 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
810 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT4]]
811 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 56
812 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
813 ; CHECK: middle.block:
814 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
816 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY:%.*]] ]
817 ; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[MUL]], [[ENTRY]] ]
818 ; CHECK-NEXT: br label [[LOOP:%.*]]
820 ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
821 ; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], [[SCALAR_PH]] ], [ [[TRUNC_1:%.*]], [[LOOP]] ]
822 ; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr [[DST]], i64 [[PHI]]
823 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[PHI2]], 12
824 ; CHECK-NEXT: store i32 [[AND]], ptr [[GETELEMENTPTR]], align 4
825 ; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[PHI]], [[ZEXT]]
826 ; CHECK-NEXT: [[ADD]] = add i64 [[PHI]], 1
827 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[PHI]], 65
828 ; CHECK-NEXT: [[TRUNC_0:%.*]] = trunc i64 [[MUL3]] to i60
829 ; CHECK-NEXT: [[TRUNC_1]] = trunc i60 [[TRUNC_0]] to i32
830 ; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
832 ; CHECK-NEXT: ret void
835 %mul = mul i32 %arg, 3
836 %zext = zext i32 %arg to i64
840 %phi = phi i64 [ 4, %entry ], [ %add, %loop ]
841 %phi2 = phi i32 [ %mul, %entry ], [ %trunc.1, %loop ]
842 %getelementptr = getelementptr i32, ptr %dst, i64 %phi
843 %and = and i32 %phi2, 12
844 store i32 %and, ptr %getelementptr, align 4
845 %mul3 = mul i64 %phi, %zext
846 %add = add i64 %phi, 1
847 %icmp = icmp ugt i64 %phi, 65
848 %trunc.0 = trunc i64 %mul3 to i60
849 %trunc.1 = trunc i60 %trunc.0 to i32
850 br i1 %icmp, label %exit, label %loop
; Variant of @wombat in which the truncated value %trunc has one extra user,
; %dead.and = and i32 %trunc, 123, whose result is never used. The rest of the
; input loop is the same: counter from 4, exit once %phi > 65 (unsigned),
; store (%phi2 & 12) to %dst[%phi].
; The autogenerated assertions expect the same 8-lane vector loop as for
; @wombat, with %phi2 as a <8 x i32> induction; the unused `and` appears only
; in the scalar remainder loop.
857 define void @with_dead_use(i32 %arg, ptr %dst) #1 {
858 ; CHECK-LABEL: define void @with_dead_use(
859 ; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) #[[ATTR1]] {
861 ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ARG]], 3
862 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[ARG]] to i64
863 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
865 ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]]
866 ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
867 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
868 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
869 ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
870 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
871 ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
872 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
873 ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
874 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
875 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
876 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
877 ; CHECK: vector.body:
878 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
879 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
880 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]]
881 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
882 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
883 ; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12)
884 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
885 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4
886 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
887 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT4]]
888 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 56
889 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
890 ; CHECK: middle.block:
891 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
893 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY:%.*]] ]
894 ; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[MUL]], [[ENTRY]] ]
895 ; CHECK-NEXT: br label [[LOOP:%.*]]
897 ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
898 ; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], [[SCALAR_PH]] ], [ [[TRUNC:%.*]], [[LOOP]] ]
899 ; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr [[DST]], i64 [[PHI]]
900 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[PHI2]], 12
901 ; CHECK-NEXT: store i32 [[AND]], ptr [[GETELEMENTPTR]], align 4
902 ; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[PHI]], [[ZEXT]]
903 ; CHECK-NEXT: [[ADD]] = add i64 [[PHI]], 1
904 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[PHI]], 65
905 ; CHECK-NEXT: [[TRUNC]] = trunc i64 [[MUL3]] to i32
906 ; CHECK-NEXT: [[DEAD_AND:%.*]] = and i32 [[TRUNC]], 123
907 ; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
909 ; CHECK-NEXT: ret void
912 %mul = mul i32 %arg, 3
913 %zext = zext i32 %arg to i64
917 %phi = phi i64 [ 4, %entry ], [ %add, %loop ]
918 %phi2 = phi i32 [ %mul, %entry ], [ %trunc, %loop ]
919 %getelementptr = getelementptr i32, ptr %dst, i64 %phi
920 %and = and i32 %phi2, 12
921 store i32 %and, ptr %getelementptr, align 4
922 %mul3 = mul i64 %phi, %zext
923 %add = add i64 %phi, 1
924 %icmp = icmp ugt i64 %phi, 65
925 %trunc = trunc i64 %mul3 to i32
926 %dead.and = and i32 %trunc, 123
927 br i1 %icmp, label %exit, label %loop
; Attribute groups referenced by the test functions.
;   #0: targets skylake-avx512 and sets "min-legal-vector-width" to "0".
;   #1: targets skylake-avx512 but lists "-avx512f" in "target-features"
;       (presumably to turn AVX-512 off for that group; the functions using
;       it are expected to vectorize with 8 x i32 vectors). Used by @wombat,
;       @wombat2 and @with_dead_use.
933 attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }
934 attributes #1 = { "target-cpu"="skylake-avx512" "target-features"="-avx512f" }
936 ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
937 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
938 ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
939 ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
940 ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
941 ; CHECK: [[META5]] = !{[[META6:![0-9]+]]}
942 ; CHECK: [[META6]] = distinct !{[[META6]], [[META7:![0-9]+]]}
943 ; CHECK: [[META7]] = distinct !{[[META7]], !"LVerDomain"}
944 ; CHECK: [[META8]] = !{[[META9:![0-9]+]]}
945 ; CHECK: [[META9]] = distinct !{[[META9]], [[META7]]}
946 ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
947 ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
948 ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
949 ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
950 ; CHECK: [[META14]] = !{[[META15:![0-9]+]]}
951 ; CHECK: [[META15]] = distinct !{[[META15]], [[META16:![0-9]+]]}
952 ; CHECK: [[META16]] = distinct !{[[META16]], !"LVerDomain"}
953 ; CHECK: [[META17]] = !{[[META18:![0-9]+]]}
954 ; CHECK: [[META18]] = distinct !{[[META18]], [[META16]]}
955 ; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]}
956 ; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]]}
957 ; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
958 ; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META2]], [[META1]]}
959 ; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]}
960 ; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
961 ; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]}
962 ; CHECK: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
963 ; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]}
964 ; CHECK: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
965 ; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[META2]], [[META1]]}
966 ; CHECK: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]}
967 ; CHECK: [[LOOP31]] = distinct !{[[LOOP31]], [[META2]], [[META1]]}
968 ; CHECK: [[LOOP32]] = distinct !{[[LOOP32]], [[META1]], [[META2]]}
969 ; CHECK: [[LOOP33]] = distinct !{[[LOOP33]], [[META2]], [[META1]]}