1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
3 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
4 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4
6 ; For the truncated test cases, range analysis of the induction variable is
7 ; used to ensure the induction variable is always greater than the sentinel
8 ; value. The case is vectorizable if the truncated induction variable is
9 ; monotonically increasing and never equal to the sentinel.
10 define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) {
11 ; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit(
12 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
13 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
14 ; CHECK-VF4IC1-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
15 ; CHECK-VF4IC1-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
16 ; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]:
17 ; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
18 ; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
19 ; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
20 ; CHECK-VF4IC1: [[VECTOR_PH]]:
21 ; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
22 ; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
23 ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
24 ; CHECK-VF4IC1: [[VECTOR_BODY]]:
25 ; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
26 ; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
27 ; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
28 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
29 ; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
30 ; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
31 ; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
32 ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3)
33 ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
34 ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
35 ; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
36 ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
37 ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
38 ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
39 ; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
40 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
41 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331
42 ; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
43 ; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
44 ; CHECK-VF4IC1: [[SCALAR_PH]]:
45 ; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
46 ; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
47 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
48 ; CHECK-VF4IC1: [[FOR_BODY]]:
49 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
50 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
51 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
52 ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
53 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3
54 ; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32
55 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]]
56 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
57 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
58 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
59 ; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]:
60 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
61 ; CHECK-VF4IC1-NEXT: br label %[[EXIT]]
62 ; CHECK-VF4IC1: [[EXIT]]:
63 ; CHECK-VF4IC1-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
64 ; CHECK-VF4IC1-NEXT: ret i32 [[RDX_LCSSA]]
66 ; CHECK-VF4IC4-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit(
67 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
68 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
69 ; CHECK-VF4IC4-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
70 ; CHECK-VF4IC4-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
71 ; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]:
72 ; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
73 ; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
74 ; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
75 ; CHECK-VF4IC4: [[VECTOR_PH]]:
76 ; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
77 ; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
78 ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
79 ; CHECK-VF4IC4: [[VECTOR_BODY]]:
80 ; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
81 ; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
82 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
83 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
84 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
85 ; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
86 ; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
87 ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
88 ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
89 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
90 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
91 ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
92 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
93 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8
94 ; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12
95 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
96 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
97 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
98 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
99 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3)
100 ; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
101 ; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], splat (i64 3)
102 ; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], splat (i64 3)
103 ; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
104 ; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
105 ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
106 ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
107 ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
108 ; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
109 ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
110 ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
111 ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
112 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
113 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]])
114 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]])
115 ; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]])
116 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648
117 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 331
118 ; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
119 ; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
120 ; CHECK-VF4IC4: [[SCALAR_PH]]:
121 ; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
122 ; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
123 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
124 ; CHECK-VF4IC4: [[FOR_BODY]]:
125 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
126 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
127 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
128 ; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
129 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP16]], 3
130 ; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = trunc i64 [[IV]] to i32
131 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]]
132 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
133 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
134 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
135 ; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]:
136 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
137 ; CHECK-VF4IC4-NEXT: br label %[[EXIT]]
138 ; CHECK-VF4IC4: [[EXIT]]:
139 ; CHECK-VF4IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
140 ; CHECK-VF4IC4-NEXT: ret i32 [[RDX_LCSSA]]
142 ; CHECK-VF1IC4-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit(
143 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
144 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
145 ; CHECK-VF1IC4-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
146 ; CHECK-VF1IC4-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
147 ; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]:
148 ; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
149 ; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
150 ; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
151 ; CHECK-VF1IC4: [[VECTOR_PH]]:
152 ; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
153 ; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
154 ; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
155 ; CHECK-VF1IC4: [[VECTOR_BODY]]:
156 ; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
157 ; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
158 ; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
159 ; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
160 ; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
161 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
162 ; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
163 ; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
164 ; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
165 ; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
166 ; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 0
167 ; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], 1
168 ; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i32 [[TMP4]], 2
169 ; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], 3
170 ; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
171 ; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
172 ; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
173 ; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
174 ; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
175 ; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
176 ; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
177 ; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
178 ; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sgt i64 [[TMP13]], 3
179 ; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP14]], 3
180 ; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp sgt i64 [[TMP15]], 3
181 ; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp sgt i64 [[TMP16]], 3
182 ; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]]
183 ; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]]
184 ; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]]
185 ; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]]
186 ; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
187 ; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
188 ; CHECK-VF1IC4-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
189 ; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
190 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
191 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
192 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
193 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
194 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331
195 ; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
196 ; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
197 ; CHECK-VF1IC4: [[SCALAR_PH]]:
198 ; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
199 ; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
200 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
201 ; CHECK-VF1IC4: [[FOR_BODY]]:
202 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
203 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
204 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
205 ; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
206 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3
207 ; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32
208 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]]
209 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
210 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
211 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
212 ; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]:
213 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
214 ; CHECK-VF1IC4-NEXT: br label %[[EXIT]]
215 ; CHECK-VF1IC4: [[EXIT]]:
216 ; CHECK-VF1IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
217 ; CHECK-VF1IC4-NEXT: ret i32 [[RDX_LCSSA]]
220 %cmp.sgt = icmp sgt i32 %n, 0
221 br i1 %cmp.sgt, label %for.body.preheader, label %exit
223 for.body.preheader: ; preds = %entry
224 %wide.trip.count = zext i32 %n to i64
227 for.body: ; preds = %for.body.preheader, %for.body
228 %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
229 %rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ]
230 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
231 %0 = load i64, ptr %arrayidx, align 8
232 %cmp = icmp sgt i64 %0, 3
233 %1 = trunc i64 %iv to i32
234 %spec.select = select i1 %cmp, i32 %1, i32 %rdx
235 %inc = add nuw nsw i64 %iv, 1
236 %exitcond.not = icmp eq i64 %inc, %wide.trip.count
237 br i1 %exitcond.not, label %exit, label %for.body
239 exit: ; preds = %for.body, %entry
240 %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
244 ; Without a loop guard, the range analysis can also rely on the constant
246 define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) {
247 ; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit(
248 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
249 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
250 ; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
251 ; CHECK-VF4IC1: [[VECTOR_PH]]:
252 ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
253 ; CHECK-VF4IC1: [[VECTOR_BODY]]:
254 ; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
255 ; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
256 ; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
257 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
258 ; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
259 ; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
260 ; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
261 ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3)
262 ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
263 ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
264 ; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
265 ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
266 ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
267 ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
268 ; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
269 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
270 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331
271 ; CHECK-VF4IC1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
272 ; CHECK-VF4IC1: [[SCALAR_PH]]:
273 ; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
274 ; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
275 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
276 ; CHECK-VF4IC1: [[FOR_BODY]]:
277 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
278 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
279 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
280 ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
281 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3
282 ; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32
283 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]]
284 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
285 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
286 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
287 ; CHECK-VF4IC1: [[EXIT]]:
288 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
289 ; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
291 ; CHECK-VF4IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit(
292 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
293 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
294 ; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
295 ; CHECK-VF4IC4: [[VECTOR_PH]]:
296 ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
297 ; CHECK-VF4IC4: [[VECTOR_BODY]]:
298 ; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
299 ; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
300 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
301 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
302 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
303 ; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
304 ; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
305 ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
306 ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
307 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
308 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
309 ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
310 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
311 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8
312 ; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12
313 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
314 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
315 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
316 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
317 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3)
318 ; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
319 ; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], splat (i64 3)
320 ; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], splat (i64 3)
321 ; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
322 ; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
323 ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
324 ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
325 ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
326 ; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
327 ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
328 ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
329 ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
330 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
331 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]])
332 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]])
333 ; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]])
334 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648
335 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 331
336 ; CHECK-VF4IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
337 ; CHECK-VF4IC4: [[SCALAR_PH]]:
338 ; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
339 ; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
340 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
341 ; CHECK-VF4IC4: [[FOR_BODY]]:
342 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
343 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
344 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
345 ; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
346 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP16]], 3
347 ; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = trunc i64 [[IV]] to i32
348 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]]
349 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
350 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
351 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
352 ; CHECK-VF4IC4: [[EXIT]]:
353 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
354 ; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
356 ; CHECK-VF1IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit(
357 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) {
358 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
359 ; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
360 ; CHECK-VF1IC4: [[VECTOR_PH]]:
361 ; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
362 ; CHECK-VF1IC4: [[VECTOR_BODY]]:
363 ; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
364 ; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
365 ; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
366 ; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
367 ; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
368 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
369 ; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
370 ; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
371 ; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
372 ; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
373 ; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 0
374 ; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], 1
375 ; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i32 [[TMP4]], 2
376 ; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], 3
377 ; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
378 ; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
379 ; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
380 ; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
381 ; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
382 ; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
383 ; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
384 ; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
385 ; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sgt i64 [[TMP13]], 3
386 ; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP14]], 3
387 ; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp sgt i64 [[TMP15]], 3
388 ; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp sgt i64 [[TMP16]], 3
389 ; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]]
390 ; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]]
391 ; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]]
392 ; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]]
393 ; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
394 ; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
395 ; CHECK-VF1IC4-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
396 ; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
397 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
398 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
399 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
400 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
401 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331
402 ; CHECK-VF1IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
403 ; CHECK-VF1IC4: [[SCALAR_PH]]:
404 ; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
405 ; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
406 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
407 ; CHECK-VF1IC4: [[FOR_BODY]]:
408 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
409 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
410 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
411 ; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
412 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3
413 ; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32
414 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]]
415 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
416 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
417 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
418 ; CHECK-VF1IC4: [[EXIT]]:
419 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
420 ; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
425 for.body: ; preds = %entry, %for.body
426 %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
427 %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
428 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
429 %0 = load i64, ptr %arrayidx, align 8
430 %cmp = icmp sgt i64 %0, 3
431 %1 = trunc i64 %iv to i32
432 %spec.select = select i1 %cmp, i32 %1, i32 %rdx
433 %inc = add nuw nsw i64 %iv, 1
434 %exitcond.not = icmp eq i64 %inc, 20000
435 br i1 %exitcond.not, label %exit, label %for.body
437 exit: ; preds = %for.body
441 ; Without a loop guard, the maximum constant trip count that can be vectorized
442 ; is the signed maximum value of the reduction type.
443 define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
444 ; CHECK-VF4IC1-LABEL: define i32 @select_fcmp_max_valid_const_ub(
445 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
446 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
447 ; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
448 ; CHECK-VF4IC1: [[VECTOR_PH]]:
449 ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
450 ; CHECK-VF4IC1: [[VECTOR_BODY]]:
451 ; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
452 ; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
453 ; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
454 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
455 ; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
456 ; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
457 ; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
458 ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer
459 ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
460 ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
461 ; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
462 ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
463 ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
464 ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
465 ; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
466 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
467 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 -1
468 ; CHECK-VF4IC1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
469 ; CHECK-VF4IC1: [[SCALAR_PH]]:
470 ; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
471 ; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
472 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
473 ; CHECK-VF4IC1: [[FOR_BODY]]:
474 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
475 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
476 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
477 ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4
478 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP7]], 0.000000e+00
479 ; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32
480 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]]
481 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
482 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
483 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
484 ; CHECK-VF4IC1: [[EXIT]]:
485 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
486 ; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
488 ; CHECK-VF4IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub(
489 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
490 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
491 ; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
492 ; CHECK-VF4IC4: [[VECTOR_PH]]:
493 ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
494 ; CHECK-VF4IC4: [[VECTOR_BODY]]:
495 ; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
496 ; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
497 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
498 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
499 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
500 ; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
501 ; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
502 ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
503 ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
504 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
505 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
506 ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
507 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4
508 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8
509 ; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12
510 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
511 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
512 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
513 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
514 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer
515 ; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
516 ; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
517 ; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD6]], zeroinitializer
518 ; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
519 ; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
520 ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
521 ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
522 ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
523 ; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
524 ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
525 ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
526 ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
527 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
528 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]])
529 ; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]])
530 ; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]])
531 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648
532 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 -1
533 ; CHECK-VF4IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
534 ; CHECK-VF4IC4: [[SCALAR_PH]]:
535 ; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
536 ; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
537 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
538 ; CHECK-VF4IC4: [[FOR_BODY]]:
539 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
540 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
541 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
542 ; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
543 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP16]], 0.000000e+00
544 ; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = trunc i64 [[IV]] to i32
545 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]]
546 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
547 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
548 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
549 ; CHECK-VF4IC4: [[EXIT]]:
550 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
551 ; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
553 ; CHECK-VF1IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub(
554 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) {
555 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
556 ; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
557 ; CHECK-VF1IC4: [[VECTOR_PH]]:
558 ; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
559 ; CHECK-VF1IC4: [[VECTOR_BODY]]:
560 ; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
561 ; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
562 ; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
563 ; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
564 ; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
565 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
566 ; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
567 ; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
568 ; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
569 ; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
570 ; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 0
571 ; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], 1
572 ; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i32 [[TMP4]], 2
573 ; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], 3
574 ; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
575 ; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
576 ; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
577 ; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]]
578 ; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP9]], align 4
579 ; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP10]], align 4
580 ; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP11]], align 4
581 ; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP12]], align 4
582 ; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = fcmp fast olt float [[TMP13]], 0.000000e+00
583 ; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = fcmp fast olt float [[TMP14]], 0.000000e+00
584 ; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = fcmp fast olt float [[TMP15]], 0.000000e+00
585 ; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = fcmp fast olt float [[TMP16]], 0.000000e+00
586 ; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]]
587 ; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]]
588 ; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]]
589 ; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]]
590 ; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
591 ; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
592 ; CHECK-VF1IC4-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
593 ; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
594 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
595 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
596 ; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
597 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
598 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 -1
599 ; CHECK-VF1IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
600 ; CHECK-VF1IC4: [[SCALAR_PH]]:
601 ; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
602 ; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
603 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
604 ; CHECK-VF1IC4: [[FOR_BODY]]:
605 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
606 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
607 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
608 ; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX]], align 4
609 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP26]], 0.000000e+00
610 ; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32
611 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]]
612 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
613 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
614 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
615 ; CHECK-VF1IC4: [[EXIT]]:
616 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
617 ; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
622 for.body: ; preds = %entry, %for.body
; Test input: a select reduction that records the (truncated) index of the
; last element of %a that compares less than 0.0, starting from -1.
623 %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
624 %rdx = phi i32 [ -1, %entry ], [ %spec.select, %for.body ]
625 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
626 %0 = load float, ptr %arrayidx, align 4
627 %cmp = fcmp fast olt float %0, 0.000000e+00
; The i64 induction variable is truncated to i32 before being selected.
628 %1 = trunc i64 %iv to i32
629 %spec.select = select i1 %cmp, i32 %1, i32 %rdx
630 %inc = add nuw nsw i64 %iv, 1
; Constant exit bound 2147483648 (2^31): %iv runs from 0 to 2147483647, so the
; truncated value never equals -2147483648, the start value of the reduction
; phis in the vectorized CHECK output above.
631 %exitcond.not = icmp eq i64 %inc, 2147483648
632 br i1 %exitcond.not, label %exit, label %for.body
634 exit: ; preds = %for.body
640 ; This test can theoretically be vectorized, but only with a runtime check.
641 ; The construct introduced by IndVarSimplify is:
642 ; %1 = trunc i64 %iv to i32
643 ; However, the loop guard is on an i64 value:
644 ; %cmp.sgt = icmp sgt i64 %n, 0
645 ; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
646 ; sentinel value), so a runtime check is needed to vectorize this case.
647 define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr %a, i64 %n) {
648 ; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(
649 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
650 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
651 ; CHECK-VF4IC1-NEXT: [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0
652 ; CHECK-VF4IC1-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
653 ; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]:
654 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
655 ; CHECK-VF4IC1: [[FOR_BODY]]:
656 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
657 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
658 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
659 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
660 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
661 ; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
662 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
663 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
664 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
665 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
666 ; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]:
667 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
668 ; CHECK-VF4IC1-NEXT: br label %[[EXIT]]
669 ; CHECK-VF4IC1: [[EXIT]]:
670 ; CHECK-VF4IC1-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
671 ; CHECK-VF4IC1-NEXT: ret i32 [[RDX_LCSSA]]
673 ; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(
674 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
675 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
676 ; CHECK-VF4IC4-NEXT: [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0
677 ; CHECK-VF4IC4-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
678 ; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]:
679 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
680 ; CHECK-VF4IC4: [[FOR_BODY]]:
681 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
682 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
683 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
684 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
685 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
686 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
687 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
688 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
689 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
690 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
691 ; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]:
692 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
693 ; CHECK-VF4IC4-NEXT: br label %[[EXIT]]
694 ; CHECK-VF4IC4: [[EXIT]]:
695 ; CHECK-VF4IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
696 ; CHECK-VF4IC4-NEXT: ret i32 [[RDX_LCSSA]]
698 ; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(
699 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
700 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
701 ; CHECK-VF1IC4-NEXT: [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0
702 ; CHECK-VF1IC4-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
703 ; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]:
704 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
705 ; CHECK-VF1IC4: [[FOR_BODY]]:
706 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
707 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
708 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
709 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
710 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
711 ; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
712 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
713 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
714 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
715 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
716 ; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]:
717 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
718 ; CHECK-VF1IC4-NEXT: br label %[[EXIT]]
719 ; CHECK-VF1IC4: [[EXIT]]:
720 ; CHECK-VF1IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
721 ; CHECK-VF1IC4-NEXT: ret i32 [[RDX_LCSSA]]
; Loop guard: the loop is entered only when the i64 value %n is positive.
724 %cmp.sgt = icmp sgt i64 %n, 0
725 br i1 %cmp.sgt, label %for.body, label %exit
727 for.body: ; preds = %entry, %for.body
728 %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
729 %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
730 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
731 %0 = load i32, ptr %arrayidx, align 4
732 %cmp = icmp sgt i32 %0, 3
; The selected value is the i64 induction variable truncated to i32.
733 %1 = trunc i64 %iv to i32
734 %spec.select = select i1 %cmp, i32 %1, i32 %rdx
735 %inc = add nuw nsw i64 %iv, 1
; The exit bound is the i64 argument %n itself, not a value widened from i32.
736 %exitcond.not = icmp eq i64 %inc, %n
737 br i1 %exitcond.not, label %exit, label %for.body
739 exit: ; preds = %for.body, %entry
; 331 when the guard skips the loop, otherwise the final value of the select.
740 %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
744 ; This test can theoretically be vectorized, but only with a runtime check.
745 ; The construct introduced by IndVarSimplify is:
746 ; %1 = trunc i64 %iv to i32
747 ; However, the loop guard is unsigned:
748 ; %cmp.not = icmp eq i32 %n, 0
749 ; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
750 ; sentinel value), so a runtime check is needed to vectorize this case.
751 define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
752 ; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(
753 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
754 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
755 ; CHECK-VF4IC1-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0
756 ; CHECK-VF4IC1-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]]
757 ; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]:
758 ; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
759 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
760 ; CHECK-VF4IC1: [[FOR_BODY]]:
761 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
762 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
763 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
764 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
765 ; CHECK-VF4IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3
766 ; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
767 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]]
768 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
769 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
770 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
771 ; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]:
772 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
773 ; CHECK-VF4IC1-NEXT: br label %[[EXIT]]
774 ; CHECK-VF4IC1: [[EXIT]]:
775 ; CHECK-VF4IC1-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
776 ; CHECK-VF4IC1-NEXT: ret i32 [[RDX_LCSSA]]
778 ; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(
779 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
780 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
781 ; CHECK-VF4IC4-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0
782 ; CHECK-VF4IC4-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]]
783 ; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]:
784 ; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
785 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
786 ; CHECK-VF4IC4: [[FOR_BODY]]:
787 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
788 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
789 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
790 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
791 ; CHECK-VF4IC4-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3
792 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
793 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]]
794 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
795 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
796 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
797 ; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]:
798 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
799 ; CHECK-VF4IC4-NEXT: br label %[[EXIT]]
800 ; CHECK-VF4IC4: [[EXIT]]:
801 ; CHECK-VF4IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
802 ; CHECK-VF4IC4-NEXT: ret i32 [[RDX_LCSSA]]
804 ; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(
805 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
806 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
807 ; CHECK-VF1IC4-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0
808 ; CHECK-VF1IC4-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]]
809 ; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]:
810 ; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
811 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
812 ; CHECK-VF1IC4: [[FOR_BODY]]:
813 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
814 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
815 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
816 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
817 ; CHECK-VF1IC4-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3
818 ; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
819 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]]
820 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
821 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
822 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
823 ; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]:
824 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
825 ; CHECK-VF1IC4-NEXT: br label %[[EXIT]]
826 ; CHECK-VF1IC4: [[EXIT]]:
827 ; CHECK-VF1IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
828 ; CHECK-VF1IC4-NEXT: ret i32 [[RDX_LCSSA]]
; Loop guard: only %n == 0 skips the loop; every other i32 value enters it.
831 %cmp.not = icmp eq i32 %n, 0
832 br i1 %cmp.not, label %exit, label %for.body.preheader
834 for.body.preheader: ; preds = %entry
; %n is zero-extended, so the trip count can be any value up to 4294967295.
835 %wide.trip.count = zext i32 %n to i64
838 for.body: ; preds = %for.body.preheader, %for.body
839 %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
840 %rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ]
841 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
842 %0 = load i32, ptr %arrayidx, align 4
843 %cmp1 = icmp sgt i32 %0, 3
; The selected value is the i64 induction variable truncated to i32.
844 %1 = trunc i64 %iv to i32
845 %spec.select = select i1 %cmp1, i32 %1, i32 %rdx
846 %inc = add nuw nsw i64 %iv, 1
847 %exitcond.not = icmp eq i64 %inc, %wide.trip.count
848 br i1 %exitcond.not, label %exit, label %for.body
850 exit: ; preds = %for.body, %entry
; 331 when the guard skips the loop, otherwise the final value of the select.
851 %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
855 ; This test cannot be vectorized, even with a runtime check.
856 ; The construct introduced by IndVarSimplify is:
857 ; %1 = trunc i64 %iv to i32
858 ; However, the loop exit bound is a constant that does not fit in i32:
859 ; %exitcond.not = icmp eq i64 %inc, 4294967294
860 ; Hence, the truncated i32 value will certainly wrap and hit the sentinel
861 ; value, and we cannot vectorize this case.
862 define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) {
863 ; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
864 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
865 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
866 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
867 ; CHECK-VF4IC1: [[FOR_BODY]]:
868 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
869 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
870 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
871 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
872 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
873 ; CHECK-VF4IC1-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32
874 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
875 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
876 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
877 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
878 ; CHECK-VF4IC1: [[EXIT]]:
879 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
880 ; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
882 ; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
883 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
884 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
885 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
886 ; CHECK-VF4IC4: [[FOR_BODY]]:
887 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
888 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
889 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
890 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
891 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
892 ; CHECK-VF4IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32
893 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
894 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
895 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
896 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
897 ; CHECK-VF4IC4: [[EXIT]]:
898 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
899 ; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
901 ; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
902 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) {
903 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
904 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
905 ; CHECK-VF1IC4: [[FOR_BODY]]:
906 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
907 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
908 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
909 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
910 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
911 ; CHECK-VF1IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32
912 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
913 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
914 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
915 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
916 ; CHECK-VF1IC4: [[EXIT]]:
917 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
918 ; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
; Loop body under test. The CHECK lines above expect the same scalar loop for
; all three RUN configurations, i.e. no vector body is generated.
923 for.body: ; preds = %entry, %for.body
; %iv starts at 2147483646 (2^31 - 2); together with the exit test on
; 4294967294 (2^32 - 2) below, it takes values up to 4294967293.
924 %iv = phi i64 [ 2147483646, %entry ], [ %inc, %for.body ]
925 %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
926 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
927 %0 = load i32, ptr %arrayidx, align 4
928 %cmp = icmp sgt i32 %0, 3
; Truncating %iv to i32 over that range goes past 2147483647 and wraps to
; negative values, so the truncated IV is not monotonically increasing.
929 %conv = trunc i64 %iv to i32
; Keep the truncated IV when a[iv] > 3, otherwise keep the previous value
; (331 on the first iteration).
930 %spec.select = select i1 %cmp, i32 %conv, i32 %rdx
931 %inc = add nuw nsw i64 %iv, 1
932 %exitcond.not = icmp eq i64 %inc, 4294967294
933 br i1 %exitcond.not, label %exit, label %for.body
935 exit: ; preds = %for.body
939 ; The FindLastIV pattern involving a truncated induction variable must not be
940 ; vectorized in the absence of any loop guard.
941 define i32 @not_vectorized_select_iv_icmp_no_guard(ptr %a, ptr %b, i32 %start, i32 %n) {
942 ; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard(
943 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) {
944 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
945 ; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
946 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
947 ; CHECK-VF4IC1: [[FOR_BODY]]:
948 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
949 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
950 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
951 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
952 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
953 ; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
954 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
955 ; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i32
956 ; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]]
957 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
958 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
959 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
960 ; CHECK-VF4IC1: [[EXIT]]:
961 ; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
962 ; CHECK-VF4IC1-NEXT: ret i32 [[COND_LCSSA]]
964 ; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard(
965 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) {
966 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
967 ; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
968 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
969 ; CHECK-VF4IC4: [[FOR_BODY]]:
970 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
971 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
972 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
973 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
974 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
975 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
976 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
977 ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i32
978 ; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]]
979 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
980 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
981 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
982 ; CHECK-VF4IC4: [[EXIT]]:
983 ; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
984 ; CHECK-VF4IC4-NEXT: ret i32 [[COND_LCSSA]]
986 ; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard(
987 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) {
988 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
989 ; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
990 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
991 ; CHECK-VF1IC4: [[FOR_BODY]]:
992 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
993 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
994 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
995 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
996 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
997 ; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
998 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
999 ; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i32
1000 ; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]]
1001 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
1002 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
1003 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1004 ; CHECK-VF1IC4: [[EXIT]]:
1005 ; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
1006 ; CHECK-VF1IC4-NEXT: ret i32 [[COND_LCSSA]]
; The exit bound is %n zero-extended to i64. The expected entry block above
; branches straight into the loop, with no compare on %n guarding it.
1009 %wide.trip.count = zext i32 %n to i64
1012 for.body: ; preds = %entry, %for.body
1013 %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
1014 %rdx = phi i32 [ %start, %entry ], [ %cond, %for.body ]
1015 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
1016 %0 = load i32, ptr %arrayidx, align 4
1017 %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
1018 %1 = load i32, ptr %arrayidx2, align 4
1019 %cmp = icmp sgt i32 %0, %1
; The value recorded by the select is the i64 IV truncated to i32.
1020 %2 = trunc i64 %iv to i32
; Record the truncated IV when a[iv] > b[iv]; otherwise carry the running
; value, which starts as %start.
1021 %cond = select i1 %cmp, i32 %2, i32 %rdx
1022 %inc = add nuw nsw i64 %iv, 1
1023 %exitcond.not = icmp eq i64 %inc, %wide.trip.count
1024 br i1 %exitcond.not, label %exit, label %for.body
1026 exit: ; preds = %for.body
1030 ; Without a loop guard, when the constant trip count exceeds the maximum signed
1031 ; value of the reduction type, truncation may cause overflow. Therefore, the
1032 ; vectorizer cannot guarantee that the induction variable is monotonically increasing.
1034 define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
1035 ; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub(
1036 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
1037 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
1038 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
1039 ; CHECK-VF4IC1: [[FOR_BODY]]:
1040 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1041 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
1042 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1043 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1044 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
1045 ; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
1046 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
1047 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
1048 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649
1049 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1050 ; CHECK-VF4IC1: [[EXIT]]:
1051 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
1052 ; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
1054 ; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub(
1055 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
1056 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
1057 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
1058 ; CHECK-VF4IC4: [[FOR_BODY]]:
1059 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1060 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
1061 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1062 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1063 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
1064 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
1065 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
1066 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
1067 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649
1068 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1069 ; CHECK-VF4IC4: [[EXIT]]:
1070 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
1071 ; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
1073 ; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub(
1074 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) {
1075 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
1076 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
1077 ; CHECK-VF1IC4: [[FOR_BODY]]:
1078 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1079 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
1080 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1081 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1082 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
1083 ; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
1084 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
1085 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
1086 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649
1087 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1088 ; CHECK-VF1IC4: [[EXIT]]:
1089 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
1090 ; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
; Loop body under test; the CHECK lines above expect it to stay scalar for all
; three RUN configurations.
1095 for.body: ; preds = %entry, %for.body
1096 %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
1097 %rdx = phi i32 [ -1, %entry ], [ %spec.select, %for.body ]
1098 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
1099 %0 = load float, ptr %arrayidx, align 4
1100 %cmp = fcmp fast olt float %0, 0.000000e+00
; %iv runs from 0 up to 2147483648 (the exit test below compares %inc with
; 2147483649 = 2^31 + 1), so the last value does not fit in a signed i32 and
; truncates to a negative number.
1101 %1 = trunc i64 %iv to i32
; Record the truncated IV when a[iv] < 0.0, otherwise carry the running value
; (initially -1).
1102 %spec.select = select i1 %cmp, i32 %1, i32 %rdx
1103 %inc = add nuw nsw i64 %iv, 1
1104 %exitcond.not = icmp eq i64 %inc, 2147483649
1105 br i1 %exitcond.not, label %exit, label %for.body
1107 exit: ; preds = %for.body
1108 ret i32 %spec.select
1111 ; Even with loop guard protection, if the destination type of the truncation
1112 ; instruction is smaller than the trip count type before extension, overflow
1113 ; could still occur.
1114 define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(ptr %a, ptr %b, i16 %start, i32 %n) {
1115 ; CHECK-VF4IC1-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(
1116 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) {
1117 ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
1118 ; CHECK-VF4IC1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
1119 ; CHECK-VF4IC1-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
1120 ; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]:
1121 ; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1122 ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
1123 ; CHECK-VF4IC1: [[FOR_BODY]]:
1124 ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1125 ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1126 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
1127 ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1128 ; CHECK-VF4IC1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
1129 ; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
1130 ; CHECK-VF4IC1-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
1131 ; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i16
1132 ; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]]
1133 ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
1134 ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
1135 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
1136 ; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]:
1137 ; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ]
1138 ; CHECK-VF4IC1-NEXT: br label %[[EXIT]]
1139 ; CHECK-VF4IC1: [[EXIT]]:
1140 ; CHECK-VF4IC1-NEXT: [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
1141 ; CHECK-VF4IC1-NEXT: ret i16 [[RDX_0_LCSSA]]
1143 ; CHECK-VF4IC4-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(
1144 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) {
1145 ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
1146 ; CHECK-VF4IC4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
1147 ; CHECK-VF4IC4-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
1148 ; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]:
1149 ; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1150 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
1151 ; CHECK-VF4IC4: [[FOR_BODY]]:
1152 ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1153 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1154 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
1155 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1156 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
1157 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
1158 ; CHECK-VF4IC4-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
1159 ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i16
1160 ; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]]
1161 ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
1162 ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
1163 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
1164 ; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]:
1165 ; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ]
1166 ; CHECK-VF4IC4-NEXT: br label %[[EXIT]]
1167 ; CHECK-VF4IC4: [[EXIT]]:
1168 ; CHECK-VF4IC4-NEXT: [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
1169 ; CHECK-VF4IC4-NEXT: ret i16 [[RDX_0_LCSSA]]
1171 ; CHECK-VF1IC4-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(
1172 ; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) {
1173 ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
1174 ; CHECK-VF1IC4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
1175 ; CHECK-VF1IC4-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
1176 ; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]:
1177 ; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1178 ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
1179 ; CHECK-VF1IC4: [[FOR_BODY]]:
1180 ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1181 ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1182 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
1183 ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1184 ; CHECK-VF1IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
1185 ; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
1186 ; CHECK-VF1IC4-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
1187 ; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i16
1188 ; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]]
1189 ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
1190 ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
1191 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
1192 ; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]:
1193 ; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ]
1194 ; CHECK-VF1IC4-NEXT: br label %[[EXIT]]
1195 ; CHECK-VF1IC4: [[EXIT]]:
1196 ; CHECK-VF1IC4-NEXT: [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
1197 ; CHECK-VF1IC4-NEXT: ret i16 [[RDX_0_LCSSA]]
; Loop guard: the loop is only entered when %n is (signed) greater than 0.
1200 %cmp9 = icmp sgt i32 %n, 0
1201 br i1 %cmp9, label %for.body.preheader, label %exit
1203 for.body.preheader: ; preds = %entry
; The exit bound is the i32 %n zero-extended to i64.
1204 %wide.trip.count = zext i32 %n to i64
1207 for.body: ; preds = %for.body.preheader, %for.body
1208 %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
1209 %rdx = phi i16 [ %start, %for.body.preheader ], [ %cond, %for.body ]
1210 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
1211 %0 = load i32, ptr %arrayidx, align 4
1212 %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
1213 %1 = load i32, ptr %arrayidx2, align 4
1214 %cmp3 = icmp sgt i32 %0, %1
; The IV is truncated to i16, which is narrower than the i32 %n that bounds
; the trip count.
1215 %2 = trunc i64 %iv to i16
; Record the truncated IV when a[iv] > b[iv]; otherwise carry the running value.
1216 %cond = select i1 %cmp3, i16 %2, i16 %rdx
1217 %inc = add nuw nsw i64 %iv, 1
1218 %exitcond.not = icmp eq i64 %inc, %wide.trip.count
1219 br i1 %exitcond.not, label %exit, label %for.body
1221 exit: ; preds = %for.body, %entry
; Result is %start when the guard skips the loop, else the final select value.
1222 %rdx.0.lcssa = phi i16 [ %start, %entry ], [ %cond, %for.body ]
1223 ret i16 %rdx.0.lcssa
1226 ; CHECK-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1227 ; CHECK-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1228 ; CHECK-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1229 ; CHECK-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1230 ; CHECK-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1231 ; CHECK-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1232 ; CHECK-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1233 ; CHECK-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1235 ; CHECK-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1236 ; CHECK-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1237 ; CHECK-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1238 ; CHECK-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1239 ; CHECK-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1240 ; CHECK-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1241 ; CHECK-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1242 ; CHECK-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1244 ; CHECK-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1245 ; CHECK-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1246 ; CHECK-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1247 ; CHECK-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
1248 ; CHECK-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1249 ; CHECK-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
1250 ; CHECK-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1251 ; CHECK-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}