1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=8 -S < %s | FileCheck %s
; Narrowing AND reduction. The loop ANDs 256 zero-extended i8 loads into an
; i32 accumulator that is re-masked to its low 8 bits on every iteration, and
; the final value is truncated to i8. The assertions below expect the loop to
; be vectorized at the forced width of 8 with the reduction carried out on
; <8 x i8> and finished by llvm.vector.reduce.and.v8i8.
4 define i8 @reduction_and_trunc(ptr noalias nocapture %ptr) {
5 ; CHECK-LABEL: @reduction_and_trunc(
7 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
9 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
11 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
12 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
13 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
14 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
15 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
16 ; CHECK-NEXT: [[TMP3]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
17 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
18 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
19 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
20 ; CHECK: middle.block:
21 ; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP3]])
22 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
24 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
26 ; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
28 ; CHECK-NEXT: [[AND_LCSSA_OFF0:%.*]] = phi i8 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
29 ; CHECK-NEXT: ret i8 [[AND_LCSSA_OFF0]]
35 %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
36 %sum.02p = phi i32 [ %and, %for.body ], [ 0, %entry ] ; i32 accumulator, starts at 0
37 %sum.02 = and i32 %sum.02p, 255 ; keep only the low 8 bits of the accumulator
38 %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
39 %load = load i8, ptr %gep
40 %ext = zext i8 %load to i32 ; widen the loaded byte to the accumulator type
41 %and = and i32 %sum.02, %ext
42 %iv.next = add i32 %iv, 1
43 %exitcond = icmp eq i32 %iv.next, 256 ; loop body runs for iv = 0..255
44 br i1 %exitcond, label %for.end, label %for.body
47 %ret = trunc i32 %and to i8 ; only the low 8 bits of the reduction are kept
; Narrowing OR reduction. The loop ORs 256 zero-extended i16 loads into an
; i32 accumulator that is re-masked to its low 16 bits on every iteration, and
; the final value is truncated to i16. The assertions below expect the
; reduction to be carried out on <8 x i16> and finished by
; llvm.vector.reduce.or.v8i16.
; NOTE(review): the reduction value is named %xor although the operation is
; `or`; presumably that is where the XOR_LCSSA_OFF0 capture name below comes
; from. The name does not affect what is being tested.
51 define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
52 ; CHECK-LABEL: @reduction_or_trunc(
54 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
56 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
58 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
59 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
60 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
61 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
62 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
63 ; CHECK-NEXT: [[TMP3]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
65 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
66 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
67 ; CHECK: middle.block:
68 ; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP3]])
69 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
71 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
73 ; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
75 ; CHECK-NEXT: [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
76 ; CHECK-NEXT: ret i16 [[XOR_LCSSA_OFF0]]
82 %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
83 %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ] ; i32 accumulator, starts at 0
84 %sum.02 = and i32 %sum.02p, 65535 ; keep only the low 16 bits of the accumulator
85 %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
86 %load = load i16, ptr %gep
87 %ext = zext i16 %load to i32 ; widen the loaded value to the accumulator type
88 %xor = or i32 %sum.02, %ext ; despite the name, this is an `or`
89 %iv.next = add i32 %iv, 1
90 %exitcond = icmp eq i32 %iv.next, 256 ; loop body runs for iv = 0..255
91 br i1 %exitcond, label %for.end, label %for.body
94 %ret = trunc i32 %xor to i16 ; only the low 16 bits of the reduction are kept
; Narrowing XOR reduction. The loop XORs 256 zero-extended i16 loads into an
; i32 accumulator that is re-masked to its low 16 bits on every iteration, and
; the final value is truncated to i16. The assertions below expect the
; reduction to be carried out on <8 x i16> and finished by
; llvm.vector.reduce.xor.v8i16.
98 define i16 @reduction_xor_trunc(ptr noalias nocapture %ptr) {
99 ; CHECK-LABEL: @reduction_xor_trunc(
101 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
103 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
104 ; CHECK: vector.body:
105 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
106 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
107 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
108 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
109 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
110 ; CHECK-NEXT: [[TMP3]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
111 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
112 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
113 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
114 ; CHECK: middle.block:
115 ; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP3]])
116 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
118 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
120 ; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
122 ; CHECK-NEXT: [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
123 ; CHECK-NEXT: ret i16 [[XOR_LCSSA_OFF0]]
129 %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
130 %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ] ; i32 accumulator, starts at 0
131 %sum.02 = and i32 %sum.02p, 65535 ; keep only the low 16 bits of the accumulator
132 %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
133 %load = load i16, ptr %gep
134 %ext = zext i16 %load to i32 ; widen the loaded value to the accumulator type
135 %xor = xor i32 %sum.02, %ext
136 %iv.next = add i32 %iv, 1
137 %exitcond = icmp eq i32 %iv.next, 256 ; loop body runs for iv = 0..255
138 br i1 %exitcond, label %for.end, label %for.body
141 %ret = trunc i32 %xor to i16 ; only the low 16 bits of the reduction are kept
; Signed-minimum reduction written as icmp slt + select on an i32 accumulator
; that is masked to its low 8 bits each iteration; the result is truncated to
; i8. The only assertion is negative: no vector.body block may appear in the
; output, i.e. this loop is expected to stay scalar.
; NOTE(review): presumably narrowing a min/max reduction to the truncated type
; is not considered safe here - confirm against the vectorizer's reduction
; analysis before changing the expectation.
145 define i8 @reduction_smin_trunc(ptr noalias nocapture %ptr) {
146 ; CHECK-LABEL: @reduction_smin_trunc(
147 ; CHECK-NOT: vector.body
154 %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
155 %sum.02p = phi i32 [ %min, %for.body ], [ 256, %entry ] ; start value 256; the mask on the next line turns it into 0
156 %sum.02 = and i32 %sum.02p, 255 ; keep only the low 8 bits of the accumulator
157 %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
158 %load = load i8, ptr %gep
159 %ext = sext i8 %load to i32 ; sign-extend, matching the signed compare below
160 %icmp = icmp slt i32 %sum.02, %ext
161 %min = select i1 %icmp, i32 %sum.02, i32 %ext ; pick the smaller (signed) of accumulator and element
162 %iv.next = add i32 %iv, 1
163 %exitcond = icmp eq i32 %iv.next, 256 ; loop body runs for iv = 0..255
164 br i1 %exitcond, label %for.end, label %for.body
167 %ret = trunc i32 %min to i8
; Unsigned-minimum reduction written as icmp ult + select on an i32
; accumulator that is masked to its low 8 bits each iteration; the result is
; truncated to i8. The only assertion is negative: no vector.body block may
; appear in the output, i.e. this loop is expected to stay scalar.
171 define i8 @reduction_umin_trunc(ptr noalias nocapture %ptr) {
172 ; CHECK-LABEL: @reduction_umin_trunc(
173 ; CHECK-NOT: vector.body
180 %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
181 %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ] ; i32 accumulator, starts at 0
182 %sum.02 = and i32 %sum.02p, 255 ; keep only the low 8 bits of the accumulator
183 %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
184 %load = load i8, ptr %gep
185 %ext = zext i8 %load to i32 ; zero-extend, matching the unsigned compare below
186 %icmp = icmp ult i32 %sum.02, %ext
187 %min = select i1 %icmp, i32 %sum.02, i32 %ext ; pick the smaller (unsigned) of accumulator and element
188 %iv.next = add i32 %iv, 1
189 %exitcond = icmp eq i32 %iv.next, 256 ; loop body runs for iv = 0..255
190 br i1 %exitcond, label %for.end, label %for.body
193 %ret = trunc i32 %min to i8
; Signed-maximum reduction written as icmp sgt + select on an i32 accumulator
; that is masked to its low 16 bits each iteration; the result is truncated to
; i16. The only assertion is negative: no vector.body block may appear in the
; output, i.e. this loop is expected to stay scalar.
; NOTE(review): the reduction value is named %min, but the sgt compare makes
; it a maximum.
197 define i16 @reduction_smax_trunc(ptr noalias nocapture %ptr) {
198 ; CHECK-LABEL: @reduction_smax_trunc(
199 ; CHECK-NOT: vector.body
206 %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
207 %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ] ; i32 accumulator, starts at 0
208 %sum.02 = and i32 %sum.02p, 65535 ; keep only the low 16 bits of the accumulator
209 %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
210 %load = load i16, ptr %gep
211 %ext = sext i16 %load to i32 ; sign-extend, matching the signed compare below
212 %icmp = icmp sgt i32 %sum.02, %ext
213 %min = select i1 %icmp, i32 %sum.02, i32 %ext ; pick the larger (signed) of accumulator and element
214 %iv.next = add i32 %iv, 1
215 %exitcond = icmp eq i32 %iv.next, 256 ; loop body runs for iv = 0..255
216 br i1 %exitcond, label %for.end, label %for.body
219 %ret = trunc i32 %min to i16
; Unsigned-maximum reduction written as icmp ugt + select on an i32
; accumulator that is masked to its low 16 bits each iteration; the result is
; truncated to i16. The only assertion is negative: no vector.body block may
; appear in the output, i.e. this loop is expected to stay scalar.
; NOTE(review): the reduction value is named %min, but the ugt compare makes
; it a maximum.
223 define i16 @reduction_umax_trunc(ptr noalias nocapture %ptr) {
224 ; CHECK-LABEL: @reduction_umax_trunc(
225 ; CHECK-NOT: vector.body
232 %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
233 %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ] ; i32 accumulator, starts at 0
234 %sum.02 = and i32 %sum.02p, 65535 ; keep only the low 16 bits of the accumulator
235 %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
236 %load = load i16, ptr %gep
237 %ext = zext i16 %load to i32 ; zero-extend, matching the unsigned compare below
238 %icmp = icmp ugt i32 %sum.02, %ext
239 %min = select i1 %icmp, i32 %sum.02, i32 %ext ; pick the larger (unsigned) of accumulator and element
240 %iv.next = add i32 %iv, 1
241 %exitcond = icmp eq i32 %iv.next, 256 ; loop body runs for iv = 0..255
242 br i1 %exitcond, label %for.end, label %for.body
245 %ret = trunc i32 %min to i16
249 ; Test case for https://github.com/llvm/llvm-project/issues/81415.
; OR reduction (start value 10) whose per-iteration operand is the loaded
; element plus a term that is always zero: the trunc/sext/icmp/zext/sdiv chain
; only feeds `and i32 %div, 0`. The assertions below expect a plain <8 x i32>
; or-reduction over the loads (start vector carries 10 in lane 0), a vector
; loop that stops at index 992, and a scalar remainder loop covering
; iv = 992..998 that likewise contains only the load and the `or`.
250 define i32 @reduction_and_or(i16 %a, i32 %b, ptr %src) {
251 ; CHECK-LABEL: @reduction_and_or(
253 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
255 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
256 ; CHECK: vector.body:
257 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
258 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
259 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
260 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
261 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4
262 ; CHECK-NEXT: [[TMP2]] = or <8 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
263 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
264 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
265 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
266 ; CHECK: middle.block:
267 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP2]])
268 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
270 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ]
271 ; CHECK-NEXT: br label [[LOOP:%.*]]
273 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 992, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
274 ; CHECK-NEXT: [[OR67:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
275 ; CHECK-NEXT: [[TMP5:%.*]] = zext nneg i32 [[IV]] to i64
276 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP5]]
277 ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
278 ; CHECK-NEXT: [[OR]] = or i32 [[OR67]], [[L]]
279 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
280 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 999
281 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
283 ; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ poison, [[MIDDLE_BLOCK]] ]
284 ; CHECK-NEXT: ret i32 [[OR_LCSSA]]
287 %ext1 = zext i16 %a to i32
291 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
292 %or67 = phi i32 [ 10, %entry ], [ %or, %loop ] ; or-reduction accumulator, starts at 10
293 %t = trunc i32 %b to i16
294 %ext = sext i16 %t to i32
295 %cmp = icmp sgt i32 %ext, %ext1
296 %ext2 = zext i1 %cmp to i32
297 %cmp3 = icmp sge i32 %iv, %ext2
298 %ext4 = zext i1 %cmp3 to i32
299 %div = sdiv i32 %ext4, %b
300 %and = and i32 %div, 0 ; masking with 0: the whole chain above contributes 0
301 %gep = getelementptr inbounds i32, ptr %src, i32 %iv
302 %l = load i32, ptr %gep
303 %add = add i32 %and, %l ; 0 + %l, so the value or'ed in is just the loaded element
304 %or = or i32 %or67, %add
305 %iv.next = add nsw i32 %iv, 1
306 %tobool.not = icmp eq i32 %iv.next, 999 ; loop body runs for iv = 0..998
307 br i1 %tobool.not, label %exit, label %loop
310 %or.lcssa = phi i32 [ %or, %loop ]