1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2 ; RUN: opt < %s -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
5 target triple = "aarch64-unknown-linux-gnu"
7 ;; Based on the following C code:
9 ;; void simple_histogram(int *buckets, unsigned *indices, int N) {
10 ;; for (int i = 0; i < N; ++i)
11 ;; buckets[indices[i]]++;
14 ;; Confirm finding a histogram operation
15 ; CHECK-LABEL: Checking a loop in 'simple_histogram'
16 ; CHECK: LV: Checking for a histogram on: store i32 %inc, ptr %gep.bucket, align 4
17 ; CHECK: LV: Found histogram for: store i32 %inc, ptr %gep.bucket, align 4
19 ;; Confirm cost calculation for runtime checks
20 ; CHECK-LABEL: LV: Checking a loop in 'simple_histogram_rtdepcheck'
21 ; CHECK: Calculating cost of runtime checks:
22 ; CHECK: Total cost of runtime checks:
23 ; CHECK: LV: Minimum required TC for runtime checks to be profitable:
;; Confirm inability to vectorize with a potential alias to buckets
26 ; CHECK-LABEL: LV: Checking a loop in 'simple_histogram_unsafe_alias'
27 ; CHECK: LV: Can't vectorize due to memory conflicts
28 ; CHECK-NEXT: LV: Not vectorizing: Cannot prove legality.
30 define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
31 ; CHECK-LABEL: define void @simple_histogram(
32 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
34 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
35 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
36 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
37 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
39 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
40 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
41 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
42 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
43 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
44 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
46 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
47 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
49 ; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
50 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
51 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> splat (i1 true))
52 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
53 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
54 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
55 ; CHECK: middle.block:
56 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
57 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
59 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
60 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
62 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
63 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
64 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
65 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
66 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
67 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
68 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
69 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
70 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
71 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
72 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
74 ; CHECK-NEXT: ret void
80 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
81 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
82 %l.idx = load i32, ptr %gep.indices, align 4
83 %idxprom1 = zext i32 %l.idx to i64
84 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
85 %l.bucket = load i32, ptr %gep.bucket, align 4
86 %inc = add nsw i32 %l.bucket, 1
87 store i32 %inc, ptr %gep.bucket, align 4
88 %iv.next = add nuw nsw i64 %iv, 1
89 %exitcond = icmp eq i64 %iv.next, %N
90 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
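;; Same pattern, but the increment is passed in as a loop-invariant parameter
;; instead of a constant 1. Roughly equivalent C (a sketch for reference, not part
;; of the test):
;; void simple_histogram_inc_param(int *buckets, unsigned *indices, int N, int incval) {
;;   for (int i = 0; i < N; ++i)
;;     buckets[indices[i]] += incval;
;; }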
96 define void @simple_histogram_inc_param(ptr noalias %buckets, ptr readonly %indices, i64 %N, i32 %incval) #0 {
97 ; CHECK-LABEL: define void @simple_histogram_inc_param(
98 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]], i32 [[INCVAL:%.*]]) #[[ATTR0]] {
100 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
101 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
102 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
103 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
105 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
106 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
107 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
108 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
109 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
110 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
111 ; CHECK: vector.body:
112 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
113 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
114 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
115 ; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
116 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
117 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 [[INCVAL]], <vscale x 4 x i1> splat (i1 true))
118 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
119 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
120 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
121 ; CHECK: middle.block:
122 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
123 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
125 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
126 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
128 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
129 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
130 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
131 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
132 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
133 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
134 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], [[INCVAL]]
135 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
136 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
137 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
138 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
140 ; CHECK-NEXT: ret void
146 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
147 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
148 %l.idx = load i32, ptr %gep.indices, align 4
149 %idxprom1 = zext i32 %l.idx to i64
150 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
151 %l.bucket = load i32, ptr %gep.bucket, align 4
152 %inc = add nsw i32 %l.bucket, %incval
153 store i32 %inc, ptr %gep.bucket, align 4
154 %iv.next = add nuw nsw i64 %iv, 1
155 %exitcond = icmp eq i64 %iv.next, %N
156 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
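;; Decrement instead of increment; the signed index means a sext rather than a
;; zext, and the update maps to the histogram intrinsic with an increment of -1.
;; Roughly equivalent C (a sketch):
;; void simple_histogram_sub(int *buckets, int *indices, int N) {
;;   for (int i = 0; i < N; ++i)
;;     buckets[indices[i]]--;
;; }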
162 define void @simple_histogram_sub(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
163 ; CHECK-LABEL: define void @simple_histogram_sub(
164 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
166 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
167 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
168 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
169 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
171 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
172 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
173 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
174 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
175 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
176 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
177 ; CHECK: vector.body:
178 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
179 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
180 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
181 ; CHECK-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
182 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
183 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 -1, <vscale x 4 x i1> splat (i1 true))
184 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
185 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
186 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
187 ; CHECK: middle.block:
188 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
189 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
191 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
192 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
194 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
195 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
196 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
197 ; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP12]] to i64
198 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
199 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
200 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], -1
201 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
202 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
203 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
204 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
206 ; CHECK-NEXT: ret void
212 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
213 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
214 %l.idx = load i32, ptr %gep.indices, align 4
215 %idxprom1 = sext i32 %l.idx to i64
216 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
217 %l.bucket = load i32, ptr %gep.bucket, align 4
218 %inc = sub nsw i32 %l.bucket, 1
219 store i32 %inc, ptr %gep.bucket, align 4
220 %iv.next = add nuw nsw i64 %iv, 1
221 %exitcond = icmp eq i64 %iv.next, %N
222 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
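;; Histogram update guarded by a loop-varying condition; the compare becomes the
;; mask operand of the intrinsic. Roughly equivalent C (a sketch):
;; void conditional_histogram(int *buckets, unsigned *indices, int *conds, int N) {
;;   for (int i = 0; i < N; ++i)
;;     if (conds[i] > 5100)
;;       buckets[indices[i]]++;
;; }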
228 define void @conditional_histogram(ptr noalias %buckets, ptr readonly %indices, ptr readonly %conds, i64 %N) #0 {
229 ; CHECK-LABEL: define void @conditional_histogram(
230 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], ptr readonly [[CONDS:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
232 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
233 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP6]], 2
234 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP3]]
235 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
237 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
238 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
239 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
240 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
241 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
242 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
243 ; CHECK: vector.body:
244 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]
245 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
246 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
247 ; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
248 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
249 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[CONDS]], i64 [[INDEX]]
250 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP12]], align 4
251 ; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 5100)
252 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> [[TMP13]])
253 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
254 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
255 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
256 ; CHECK: middle.block:
257 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
258 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
260 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
261 ; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
263 ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[NEXT:%.*]] ]
264 ; CHECK-NEXT: [[CONDIDX:%.*]] = getelementptr inbounds i32, ptr [[CONDS]], i64 [[IV1]]
265 ; CHECK-NEXT: [[CONDDATA:%.*]] = load i32, ptr [[CONDIDX]], align 4
266 ; CHECK-NEXT: [[IFCOND:%.*]] = icmp sgt i32 [[CONDDATA]], 5100
267 ; CHECK-NEXT: br i1 [[IFCOND]], label [[IFTRUE:%.*]], label [[NEXT]]
269 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
270 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
271 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP1]] to i64
272 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
273 ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
274 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1
275 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX3]], align 4
276 ; CHECK-NEXT: br label [[NEXT]]
278 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
279 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
280 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]]
282 ; CHECK-NEXT: ret void
288 %iv = phi i64 [ 0, %entry ], [ %iv.next, %next ]
289 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
290 %l.idx = load i32, ptr %gep.indices, align 4
291 %idxprom1 = zext i32 %l.idx to i64
292 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
293 %condidx = getelementptr inbounds i32, ptr %conds, i64 %iv
294 %conddata = load i32, ptr %condidx, align 4
295 %ifcond = icmp sgt i32 %conddata, 5100
296 br i1 %ifcond, label %iftrue, label %next
299 %l.bucket = load i32, ptr %gep.bucket, align 4
300 %inc = add nsw i32 %l.bucket, 1
301 store i32 %inc, ptr %gep.bucket, align 4
305 %iv.next = add nuw nsw i64 %iv, 1
306 %exitcond = icmp eq i64 %iv.next, %N
307 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
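;; Same pattern with 8-bit buckets, exercising the i8 variant of the intrinsic.
;; Roughly equivalent C (a sketch):
;; void histogram_8bit(char *buckets, unsigned *indices, int N) {
;;   for (int i = 0; i < N; ++i)
;;     buckets[indices[i]]++;
;; }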
313 define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
314 ; CHECK-LABEL: define void @histogram_8bit(
315 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
317 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
318 ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 2
319 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP9]]
320 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
322 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
323 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
324 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
325 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
326 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
327 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
328 ; CHECK: vector.body:
329 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
330 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
331 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[ARRAYIDX]], align 4
332 ; CHECK-NEXT: [[TMP6:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
333 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP6]]
334 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i8(<vscale x 4 x ptr> [[TMP7]], i8 1, <vscale x 4 x i1> splat (i1 true))
335 ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]]
336 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
337 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
338 ; CHECK: middle.block:
339 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
340 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
342 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
343 ; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
345 ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
346 ; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
347 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_INDICES]], align 4
348 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
349 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[BUCKETS]], i64 [[IDXPROM1]]
350 ; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 4
351 ; CHECK-NEXT: [[INC:%.*]] = add nsw i8 [[TMP1]], 1
352 ; CHECK-NEXT: store i8 [[INC]], ptr [[ARRAYIDX2]], align 4
353 ; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
354 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
355 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP11:![0-9]+]]
357 ; CHECK-NEXT: ret void
363 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
364 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
365 %l.idx = load i32, ptr %gep.indices, align 4
366 %idxprom1 = zext i32 %l.idx to i64
367 %gep.bucket = getelementptr inbounds i8, ptr %buckets, i64 %idxprom1
368 %l.bucket = load i8, ptr %gep.bucket, align 4
369 %inc = add nsw i8 %l.bucket, 1
370 store i8 %inc, ptr %gep.bucket, align 4
371 %iv.next = add nuw nsw i64 %iv, 1
372 %exitcond = icmp eq i64 %iv.next, %N
373 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
379 ;; We don't currently support floating point histograms.
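;; Roughly equivalent C (a sketch); this loop is expected to remain scalar:
;; void histogram_float(float *buckets, unsigned *indices, int N) {
;;   for (int i = 0; i < N; ++i)
;;     buckets[indices[i]] += 1.0f;
;; }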
380 define void @histogram_float(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
381 ; CHECK-LABEL: define void @histogram_float(
382 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
384 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
386 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
387 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
388 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
389 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
390 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[BUCKETS]], i64 [[IDXPROM1]]
391 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
392 ; CHECK-NEXT: [[INC:%.*]] = fadd fast float [[TMP1]], 1.000000e+00
393 ; CHECK-NEXT: store float [[INC]], ptr [[ARRAYIDX2]], align 4
394 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
395 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
396 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
398 ; CHECK-NEXT: ret void
404 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
405 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
406 %l.idx = load i32, ptr %gep.indices, align 4
407 %idxprom1 = zext i32 %l.idx to i64
408 %gep.bucket = getelementptr inbounds float, ptr %buckets, i64 %idxprom1
409 %l.bucket = load float, ptr %gep.bucket, align 4
410 %inc = fadd fast float %l.bucket, 1.0
411 store float %inc, ptr %gep.bucket, align 4
412 %iv.next = add nuw nsw i64 %iv, 1
413 %exitcond = icmp eq i64 %iv.next, %N
414 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
;; We don't support histograms with an update value that isn't loop-invariant.
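;; Roughly equivalent C (a sketch); this loop is also expected to remain scalar:
;; void histogram_varying_increment(int *buckets, unsigned *indices, int *incvals, int N) {
;;   for (int i = 0; i < N; ++i)
;;     buckets[indices[i]] += incvals[i];
;; }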
421 define void @histogram_varying_increment(ptr noalias %buckets, ptr readonly %indices, ptr readonly %incvals, i64 %N) #0 {
422 ; CHECK-LABEL: define void @histogram_varying_increment(
423 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], ptr readonly [[INCVALS:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
425 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
427 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
428 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
429 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
430 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
431 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
432 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
433 ; CHECK-NEXT: [[INCIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INCVALS]], i64 [[IV]]
434 ; CHECK-NEXT: [[INCVAL:%.*]] = load i32, ptr [[INCIDX]], align 4
435 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], [[INCVAL]]
436 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
437 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
438 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
439 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12]]
441 ; CHECK-NEXT: ret void
447 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
448 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
449 %l.idx = load i32, ptr %gep.indices, align 4
450 %idxprom1 = zext i32 %l.idx to i64
451 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
452 %l.bucket = load i32, ptr %gep.bucket, align 4
453 %gep.incvals = getelementptr inbounds i32, ptr %incvals, i64 %iv
454 %l.incval = load i32, ptr %gep.incvals, align 4
455 %inc = add nsw i32 %l.bucket, %l.incval
456 store i32 %inc, ptr %gep.bucket, align 4
457 %iv.next = add nuw nsw i64 %iv, 1
458 %exitcond = icmp eq i64 %iv.next, %N
459 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
465 ;; Test that interleaving works when vectorizing.
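;; (The !llvm.loop metadata on this loop requests an interleave count of 2, so the
;; vector body below contains two wide loads of indices and two histogram intrinsic
;; calls per iteration.)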
466 define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
467 ; CHECK-LABEL: define void @simple_histogram_user_interleave(
468 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
470 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
471 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
472 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
473 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
475 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
476 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8
477 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
478 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
479 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
480 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
481 ; CHECK: vector.body:
482 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
483 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
484 ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
485 ; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP15]], 4
486 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[DOTIDX]]
487 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
488 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP17]], align 4
489 ; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
490 ; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64>
491 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
492 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP19]]
493 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> splat (i1 true))
494 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP21]], i32 1, <vscale x 4 x i1> splat (i1 true))
495 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
496 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
497 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
498 ; CHECK: middle.block:
499 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
500 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
502 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
503 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
505 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
506 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
507 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
508 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
509 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
510 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
511 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
512 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
513 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
514 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
515 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
517 ; CHECK-NEXT: ret void
523 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
524 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
525 %l.idx = load i32, ptr %gep.indices, align 4
526 %idxprom1 = zext i32 %l.idx to i64
527 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
528 %l.bucket = load i32, ptr %gep.bucket, align 4
529 %inc = add nsw i32 %l.bucket, 1
530 store i32 %inc, ptr %gep.bucket, align 4
531 %iv.next = add nuw nsw i64 %iv, 1
532 %exitcond = icmp eq i64 %iv.next, %N
533 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !0
539 ;; Test that we can handle more than one GEP index.
540 @idx_array = dso_local local_unnamed_addr global [1048576 x i32] zeroinitializer, align 4
541 @data_array = dso_local local_unnamed_addr global [1048576 x i32] zeroinitializer, align 4
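;; Roughly equivalent C (a sketch), using the globals above so the bucket GEP
;; carries an extra constant index:
;; void histogram_array_3op_gep(long N) {
;;   for (long i = 0; i < N; ++i)
;;     data_array[idx_array[i]]++;
;; }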
543 define void @histogram_array_3op_gep(i64 noundef %N) #0 {
544 ; CHECK-LABEL: define void @histogram_array_3op_gep(
545 ; CHECK-SAME: i64 noundef [[N:%.*]]) #[[ATTR0]] {
547 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
548 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
549 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
550 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
552 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
553 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
554 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
555 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
556 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
557 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
558 ; CHECK: vector.body:
559 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
560 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [1048576 x i32], ptr @idx_array, i64 0, i64 [[INDEX]]
561 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
562 ; CHECK-NEXT: [[TMP14:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64>
563 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1048576 x i32], ptr @data_array, i64 0, <vscale x 4 x i64> [[TMP14]]
564 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP11]], i32 1, <vscale x 4 x i1> splat (i1 true))
565 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
566 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
567 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
568 ; CHECK: middle.block:
569 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
570 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
572 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
573 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
575 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
576 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1048576 x i32], ptr @idx_array, i64 0, i64 [[IV]]
577 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
578 ; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP9]] to i64
579 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [1048576 x i32], ptr @data_array, i64 0, i64 [[IDXPROM5]]
580 ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
581 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1
582 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX6]], align 4
583 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
584 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
585 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
587 ; CHECK-NEXT: ret void
593 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
594 %gep.indices = getelementptr inbounds [1048576 x i32], ptr @idx_array, i64 0, i64 %iv
595 %l.idx = load i32, ptr %gep.indices, align 4
596 %idxprom5 = sext i32 %l.idx to i64
597 %gep.bucket = getelementptr inbounds [1048576 x i32], ptr @data_array, i64 0, i64 %idxprom5
598 %l.bucket = load i32, ptr %gep.bucket, align 4
599 %inc = add nsw i32 %l.bucket, 1
600 store i32 %inc, ptr %gep.bucket, align 4
601 %iv.next = add nuw nsw i64 %iv, 1
602 %exitcond = icmp eq i64 %iv.next, %N
603 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
;; Add a struct into the mix and use a different (non-zero) constant index.
610 ;; { unused, buckets }
611 %somestruct = type { [1048576 x i32], [1048576 x i32] }
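;; The bucket address is computed through a GEP with a struct source type and a
;; leading constant index of 1 rather than 0 (four operands in total), so the
;; histogram recognizer has to look through the extra indices.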
613 define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr readonly %indices, ptr noalias %data.struct) #0 {
614 ; CHECK-LABEL: define void @histogram_array_4op_gep_nonzero_const_idx(
615 ; CHECK-SAME: i64 noundef [[N:%.*]], ptr readonly [[INDICES:%.*]], ptr noalias [[DATA_STRUCT:%.*]]) #[[ATTR0]] {
617 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
618 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
619 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
620 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
622 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
623 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
624 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
625 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
626 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
627 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
628 ; CHECK: vector.body:
629 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
630 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
631 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
632 ; CHECK-NEXT: [[TMP6:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
633 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[SOMESTRUCT:%.*]], ptr [[DATA_STRUCT]], i64 1, i32 0, <vscale x 4 x i64> [[TMP6]]
634 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP7]], i32 1, <vscale x 4 x i1> splat (i1 true))
635 ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]]
636 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
637 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
638 ; CHECK: middle.block:
639 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
640 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
642 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
643 ; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
645 ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
646 ; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
647 ; CHECK-NEXT: [[L_IDX:%.*]] = load i32, ptr [[GEP_INDICES]], align 4
648 ; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[L_IDX]] to i64
649 ; CHECK-NEXT: [[GEP_BUCKET:%.*]] = getelementptr inbounds [[SOMESTRUCT]], ptr [[DATA_STRUCT]], i64 1, i32 0, i64 [[IDXPROM5]]
650 ; CHECK-NEXT: [[L_BUCKET:%.*]] = load i32, ptr [[GEP_BUCKET]], align 4
651 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[L_BUCKET]], 1
652 ; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_BUCKET]], align 4
653 ; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
654 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
655 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]]
657 ; CHECK-NEXT: ret void
663 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
664 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
665 %l.idx = load i32, ptr %gep.indices, align 4
666 %idxprom5 = sext i32 %l.idx to i64
667 %gep.bucket = getelementptr inbounds %somestruct, ptr %data.struct, i32 1, i32 0, i64 %idxprom5
668 %l.bucket = load i32, ptr %gep.bucket, align 4
669 %inc = add nsw i32 %l.bucket, 1
670 store i32 %inc, ptr %gep.bucket, align 4
671 %iv.next = add nuw nsw i64 %iv, 1
672 %exitcond = icmp eq i64 %iv.next, %N
673 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
679 ;; Make sure the histogram intrinsic uses the active lane mask when tail folding.
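;; (Predication is requested via the loop metadata, so the loop is tail-folded and
;; the intrinsic's mask operand is the active lane mask rather than an all-true
;; splat.)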
680 define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
681 ; CHECK-LABEL: define void @simple_histogram_tailfold(
682 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
684 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
686 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
687 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP2]], 2
688 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
689 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
690 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP5]])
691 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
692 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
693 ; CHECK: vector.body:
694 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
695 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
696 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
697 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
698 ; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
699 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
700 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
701 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
702 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]])
703 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
704 ; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]]
705 ; CHECK: middle.block:
706 ; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
708 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
710 ; CHECK-NEXT: br i1 poison, label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
712 ; CHECK-NEXT: ret void
718 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
719 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
720 %l.idx = load i32, ptr %gep.indices, align 4
721 %idxprom1 = zext i32 %l.idx to i64
722 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
723 %l.bucket = load i32, ptr %gep.bucket, align 4
724 %inc = add nsw i32 %l.bucket, 1
725 store i32 %inc, ptr %gep.bucket, align 4
726 %iv.next = add nuw nsw i64 %iv, 1
727 %exitcond = icmp eq i64 %iv.next, %N
728 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !2
;; Check that we can still vectorize a histogram when LAA finds another dependence
;; that doesn't conflict with the buckets.
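;; Roughly equivalent C (a sketch); the extra store to array[] is what introduces
;; the runtime check against indices[]:
;; void simple_histogram_rtdepcheck(int *buckets, int *array, unsigned *indices, int N) {
;;   for (int i = 0; i < N; ++i) {
;;     buckets[indices[i]]++;
;;     array[i] = i;
;;   }
;; }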
736 define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr %indices, i64 %N) #0 {
737 ; CHECK-LABEL: define void @simple_histogram_rtdepcheck(
738 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr [[ARRAY:%.*]], ptr [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
740 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
741 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
742 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8)
743 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
744 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
745 ; CHECK: vector.memcheck:
746 ; CHECK-NEXT: [[ARRAY1:%.*]] = ptrtoint ptr [[ARRAY]] to i64
747 ; CHECK-NEXT: [[INDICES2:%.*]] = ptrtoint ptr [[INDICES]] to i64
748 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
749 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
750 ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[ARRAY1]], [[INDICES2]]
751 ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]]
752 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
754 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
755 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP6]], -4
756 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
757 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
758 ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
759 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
760 ; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw nsw i64 [[TMP8]] to i32
761 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
762 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
763 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
764 ; CHECK: vector.body:
765 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
766 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
767 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
768 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP12]], align 4
769 ; CHECK-NEXT: [[TMP13:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
770 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP13]]
771 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP14]], i32 1, <vscale x 4 x i1> splat (i1 true))
772 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]]
773 ; CHECK-NEXT: store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP15]], align 4
774 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
775 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
776 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
777 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
778 ; CHECK: middle.block:
779 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
780 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
782 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
783 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
785 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
786 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
787 ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
788 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP17]] to i64
789 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
790 ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
791 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1
792 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
793 ; CHECK-NEXT: [[IDX_ADDR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IV]]
794 ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
795 ; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[IDX_ADDR]], align 4
796 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
797 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
798 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
800 ; CHECK-NEXT: ret void
806 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
807 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
808 %l.idx = load i32, ptr %gep.indices, align 4
809 %idxprom1 = zext i32 %l.idx to i64
810 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
811 %l.bucket = load i32, ptr %gep.bucket, align 4
812 %inc = add nsw i32 %l.bucket, 1
813 store i32 %inc, ptr %gep.bucket, align 4
814 %idx.addr = getelementptr inbounds i32, ptr %array, i64 %iv
815 %iv.trunc = trunc i64 %iv to i32
816 store i32 %iv.trunc, ptr %idx.addr, align 4
817 %iv.next = add nuw nsw i64 %iv, 1
818 %exitcond = icmp eq i64 %iv.next, %N
819 br i1 %exitcond, label %for.exit, label %for.body
;; Make sure we don't vectorize if there's a potential alias between buckets and indices.
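;; Roughly the same C as simple_histogram, but with no noalias/restrict guarantee,
;; so buckets may alias indices and the loop must stay scalar (a sketch):
;; void simple_histogram_unsafe_alias(int *buckets, unsigned *indices, int N) {
;;   for (int i = 0; i < N; ++i)
;;     buckets[indices[i]]++;
;; }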
827 define void @simple_histogram_unsafe_alias(ptr %buckets, ptr %indices, i64 %N) #0 {
828 ; CHECK-LABEL: define void @simple_histogram_unsafe_alias(
829 ; CHECK-SAME: ptr [[BUCKETS:%.*]], ptr [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
831 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
833 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
834 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
835 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
836 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
837 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
838 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
839 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
840 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
841 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
842 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
843 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
845 ; CHECK-NEXT: ret void
851 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
852 %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
853 %l.idx = load i32, ptr %gep.indices, align 4
854 %idxprom1 = zext i32 %l.idx to i64
855 %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
856 %l.bucket = load i32, ptr %gep.bucket, align 4
857 %inc = add nsw i32 %l.bucket, 1
858 store i32 %inc, ptr %gep.bucket, align 4
859 %iv.next = add nuw nsw i64 %iv, 1
860 %exitcond = icmp eq i64 %iv.next, %N
861 br i1 %exitcond, label %for.exit, label %for.body
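;; 64-bit indices and 64-bit buckets, exercising the i64 variant of the intrinsic
;; with a <vscale x 2 x i64> index vector. Roughly equivalent C (a sketch):
;; void simple_histogram_64b(long *buckets, long *indices, long N) {
;;   for (long i = 0; i < N; ++i)
;;     buckets[indices[i]]++;
;; }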
867 define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
868 ; CHECK-LABEL: define void @simple_histogram_64b(
869 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
871 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
872 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
873 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
874 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
876 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
877 ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -2
878 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
879 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
880 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 1
881 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
882 ; CHECK: vector.body:
883 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
884 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[INDICES]], i64 [[INDEX]]
885 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP5]], align 4
886 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[BUCKETS]], <vscale x 2 x i64> [[WIDE_LOAD]]
887 ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> [[TMP6]], i64 1, <vscale x 2 x i1> splat (i1 true))
888 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
889 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
890 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
891 ; CHECK: middle.block:
892 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
893 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
895 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
896 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
898 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
899 ; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i64, ptr [[INDICES]], i64 [[IV]]
900 ; CHECK-NEXT: [[L_IDX:%.*]] = load i64, ptr [[GEP_INDICES]], align 4
901 ; CHECK-NEXT: [[GEP_BUCKET:%.*]] = getelementptr inbounds i64, ptr [[BUCKETS]], i64 [[L_IDX]]
902 ; CHECK-NEXT: [[L_BUCKET:%.*]] = load i64, ptr [[GEP_BUCKET]], align 4
903 ; CHECK-NEXT: [[INC:%.*]] = add nsw i64 [[L_BUCKET]], 1
904 ; CHECK-NEXT: store i64 [[INC]], ptr [[GEP_BUCKET]], align 4
905 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
906 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
907 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
909 ; CHECK-NEXT: ret void
915 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
916 %gep.indices = getelementptr inbounds i64, ptr %indices, i64 %iv
917 %l.idx = load i64, ptr %gep.indices, align 4
918 %gep.bucket = getelementptr inbounds i64, ptr %buckets, i64 %l.idx
919 %l.bucket = load i64, ptr %gep.bucket, align 4
920 %inc = add nsw i64 %l.bucket, 1
921 store i64 %inc, ptr %gep.bucket, align 4
922 %iv.next = add nuw nsw i64 %iv, 1
923 %exitcond = icmp eq i64 %iv.next, %N
924 br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
930 attributes #0 = { "target-features"="+sve2" vscale_range(1,16) }
932 !0 = distinct !{!0, !1}
933 !1 = !{!"llvm.loop.interleave.count", i32 2}
934 !2 = distinct !{!2, !3}
935 !3 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
936 !4 = distinct !{!4, !5}
937 !5 = !{!"llvm.loop.interleave.count", i32 1}