1 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
3 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6 ; Check vectorization of reduction code which has an fadd instruction after
7 ; an fcmp instruction which compares an array element and 0.
9 ; float fcmp_0_fadd_select1(ptr restrict x, const int N) {
11 ; for (int i = 0; i < N; ++i)
12 ; if (x[i] > (float)0.)
17 ; CHECK-LABEL: @fcmp_0_fadd_select1(
18 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
19 ; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
20 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
21 define float @fcmp_0_fadd_select1(ptr noalias %x, i32 %N) nounwind readonly { ; float sum of the x[i] that compare greater than 0.0
23 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
24 br i1 %cmp.1, label %for.header, label %for.end
26 for.header: ; preds = %entry
27 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
30 for.body: ; preds = %for.header, %for.body
31 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
32 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
33 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
34 %0 = load float, ptr %arrayidx, align 4
35 %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
36 %add = fadd fast float %0, %sum.1 ; computed unconditionally; the select below decides whether it is kept
37 %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the previous sum when the compare is false
38 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
39 %exitcond = icmp eq i64 %indvars.iv.next, %zext
40 br i1 %exitcond, label %for.end, label %for.body
42 for.end: ; preds = %for.body, %entry
43 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final sum
48 ; Check vectorization of reduction code which has an fadd instruction after
49 ; an fcmp instruction which compares an array element and 0.
51 ; double fcmp_0_fadd_select2(ptr restrict x, const int N) {
53 ; for (int i = 0; i < N; ++i)
59 ; CHECK-LABEL: @fcmp_0_fadd_select2(
60 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
61 ; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
62 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
63 define double @fcmp_0_fadd_select2(ptr noalias %x, i32 %N) nounwind readonly { ; double sum of the x[i] that compare greater than 0.0
65 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
66 br i1 %cmp.1, label %for.header, label %for.end
68 for.header: ; preds = %entry
69 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
72 for.body: ; preds = %for.header, %for.body
73 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
74 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
75 %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
76 %0 = load double, ptr %arrayidx, align 4
77 %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
78 %add = fadd fast double %0, %sum.1 ; computed unconditionally; the select below decides whether it is kept
79 %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; keep the previous sum when the compare is false
80 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
81 %exitcond = icmp eq i64 %indvars.iv.next, %zext
82 br i1 %exitcond, label %for.end, label %for.body
84 for.end: ; preds = %for.body, %entry
85 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final sum
90 ; Check vectorization of reduction code which has an fadd instruction after
91 ; an fcmp instruction which compares an array element and a floating-point
94 ; float fcmp_val_fadd_select1(ptr restrict x, float y, const int N) {
96 ; for (int i = 0; i < N; ++i)
102 ; CHECK-LABEL: @fcmp_val_fadd_select1(
103 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %broadcast.splat
104 ; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
105 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
106 define float @fcmp_val_fadd_select1(ptr noalias %x, float %y, i32 %N) nounwind readonly { ; float sum of the x[i] that compare greater than %y
108 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
109 br i1 %cmp.1, label %for.header, label %for.end
111 for.header: ; preds = %entry
112 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
115 for.body: ; preds = %for.header, %for.body
116 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
117 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
118 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
119 %0 = load float, ptr %arrayidx, align 4
120 %cmp.2 = fcmp fast ogt float %0, %y ; threshold is the loop-invariant argument %y
121 %add = fadd fast float %0, %sum.1
122 %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the previous sum when the compare is false
123 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
124 %exitcond = icmp eq i64 %indvars.iv.next, %zext
125 br i1 %exitcond, label %for.end, label %for.body
127 for.end: ; preds = %for.body, %entry
128 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final sum
133 ; Check vectorization of reduction code which has an fadd instruction after
134 ; an fcmp instruction which compares an array element and a floating-point
137 ; double fcmp_val_fadd_select2(ptr restrict x, double y, const int N) {
139 ; for (int i = 0; i < N; ++i)
145 ; CHECK-LABEL: @fcmp_val_fadd_select2(
146 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %broadcast.splat
147 ; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
148 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
149 define double @fcmp_val_fadd_select2(ptr noalias %x, double %y, i32 %N) nounwind readonly { ; double sum of the x[i] that compare greater than %y
151 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
152 br i1 %cmp.1, label %for.header, label %for.end
154 for.header: ; preds = %entry
155 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
158 for.body: ; preds = %for.header, %for.body
159 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
160 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
161 %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
162 %0 = load double, ptr %arrayidx, align 4
163 %cmp.2 = fcmp fast ogt double %0, %y ; threshold is the loop-invariant argument %y
164 %add = fadd fast double %0, %sum.1
165 %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; keep the previous sum when the compare is false
166 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
167 %exitcond = icmp eq i64 %indvars.iv.next, %zext
168 br i1 %exitcond, label %for.end, label %for.body
170 for.end: ; preds = %for.body, %entry
171 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final sum
176 ; Check vectorization of reduction code which has an fadd instruction after
177 ; an fcmp instruction which compares an array element and another array
180 ; float fcmp_array_elm_fadd_select1(ptr restrict x, ptr restrict y,
183 ; for (int i = 0; i < N; ++i)
189 ; CHECK-LABEL: @fcmp_array_elm_fadd_select1(
190 ; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %[[V1:.*]]
191 ; CHECK: %[[V4:.*]] = fadd fast <4 x float> %[[V0]], %[[V3:.*]]
192 ; CHECK: select <4 x i1> %[[V2]], <4 x float> %[[V4]], <4 x float> %[[V3]]
193 define float @fcmp_array_elm_fadd_select1(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly { ; float sum of the x[i] that compare greater than y[i]
195 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
196 br i1 %cmp.1, label %for.header, label %for.end
198 for.header: ; preds = %entry
199 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
202 for.body: ; preds = %for.body, %for.header
203 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
204 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
205 %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
206 %0 = load float, ptr %arrayidx.1, align 4
207 %arrayidx.2 = getelementptr inbounds float, ptr %y, i64 %indvars.iv
208 %1 = load float, ptr %arrayidx.2, align 4
209 %cmp.2 = fcmp fast ogt float %0, %1 ; element-wise compare of x[i] against y[i]
210 %add = fadd fast float %0, %sum.1 ; only x[i] is accumulated; y[i] is used for the compare alone
211 %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the previous sum when the compare is false
212 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
213 %exitcond = icmp eq i64 %indvars.iv.next, %zext
214 br i1 %exitcond, label %for.end, label %for.body
216 for.end: ; preds = %for.body, %entry
217 %2 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final sum
222 ; Check vectorization of reduction code which has an fadd instruction after
223 ; an fcmp instruction which compares an array element and another array
226 ; double fcmp_array_elm_fadd_select2(ptr restrict x, ptr restrict y,
229 ; for (int i = 0; i < N; ++i)
235 ; CHECK-LABEL: @fcmp_array_elm_fadd_select2(
236 ; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %[[V1:.*]]
237 ; CHECK: %[[V4:.*]] = fadd fast <4 x double> %[[V0]], %[[V3:.*]]
238 ; CHECK: select <4 x i1> %[[V2]], <4 x double> %[[V4]], <4 x double> %[[V3]]
239 define double @fcmp_array_elm_fadd_select2(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly { ; double sum of the x[i] that compare greater than y[i]
241 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
242 br i1 %cmp.1, label %for.header, label %for.end
244 for.header: ; preds = %entry
245 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
248 for.body: ; preds = %for.body, %for.header
249 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
250 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
251 %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
252 %0 = load double, ptr %arrayidx.1, align 4
253 %arrayidx.2 = getelementptr inbounds double, ptr %y, i64 %indvars.iv
254 %1 = load double, ptr %arrayidx.2, align 4
255 %cmp.2 = fcmp fast ogt double %0, %1 ; element-wise compare of x[i] against y[i]
256 %add = fadd fast double %0, %sum.1 ; only x[i] is accumulated; y[i] is used for the compare alone
257 %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; keep the previous sum when the compare is false
258 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
259 %exitcond = icmp eq i64 %indvars.iv.next, %zext
260 br i1 %exitcond, label %for.end, label %for.body
262 for.end: ; preds = %for.body, %entry
263 %2 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final sum
268 ; Check vectorization of reduction code which has an fsub instruction after
269 ; an fcmp instruction which compares an array element and 0.
271 ; float fcmp_0_fsub_select1(ptr restrict x, const int N) {
273 ; for (int i = 0; i < N; ++i)
274 ; if (x[i] > (float)0.)
279 ; CHECK-LABEL: @fcmp_0_fsub_select1(
280 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
281 ; CHECK: %[[V3:.*]] = fsub fast <4 x float> %[[V2:.*]], %[[V0]]
282 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
283 define float @fcmp_0_fsub_select1(ptr noalias %x, i32 %N) nounwind readonly { ; subtracts from a float accumulator every x[i] that compares greater than 0.0
285 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
286 br i1 %cmp.1, label %for.header, label %for.end
288 for.header: ; preds = %entry
289 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
292 for.body: ; preds = %for.body, %for.header
293 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
294 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
295 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
296 %0 = load float, ptr %arrayidx, align 4
297 %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
298 %sub = fsub fast float %sum.1, %0 ; accumulator is the left operand: sum - x[i]
299 %sum.2 = select i1 %cmp.2, float %sub, float %sum.1 ; keep the previous value when the compare is false
300 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
301 %exitcond = icmp eq i64 %indvars.iv.next, %zext
302 br i1 %exitcond, label %for.end, label %for.body
304 for.end: ; preds = %for.body, %entry
305 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final value
310 ; Check that the loop is not vectorized if the FP instructions have no fast-math flags.
311 ; float fcmp_0_fsub_select1_novectorize(ptr restrict x, const int N) {
313 ; for (int i = 0; i < N; ++i)
314 ; if (x[i] > (float)0.)
319 ; CHECK-LABEL: @fcmp_0_fsub_select1_novectorize(
320 ; CHECK-NOT: <4 x float>
321 define float @fcmp_0_fsub_select1_novectorize(ptr noalias %x, i32 %N) nounwind readonly { ; same loop shape as the fast-math fsub variant, but without fast-math flags
323 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
324 br i1 %cmp.1, label %for.header, label %for.end
326 for.header: ; preds = %entry
327 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
330 for.body: ; preds = %for.body, %for.header
331 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
332 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
333 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
334 %0 = load float, ptr %arrayidx, align 4
335 %cmp.2 = fcmp ogt float %0, 0.000000e+00 ; no `fast` flag here
336 %sub = fsub float %sum.1, %0 ; no `fast` flag here either; the test expects no <4 x float> output for this function
337 %sum.2 = select i1 %cmp.2, float %sub, float %sum.1 ; keep the previous value when the compare is false
338 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
339 %exitcond = icmp eq i64 %indvars.iv.next, %zext
340 br i1 %exitcond, label %for.end, label %for.body
342 for.end: ; preds = %for.body, %entry
343 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final value
348 ; Check vectorization of reduction code which has an fsub instruction after
349 ; an fcmp instruction which compares an array element and 0.
351 ; double fcmp_0_fsub_select2(ptr restrict x, const int N) {
353 ; for (int i = 0; i < N; ++i)
359 ; CHECK-LABEL: @fcmp_0_fsub_select2(
360 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
361 ; CHECK: %[[V3:.*]] = fsub fast <4 x double> %[[V2:.*]], %[[V0]]
362 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
363 define double @fcmp_0_fsub_select2(ptr noalias %x, i32 %N) nounwind readonly { ; subtracts from a double accumulator every x[i] that compares greater than 0.0
365 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
366 br i1 %cmp.1, label %for.header, label %for.end
368 for.header: ; preds = %entry
369 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
372 for.body: ; preds = %for.body, %for.header
373 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
374 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
375 %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
376 %0 = load double, ptr %arrayidx, align 4
377 %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
378 %sub = fsub fast double %sum.1, %0 ; accumulator is the left operand: sum - x[i]
379 %sum.2 = select i1 %cmp.2, double %sub, double %sum.1 ; keep the previous value when the compare is false
380 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
381 %exitcond = icmp eq i64 %indvars.iv.next, %zext
382 br i1 %exitcond, label %for.end, label %for.body
384 for.end: ; preds = %for.body, %entry
385 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final value
390 ; Check that the loop is not vectorized if the FP instructions have no fast-math flags.
392 ; double fcmp_0_fsub_select2_notvectorize(ptr restrict x, const int N) {
394 ; for (int i = 0; i < N; ++i)
400 ; CHECK-LABEL: @fcmp_0_fsub_select2_notvectorize(
401 ; CHECK-NOT: <4 x double>
402 define double @fcmp_0_fsub_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly { ; same loop shape as the fast-math double fsub variant, but without fast-math flags
404 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
405 br i1 %cmp.1, label %for.header, label %for.end
407 for.header: ; preds = %entry
408 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
411 for.body: ; preds = %for.body, %for.header
412 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
413 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, seeded with 0.0
414 %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
415 %0 = load double, ptr %arrayidx, align 4
416 %cmp.2 = fcmp ogt double %0, 0.000000e+00 ; no `fast` flag here
417 %sub = fsub double %sum.1, %0 ; no `fast` flag here either
418 %sum.2 = select i1 %cmp.2, double %sub, double %sum.1 ; keep the previous value when the compare is false
419 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
420 %exitcond = icmp eq i64 %indvars.iv.next, %zext
421 br i1 %exitcond, label %for.end, label %for.body
423 for.end: ; preds = %for.body, %entry
424 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final value
429 ; Check vectorization of reduction code which has an fmul instruction after
430 ; an fcmp instruction which compares an array element and 0.
432 ; float fcmp_0_fmult_select1(ptr restrict x, const int N) {
434 ; for (int i = 0; i < N; ++i)
435 ; if (x[i] > (float)0.)
440 ; CHECK-LABEL: @fcmp_0_fmult_select1(
441 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
442 ; CHECK: %[[V3:.*]] = fmul fast <4 x float> %[[V2:.*]], %[[V0]]
443 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
444 define float @fcmp_0_fmult_select1(ptr noalias %x, i32 %N) nounwind readonly { ; multiplies a float accumulator by every x[i] that compares greater than 0.0
446 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
447 br i1 %cmp.1, label %for.header, label %for.end
449 for.header: ; preds = %entry
450 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
453 for.body: ; preds = %for.body, %for.header
454 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
455 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; note: the product accumulator is seeded with 0.0, not 1.0
456 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
457 %0 = load float, ptr %arrayidx, align 4
458 %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
459 %mult = fmul fast float %sum.1, %0
460 %sum.2 = select i1 %cmp.2, float %mult, float %sum.1 ; keep the previous value when the compare is false
461 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
462 %exitcond = icmp eq i64 %indvars.iv.next, %zext
463 br i1 %exitcond, label %for.end, label %for.body
465 for.end: ; preds = %for.body, %entry
466 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final value
471 ; Check that the loop is not vectorized if the FP instructions have no fast-math flags.
473 ; float fcmp_0_fmult_select1_notvectorize(ptr restrict x, const int N) {
475 ; for (int i = 0; i < N; ++i)
476 ; if (x[i] > (float)0.)
481 ; CHECK-LABEL: @fcmp_0_fmult_select1_notvectorize(
482 ; CHECK-NOT: <4 x float>
483 define float @fcmp_0_fmult_select1_notvectorize(ptr noalias %x, i32 %N) nounwind readonly { ; same loop shape as the fast-math float fmul variant, but without fast-math flags
485 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
486 br i1 %cmp.1, label %for.header, label %for.end
488 for.header: ; preds = %entry
489 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
492 for.body: ; preds = %for.body, %for.header
493 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
494 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; note: the product accumulator is seeded with 0.0, not 1.0
495 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
496 %0 = load float, ptr %arrayidx, align 4
497 %cmp.2 = fcmp ogt float %0, 0.000000e+00 ; no `fast` flag here
498 %mult = fmul float %sum.1, %0 ; no `fast` flag here either
499 %sum.2 = select i1 %cmp.2, float %mult, float %sum.1 ; keep the previous value when the compare is false
500 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
501 %exitcond = icmp eq i64 %indvars.iv.next, %zext
502 br i1 %exitcond, label %for.end, label %for.body
504 for.end: ; preds = %for.body, %entry
505 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final value
510 ; Check vectorization of reduction code which has an fmul instruction after
511 ; an fcmp instruction which compares an array element and 0.
513 ; double fcmp_0_fmult_select2(ptr restrict x, const int N) {
515 ; for (int i = 0; i < N; ++i)
521 ; CHECK-LABEL: @fcmp_0_fmult_select2(
522 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
523 ; CHECK: %[[V3:.*]] = fmul fast <4 x double> %[[V2:.*]], %[[V0]]
524 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
525 define double @fcmp_0_fmult_select2(ptr noalias %x, i32 %N) nounwind readonly { ; multiplies a double accumulator by every x[i] that compares greater than 0.0
527 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
528 br i1 %cmp.1, label %for.header, label %for.end
530 for.header: ; preds = %entry
531 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
534 for.body: ; preds = %for.body, %for.header
535 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
536 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; note: the product accumulator is seeded with 0.0, not 1.0
537 %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
538 %0 = load double, ptr %arrayidx, align 4
539 %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
540 %mult = fmul fast double %sum.1, %0
541 %sum.2 = select i1 %cmp.2, double %mult, double %sum.1 ; keep the previous value when the compare is false
542 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
543 %exitcond = icmp eq i64 %indvars.iv.next, %zext
544 br i1 %exitcond, label %for.end, label %for.body
546 for.end: ; preds = %for.body, %entry
547 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final value
552 ; Check that the loop is not vectorized if the FP instructions have no fast-math flags.
554 ; double fcmp_0_fmult_select2_notvectorize(ptr restrict x, const int N) {
556 ; for (int i = 0; i < N; ++i)
562 ; CHECK-LABEL: @fcmp_0_fmult_select2_notvectorize(
563 ; CHECK-NOT: <4 x double>
564 define double @fcmp_0_fmult_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly { ; same loop shape as the fast-math double fmul variant, but without fast-math flags
566 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
567 br i1 %cmp.1, label %for.header, label %for.end
569 for.header: ; preds = %entry
570 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
573 for.body: ; preds = %for.body, %for.header
574 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
575 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; note: the product accumulator is seeded with 0.0, not 1.0
576 %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
577 %0 = load double, ptr %arrayidx, align 4
578 %cmp.2 = fcmp ogt double %0, 0.000000e+00 ; no `fast` flag here
579 %mult = fmul double %sum.1, %0 ; no `fast` flag here either
580 %sum.2 = select i1 %cmp.2, double %mult, double %sum.1 ; keep the previous value when the compare is false
581 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
582 %exitcond = icmp eq i64 %indvars.iv.next, %zext
583 br i1 %exitcond, label %for.end, label %for.body
585 for.end: ; preds = %for.body, %entry
586 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 when the loop was skipped, otherwise the final value
590 ; Float multi pattern
591 ; Check vectorisation of reduction code with a pair of selects to different
594 ; float fcmp_multi(ptr a, int n) {
596 ; for (int i=0;i<n;i++) {
607 ; CHECK-LABEL: @fcmp_multi(
608 ; CHECK: %[[C1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], <float 1.000000e+00,
609 ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
610 ; CHECK: %[[C2:.*]] = fcmp olt <4 x float> %[[V0]], <float 3.000000e+00,
611 ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
612 ; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
613 ; CHECK-DAG: %[[M1:.*]] = fmul fast <4 x float> %[[V0]], <float 3.000000e+00,
614 ; CHECK-DAG: %[[M2:.*]] = fmul fast <4 x float> %[[V0]], <float 2.000000e+00,
615 ; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
616 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]]
617 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]]
618 ; CHECK: fadd fast <4 x float> %[[S2]],
619 define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly { ; sums a value chosen per element by a three-way branch on a[i]
621 %cmp10 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
622 br i1 %cmp10, label %for.body.preheader, label %for.end
624 for.body.preheader: ; preds = %entry
625 %wide.trip.count = zext i32 %n to i64
628 for.body: ; preds = %for.inc, %for.body.preheader
629 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
630 %sum.011 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] ; running reduction value, seeded with 0.0
631 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
632 %0 = load float, ptr %arrayidx, align 4
633 %cmp1 = fcmp ogt float %0, 1.000000e+00
634 br i1 %cmp1, label %for.inc, label %if.else ; a[i] > 1.0: a[i] itself is the addend
636 if.else: ; preds = %for.body
637 %cmp8 = fcmp olt float %0, 3.000000e+00
638 br i1 %cmp8, label %if.then10, label %if.else14
640 if.then10: ; preds = %if.else
641 %mul = fmul fast float %0, 2.000000e+00 ; addend on the a[i] < 3.0 path
644 if.else14: ; preds = %if.else
645 %mul17 = fmul fast float %0, 3.000000e+00 ; addend on the remaining path
648 for.inc: ; preds = %for.body, %if.else14, %if.then10
649 %.pn = phi float [ %mul, %if.then10 ], [ %mul17, %if.else14 ], [ %0, %for.body ] ; addend selected by the path taken
650 %sum.1 = fadd fast float %.pn, %sum.011 ; the single reduction update, executed on every iteration
651 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
652 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
653 br i1 %exitcond, label %for.end, label %for.body
655 for.end: ; preds = %for.inc, %entry
656 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ] ; 0.0 when the loop was skipped, otherwise the final sum
657 ret float %sum.0.lcssa
660 ; Float fadd + fsub patterns
661 ; Check vectorisation of reduction code with a pair of selects to different
662 ; instructions { fadd, fsub } but equivalent (change in constant).
664 ; float fcmp_fadd_fsub(ptr a, int n) {
666 ; for (int i=0;i<n;i++) {
675 ; CHECK-LABEL: @fcmp_fadd_fsub(
676 ; CHECK: %[[C1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], <float 1.000000e+00,
677 ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
678 ; CHECK: %[[C2:.*]] = fcmp olt <4 x float> %[[V0]], <float 3.000000e+00,
679 ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
680 ; CHECK-DAG: %[[SUB:.*]] = fsub fast <4 x float>
681 ; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float>
682 ; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
683 ; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
684 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]]
685 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]]
686 define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonly { ; per element: add a[i], subtract a[i], or leave the sum unchanged
688 %cmp9 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
689 br i1 %cmp9, label %for.body.preheader, label %for.end
691 for.body.preheader: ; preds = %entry
692 %wide.trip.count = zext i32 %n to i64
695 for.body: ; preds = %for.inc, %for.body.preheader
696 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
697 %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] ; running reduction value, seeded with 0.0
698 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
699 %0 = load float, ptr %arrayidx, align 4
700 %cmp1 = fcmp ogt float %0, 1.000000e+00
701 br i1 %cmp1, label %if.then, label %if.else
703 if.then: ; preds = %for.body
704 %add = fadd fast float %0, %sum.010 ; a[i] > 1.0: sum + a[i]
707 if.else: ; preds = %for.body
708 %cmp8 = fcmp olt float %0, 3.000000e+00
709 br i1 %cmp8, label %if.then10, label %for.inc ; otherwise the sum passes through unchanged
711 if.then10: ; preds = %if.else
712 %sub = fsub fast float %sum.010, %0 ; a[i] < 3.0 on the else path: sum - a[i]
715 for.inc: ; preds = %if.then, %if.then10, %if.else
716 %sum.1 = phi float [ %add, %if.then ], [ %sub, %if.then10 ], [ %sum.010, %if.else ] ; merges the three possible updates
717 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
718 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
719 br i1 %exitcond, label %for.end, label %for.body
721 for.end: ; preds = %for.inc, %entry
722 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ] ; 0.0 when the loop was skipped, otherwise the final sum
723 ret float %sum.0.lcssa
726 ; Float fadd + fmul patterns
727 ; Check lack of vectorisation of reduction code with a pair of non-compatible
728 ; instructions { fadd, fmul }.
730 ; float fcmp_fadd_fmul(ptr a, int n) {
732 ; for (int i=0;i<n;i++) {
741 ; CHECK-LABEL: @fcmp_fadd_fmul(
742 ; CHECK-NOT: <4 x float>
743 define float @fcmp_fadd_fmul(ptr nocapture readonly %a, i32 %n) nounwind readonly { ; per element: add a[i], multiply by a[i], or leave the accumulator unchanged
745 %cmp9 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
746 br i1 %cmp9, label %for.body.preheader, label %for.end
748 for.body.preheader: ; preds = %entry
749 %wide.trip.count = zext i32 %n to i64
752 for.body: ; preds = %for.inc, %for.body.preheader
753 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
754 %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] ; running accumulator, seeded with 0.0
755 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
756 %0 = load float, ptr %arrayidx, align 4
757 %cmp1 = fcmp ogt float %0, 1.000000e+00
758 br i1 %cmp1, label %if.then, label %if.else
760 if.then: ; preds = %for.body
761 %add = fadd fast float %0, %sum.010 ; a[i] > 1.0: accumulator + a[i]
764 if.else: ; preds = %for.body
765 %cmp8 = fcmp olt float %0, 3.000000e+00
766 br i1 %cmp8, label %if.then10, label %for.inc ; otherwise the accumulator passes through unchanged
768 if.then10: ; preds = %if.else
769 %mul = fmul fast float %0, %sum.010 ; a[i] < 3.0 on the else path: accumulator * a[i]
772 for.inc: ; preds = %if.then, %if.then10, %if.else
773 %sum.1 = phi float [ %add, %if.then ], [ %mul, %if.then10 ], [ %sum.010, %if.else ] ; mixes an fadd and an fmul update of the same accumulator
774 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
775 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
776 br i1 %exitcond, label %for.end, label %for.body
778 for.end: ; preds = %for.inc, %entry
779 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ] ; 0.0 when the loop was skipped, otherwise the final value
780 ret float %sum.0.lcssa
783 ; Float fadd + store patterns
784 ; Check lack of vectorisation of reduction code with a store back, given it
785 ; has loop dependency on a[i].
787 ; float fcmp_store_back(float a[], int LEN) {
789 ; for (int i = 0; i < LEN; i++) {
796 define float @fcmp_store_back(ptr nocapture %a, i32 %LEN) nounwind { ; running sum of a[i], written back into a[i]; not marked readonly because the loop stores through %a
797 ; CHECK-LABEL: @fcmp_store_back(
798 ; CHECK-NOT: <4 x float>
801 %cmp7 = icmp sgt i32 %LEN, 0 ; guard: the loop is skipped when LEN <= 0
802 br i1 %cmp7, label %for.body.preheader, label %for.end
804 for.body.preheader: ; preds = %entry
805 %wide.trip.count = zext i32 %LEN to i64
808 for.body: ; preds = %for.body, %for.body.preheader
809 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
810 %sum.08 = phi float [ 0.000000e+00, %for.body.preheader ], [ %add, %for.body ] ; running sum, seeded with 0.0
811 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
812 %0 = load float, ptr %arrayidx, align 4
813 %add = fadd fast float %0, %sum.08
814 store float %add, ptr %arrayidx, align 4 ; the partial sum is also used by this store, not only by the reduction phi
815 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
816 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
817 br i1 %exitcond, label %for.end, label %for.body
819 for.end: ; preds = %for.body, %entry
820 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] ; 0.0 when the loop was skipped, otherwise the final sum
821 ret float %sum.0.lcssa
824 ; CHECK-LABEL: @fcmp_0_add_select2(
825 ; CHECK: %[[V1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], zeroinitializer
826 ; CHECK: %[[V3:.*]] = add <4 x i64> %[[V2:.*]], <i64 2, i64 2, i64 2, i64 2>
827 ; CHECK: select <4 x i1> %[[V1]], <4 x i64> %[[V3]], <4 x i64> %[[V2]]
828 define i64 @fcmp_0_add_select2(ptr noalias %x, i64 %N) nounwind readonly { ; integer accumulator gains 2 for every x[i] that compares greater than 0.0
830 %cmp.1 = icmp sgt i64 %N, 0 ; guard: the loop is skipped when N <= 0
831 br i1 %cmp.1, label %for.header, label %for.end
833 for.header: ; preds = %entry
836 for.body: ; preds = %for.header, %for.body
837 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
838 %sum.1 = phi i64 [ 0, %for.header ], [ %sum.2, %for.body ] ; running i64 reduction value, seeded with 0
839 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
840 %0 = load float, ptr %arrayidx, align 4
841 %cmp.2 = fcmp ogt float %0, 0.000000e+00 ; FP compare drives an integer reduction
842 %add = add nsw i64 %sum.1, 2
843 %sum.2 = select i1 %cmp.2, i64 %add, i64 %sum.1 ; keep the previous value when the compare is false
844 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
845 %exitcond = icmp eq i64 %indvars.iv.next, %N ; N is already i64, so no zext is needed here
846 br i1 %exitcond, label %for.end, label %for.body
848 for.end: ; preds = %for.body, %entry
849 %1 = phi i64 [ 0, %entry ], [ %sum.2, %for.body ] ; 0 when the loop was skipped, otherwise the final value
853 ; CHECK-LABEL: @fcmp_0_sub_select1(
854 ; CHECK: %[[V1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], zeroinitializer
855 ; CHECK: %[[V3:.*]] = sub <4 x i32> %[[V2:.*]], <i32 2, i32 2, i32 2, i32 2>
856 ; CHECK: select <4 x i1> %[[V1]], <4 x i32> %[[V3]], <4 x i32> %[[V2]]
857 define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly { ; integer accumulator loses 2 for every x[i] that compares greater than 0.0
859 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
860 br i1 %cmp.1, label %for.header, label %for.end
862 for.header: ; preds = %entry
863 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
866 for.body: ; preds = %for.header, %for.body
867 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
868 %sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ] ; running i32 reduction value, seeded with 0
869 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
870 %0 = load float, ptr %arrayidx, align 4
871 %cmp.2 = fcmp ogt float %0, 0.000000e+00 ; FP compare drives an integer reduction
872 %sub = sub nsw i32 %sum.1, 2 ; the reduction operation under test
873 %sum.2 = select i1 %cmp.2, i32 %sub, i32 %sum.1 ; keep the previous value when the compare is false
874 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; counts up from 0 to %zext; only the reduction uses sub
875 %exitcond = icmp eq i64 %indvars.iv.next, %zext
876 br i1 %exitcond, label %for.end, label %for.body
878 for.end: ; preds = %for.body, %entry
879 %1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ] ; 0 when the loop was skipped, otherwise the final value
883 ; CHECK-LABEL: @fcmp_0_mult_select1(
884 ; CHECK: %[[V1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], zeroinitializer
885 ; CHECK: %[[V3:.*]] = mul <4 x i32> %[[V2:.*]], <i32 2, i32 2, i32 2, i32 2>
886 ; CHECK: select <4 x i1> %[[V1]], <4 x i32> %[[V3]], <4 x i32> %[[V2]]
887 define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly { ; integer accumulator is doubled for every x[i] that compares greater than 0.0
889 %cmp.1 = icmp sgt i32 %N, 0 ; guard: the loop is skipped when N <= 0
890 br i1 %cmp.1, label %for.header, label %for.end
892 for.header: ; preds = %entry
893 %zext = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
896 for.body: ; preds = %for.body, %for.header
897 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
898 %sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ] ; note: the product accumulator is seeded with 0, not 1
899 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
900 %0 = load float, ptr %arrayidx, align 4
901 %cmp.2 = fcmp ogt float %0, 0.000000e+00 ; FP compare drives an integer reduction
902 %mult = mul nsw i32 %sum.1, 2
903 %sum.2 = select i1 %cmp.2, i32 %mult, i32 %sum.1 ; keep the previous value when the compare is false
904 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
905 %exitcond = icmp eq i64 %indvars.iv.next, %zext
906 br i1 %exitcond, label %for.end, label %for.body
908 for.end: ; preds = %for.body, %entry
909 %1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ] ; 0 when the loop was skipped, otherwise the final value
913 @table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1
915 ; CHECK-LABEL: @non_reduction_index(
916 ; CHECK-NOT: <4 x i16>
917 define i16 @non_reduction_index(i16 noundef %val) { ; walks @table downward from index 12 and returns an index picked by the compare against %val
921 for.cond.cleanup: ; preds = %for.body
922 %spec.select.lcssa = phi i16 [ %spec.select, %for.body ]
923 ret i16 %spec.select.lcssa
925 for.body: ; preds = %entry, %for.body
926 %i.05 = phi i16 [ 12, %entry ], [ %sub, %for.body ] ; index, starts at the last table element
927 %k.04 = phi i16 [ 0, %entry ], [ %spec.select, %for.body ] ; selected index so far, 0 if none
928 %arrayidx = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 %i.05
929 %0 = load i16, ptr %arrayidx, align 1
930 %cmp1 = icmp ugt i16 %0, %val
931 %sub = add nsw i16 %i.05, -1 ; i - 1: both the next index and the value the select may record
932 %spec.select = select i1 %cmp1, i16 %sub, i16 %k.04 ; the selected value is the induction variable, not an arithmetic update of %k.04
933 %cmp.not = icmp eq i16 %sub, 0 ; exits once i - 1 reaches 0, so index 0 is never loaded
934 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
937 @tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1
939 ; CHECK-LABEL: @non_reduction_index_half(
940 ; CHECK-NOT: <4 x half>
941 define i16 @non_reduction_index_half(half noundef %val) { ; half-precision twin of @non_reduction_index, reading @tablef
945 for.cond.cleanup: ; preds = %for.body
946 %spec.select.lcssa = phi i16 [ %spec.select, %for.body ]
947 ret i16 %spec.select.lcssa
949 for.body: ; preds = %entry, %for.body
950 %i.05 = phi i16 [ 12, %entry ], [ %sub, %for.body ] ; index, starts at the last table element
951 %k.04 = phi i16 [ 0, %entry ], [ %spec.select, %for.body ] ; selected index so far, 0 if none
952 %arrayidx = getelementptr inbounds [13 x half], ptr @tablef, i16 0, i16 %i.05 ; index the half table so the load below reads real half values, not i16 bit patterns
953 %0 = load half, ptr %arrayidx, align 1
954 %fcmp1 = fcmp ugt half %0, %val
955 %sub = add nsw i16 %i.05, -1 ; i - 1: both the next index and the value the select may record
956 %spec.select = select i1 %fcmp1, i16 %sub, i16 %k.04 ; the selected value is the induction variable, not an arithmetic update of %k.04
957 %cmp.not = icmp eq i16 %sub, 0 ; exits once i - 1 reaches 0, so index 0 is never loaded
958 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
961 ; Make sure any check-not directives are not triggered by function declarations.