1 ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
3 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6 ; Check vectorization of reduction code which has an fadd instruction after
7 ; an fcmp instruction which compares an array element and 0.
9 ; float fcmp_0_fadd_select1(float * restrict x, const int N) {
11 ; for (int i = 0; i < N; ++i)
12 ; if (x[i] > (float)0.)
17 ; CHECK-LABEL: @fcmp_0_fadd_select1(
18 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
19 ; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
20 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
21 define float @fcmp_0_fadd_select1(float* noalias %x, i32 %N) nounwind readonly { ; conditional fast-math fadd reduction over %x, gated by a compare with 0.0
23 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
24 br i1 %cmp.1, label %for.header, label %for.end
26 for.header: ; preds = %entry
27 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
30 for.body: ; preds = %for.header, %for.body
31 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
32 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
33 %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
34 %0 = load float, float* %arrayidx, align 4
35 %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
36 %add = fadd fast float %0, %sum.1
37 %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; take the updated sum only when the compare holds
38 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
39 %exitcond = icmp eq i64 %indvars.iv.next, %zext
40 br i1 %exitcond, label %for.end, label %for.body
42 for.end: ; preds = %for.body, %entry
43 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final sum
48 ; Check vectorization of reduction code which has an fadd instruction after
49 ; an fcmp instruction which compares an array element and 0.
51 ; double fcmp_0_fadd_select2(double * restrict x, const int N) {
53 ; for (int i = 0; i < N; ++i)
59 ; CHECK-LABEL: @fcmp_0_fadd_select2(
60 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
61 ; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
62 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
63 define double @fcmp_0_fadd_select2(double* noalias %x, i32 %N) nounwind readonly { ; double-precision variant: conditional fast-math fadd reduction gated by a compare with 0.0
65 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
66 br i1 %cmp.1, label %for.header, label %for.end
68 for.header: ; preds = %entry
69 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
72 for.body: ; preds = %for.header, %for.body
73 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
74 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
75 %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
76 %0 = load double, double* %arrayidx, align 4 ; NOTE(review): align 4 on a double load - confirm the under-alignment is intended
77 %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
78 %add = fadd fast double %0, %sum.1
79 %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; take the updated sum only when the compare holds
80 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
81 %exitcond = icmp eq i64 %indvars.iv.next, %zext
82 br i1 %exitcond, label %for.end, label %for.body
84 for.end: ; preds = %for.body, %entry
85 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final sum
90 ; Check vectorization of reduction code which has an fadd instruction after
91 ; an fcmp instruction which compares an array element and a floating-point
94 ; float fcmp_val_fadd_select1(float * restrict x, float y, const int N) {
96 ; for (int i = 0; i < N; ++i)
102 ; CHECK-LABEL: @fcmp_val_fadd_select1(
103 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %broadcast.splat
104 ; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
105 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
106 define float @fcmp_val_fadd_select1(float* noalias %x, float %y, i32 %N) nounwind readonly { ; conditional fast-math fadd reduction gated by a compare against the loop-invariant argument %y
108 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
109 br i1 %cmp.1, label %for.header, label %for.end
111 for.header: ; preds = %entry
112 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
115 for.body: ; preds = %for.header, %for.body
116 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
117 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
118 %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
119 %0 = load float, float* %arrayidx, align 4
120 %cmp.2 = fcmp fast ogt float %0, %y ; threshold is the scalar argument rather than a constant
121 %add = fadd fast float %0, %sum.1
122 %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; take the updated sum only when the compare holds
123 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
124 %exitcond = icmp eq i64 %indvars.iv.next, %zext
125 br i1 %exitcond, label %for.end, label %for.body
127 for.end: ; preds = %for.body, %entry
128 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final sum
133 ; Check vectorization of reduction code which has an fadd instruction after
134 ; an fcmp instruction which compares an array element and a floating-point
137 ; double fcmp_val_fadd_select2(double * restrict x, double y, const int N) {
139 ; for (int i = 0; i < N; ++i)
145 ; CHECK-LABEL: @fcmp_val_fadd_select2(
146 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %broadcast.splat
147 ; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
148 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
149 define double @fcmp_val_fadd_select2(double* noalias %x, double %y, i32 %N) nounwind readonly { ; double-precision variant: fadd reduction gated by a compare against the argument %y
151 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
152 br i1 %cmp.1, label %for.header, label %for.end
154 for.header: ; preds = %entry
155 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
158 for.body: ; preds = %for.header, %for.body
159 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
160 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
161 %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
162 %0 = load double, double* %arrayidx, align 4 ; NOTE(review): align 4 on a double load - confirm the under-alignment is intended
163 %cmp.2 = fcmp fast ogt double %0, %y ; threshold is the scalar argument rather than a constant
164 %add = fadd fast double %0, %sum.1
165 %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; take the updated sum only when the compare holds
166 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
167 %exitcond = icmp eq i64 %indvars.iv.next, %zext
168 br i1 %exitcond, label %for.end, label %for.body
170 for.end: ; preds = %for.body, %entry
171 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final sum
176 ; Check vectorization of reduction code which has an fadd instruction after
177 ; an fcmp instruction which compares an array element and another array
180 ; float fcmp_array_elm_fadd_select1(float * restrict x, float * restrict y,
183 ; for (int i = 0; i < N; ++i)
189 ; CHECK-LABEL: @fcmp_array_elm_fadd_select1(
190 ; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %[[V1:.*]]
191 ; CHECK: %[[V4:.*]] = fadd fast <4 x float> %[[V0]], %[[V3:.*]]
192 ; CHECK: select <4 x i1> %[[V2]], <4 x float> %[[V4]], <4 x float> %[[V3]]
193 define float @fcmp_array_elm_fadd_select1(float* noalias %x, float* noalias %y, i32 %N) nounwind readonly { ; fadd reduction over %x gated by an element-wise compare of x[i] with y[i]
195 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
196 br i1 %cmp.1, label %for.header, label %for.end
198 for.header: ; preds = %entry
199 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
202 for.body: ; preds = %for.body, %for.header
203 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
204 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
205 %arrayidx.1 = getelementptr inbounds float, float* %x, i64 %indvars.iv
206 %0 = load float, float* %arrayidx.1, align 4
207 %arrayidx.2 = getelementptr inbounds float, float* %y, i64 %indvars.iv
208 %1 = load float, float* %arrayidx.2, align 4 ; y[i] is used only as the compare threshold
209 %cmp.2 = fcmp fast ogt float %0, %1
210 %add = fadd fast float %0, %sum.1
211 %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; take the updated sum only when x[i] > y[i]
212 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
213 %exitcond = icmp eq i64 %indvars.iv.next, %zext
214 br i1 %exitcond, label %for.end, label %for.body
216 for.end: ; preds = %for.body, %entry
217 %2 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final sum
222 ; Check vectorization of reduction code which has an fadd instruction after
223 ; an fcmp instruction which compares an array element and another array
226 ; double fcmp_array_elm_fadd_select2(double * restrict x, double * restrict y,
229 ; for (int i = 0; i < N; ++i)
235 ; CHECK-LABEL: @fcmp_array_elm_fadd_select2(
236 ; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %[[V1:.*]]
237 ; CHECK: %[[V4:.*]] = fadd fast <4 x double> %[[V0]], %[[V3:.*]]
238 ; CHECK: select <4 x i1> %[[V2]], <4 x double> %[[V4]], <4 x double> %[[V3]]
239 define double @fcmp_array_elm_fadd_select2(double* noalias %x, double* noalias %y, i32 %N) nounwind readonly { ; double-precision variant: fadd reduction gated by comparing x[i] with y[i]
241 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
242 br i1 %cmp.1, label %for.header, label %for.end
244 for.header: ; preds = %entry
245 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
248 for.body: ; preds = %for.body, %for.header
249 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
250 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
251 %arrayidx.1 = getelementptr inbounds double, double* %x, i64 %indvars.iv
252 %0 = load double, double* %arrayidx.1, align 4 ; NOTE(review): align 4 on a double load - confirm the under-alignment is intended
253 %arrayidx.2 = getelementptr inbounds double, double* %y, i64 %indvars.iv
254 %1 = load double, double* %arrayidx.2, align 4 ; y[i] is used only as the compare threshold
255 %cmp.2 = fcmp fast ogt double %0, %1
256 %add = fadd fast double %0, %sum.1
257 %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; take the updated sum only when x[i] > y[i]
258 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
259 %exitcond = icmp eq i64 %indvars.iv.next, %zext
260 br i1 %exitcond, label %for.end, label %for.body
262 for.end: ; preds = %for.body, %entry
263 %2 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final sum
268 ; Check vectorization of reduction code which has an fsub instruction after
269 ; an fcmp instruction which compares an array element and 0.
271 ; float fcmp_0_fsub_select1(float * restrict x, const int N) {
273 ; for (int i = 0; i < N; ++i)
274 ; if (x[i] > (float)0.)
279 ; CHECK-LABEL: @fcmp_0_fsub_select1(
280 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
281 ; CHECK: %[[V3:.*]] = fsub fast <4 x float> %[[V2:.*]], %[[V0]]
282 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
283 define float @fcmp_0_fsub_select1(float* noalias %x, i32 %N) nounwind readonly { ; conditional fast-math fsub reduction over %x, gated by a compare with 0.0
285 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
286 br i1 %cmp.1, label %for.header, label %for.end
288 for.header: ; preds = %entry
289 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
292 for.body: ; preds = %for.body, %for.header
293 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
294 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
295 %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
296 %0 = load float, float* %arrayidx, align 4
297 %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
298 %sub = fsub fast float %sum.1, %0 ; accumulator is the left operand: sum - x[i]
299 %sum.2 = select i1 %cmp.2, float %sub, float %sum.1 ; take the updated value only when the compare holds
300 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
301 %exitcond = icmp eq i64 %indvars.iv.next, %zext
302 br i1 %exitcond, label %for.end, label %for.body
304 for.end: ; preds = %for.body, %entry
305 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final value
310 ; Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
311 ; float fcmp_0_fsub_select1_novectorize(float * restrict x, const int N) {
313 ; for (int i = 0; i < N; ++i)
314 ; if (x[i] > (float)0.)
319 ; CHECK-LABEL: @fcmp_0_fsub_select1_novectorize(
320 ; CHECK-NOT: <4 x float>
321 define float @fcmp_0_fsub_select1_novectorize(float* noalias %x, i32 %N) nounwind readonly { ; same conditional fsub reduction, but the fp instructions carry no fast-math flags
323 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
324 br i1 %cmp.1, label %for.header, label %for.end
326 for.header: ; preds = %entry
327 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
330 for.body: ; preds = %for.body, %for.header
331 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
332 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
333 %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
334 %0 = load float, float* %arrayidx, align 4
335 %cmp.2 = fcmp ogt float %0, 0.000000e+00 ; no fast-math flags on the compare
336 %sub = fsub float %sum.1, %0 ; no fast-math flags on the reduction op - the point of this negative test
337 %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
338 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
339 %exitcond = icmp eq i64 %indvars.iv.next, %zext
340 br i1 %exitcond, label %for.end, label %for.body
342 for.end: ; preds = %for.body, %entry
343 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final value
348 ; Check vectorization of reduction code which has an fsub instruction after
349 ; an fcmp instruction which compares an array element and 0.
351 ; double fcmp_0_fsub_select2(double * restrict x, const int N) {
353 ; for (int i = 0; i < N; ++i)
359 ; CHECK-LABEL: @fcmp_0_fsub_select2(
360 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
361 ; CHECK: %[[V3:.*]] = fsub fast <4 x double> %[[V2:.*]], %[[V0]]
362 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
363 define double @fcmp_0_fsub_select2(double* noalias %x, i32 %N) nounwind readonly { ; double-precision variant: conditional fast-math fsub reduction gated by a compare with 0.0
365 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
366 br i1 %cmp.1, label %for.header, label %for.end
368 for.header: ; preds = %entry
369 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
372 for.body: ; preds = %for.body, %for.header
373 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
374 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
375 %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
376 %0 = load double, double* %arrayidx, align 4 ; NOTE(review): align 4 on a double load - confirm the under-alignment is intended
377 %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
378 %sub = fsub fast double %sum.1, %0 ; accumulator is the left operand: sum - x[i]
379 %sum.2 = select i1 %cmp.2, double %sub, double %sum.1 ; take the updated value only when the compare holds
380 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
381 %exitcond = icmp eq i64 %indvars.iv.next, %zext
382 br i1 %exitcond, label %for.end, label %for.body
384 for.end: ; preds = %for.body, %entry
385 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final value
390 ; Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
392 ; double fcmp_0_fsub_select2_notvectorize(double * restrict x, const int N) {
394 ; for (int i = 0; i < N; ++i)
400 ; CHECK-LABEL: @fcmp_0_fsub_select2_notvectorize(
401 ; CHECK-NOT: <4 x double>
402 define double @fcmp_0_fsub_select2_notvectorize(double* noalias %x, i32 %N) nounwind readonly { ; double-precision conditional fsub reduction whose fp instructions carry no fast-math flags
404 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
405 br i1 %cmp.1, label %for.header, label %for.end
407 for.header: ; preds = %entry
408 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
411 for.body: ; preds = %for.body, %for.header
412 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
413 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
414 %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
415 %0 = load double, double* %arrayidx, align 4 ; NOTE(review): align 4 on a double load - confirm the under-alignment is intended
416 %cmp.2 = fcmp ogt double %0, 0.000000e+00 ; no fast-math flags on the compare
417 %sub = fsub double %sum.1, %0 ; no fast-math flags on the reduction op - the point of this negative test
418 %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
419 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
420 %exitcond = icmp eq i64 %indvars.iv.next, %zext
421 br i1 %exitcond, label %for.end, label %for.body
423 for.end: ; preds = %for.body, %entry
424 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final value
429 ; Check vectorization of reduction code which has an fmul instruction after
430 ; an fcmp instruction which compares an array element and 0.
432 ; float fcmp_0_fmult_select1(float * restrict x, const int N) {
434 ; for (int i = 0; i < N; ++i)
435 ; if (x[i] > (float)0.)
440 ; CHECK-LABEL: @fcmp_0_fmult_select1(
441 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
442 ; CHECK: %[[V3:.*]] = fmul fast <4 x float> %[[V2:.*]], %[[V0]]
443 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
444 define float @fcmp_0_fmult_select1(float* noalias %x, i32 %N) nounwind readonly { ; conditional fast-math fmul reduction over %x, gated by a compare with 0.0
446 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
447 br i1 %cmp.1, label %for.header, label %for.end
449 for.header: ; preds = %entry
450 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
453 for.body: ; preds = %for.body, %for.header
454 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
455 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value; note the start value is 0.0 even though the op is a multiply
456 %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
457 %0 = load float, float* %arrayidx, align 4
458 %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
459 %mult = fmul fast float %sum.1, %0
460 %sum.2 = select i1 %cmp.2, float %mult, float %sum.1 ; take the updated value only when the compare holds
461 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
462 %exitcond = icmp eq i64 %indvars.iv.next, %zext
463 br i1 %exitcond, label %for.end, label %for.body
465 for.end: ; preds = %for.body, %entry
466 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final value
471 ; Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
473 ; float fcmp_0_fmult_select1_notvectorize(float * restrict x, const int N) {
475 ; for (int i = 0; i < N; ++i)
476 ; if (x[i] > (float)0.)
481 ; CHECK-LABEL: @fcmp_0_fmult_select1_notvectorize(
482 ; CHECK-NOT: <4 x float>
483 define float @fcmp_0_fmult_select1_notvectorize(float* noalias %x, i32 %N) nounwind readonly { ; conditional fmul reduction whose fp instructions carry no fast-math flags
485 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
486 br i1 %cmp.1, label %for.header, label %for.end
488 for.header: ; preds = %entry
489 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
492 for.body: ; preds = %for.body, %for.header
493 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
494 %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
495 %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
496 %0 = load float, float* %arrayidx, align 4
497 %cmp.2 = fcmp ogt float %0, 0.000000e+00 ; no fast-math flags on the compare
498 %mult = fmul float %sum.1, %0 ; no fast-math flags on the reduction op - the point of this negative test
499 %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
500 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
501 %exitcond = icmp eq i64 %indvars.iv.next, %zext
502 br i1 %exitcond, label %for.end, label %for.body
504 for.end: ; preds = %for.body, %entry
505 %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final value
510 ; Check vectorization of reduction code which has an fmul instruction after
511 ; an fcmp instruction which compares an array element and 0.
513 ; double fcmp_0_fmult_select2(double * restrict x, const int N) {
515 ; for (int i = 0; i < N; ++i)
521 ; CHECK-LABEL: @fcmp_0_fmult_select2(
522 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
523 ; CHECK: %[[V3:.*]] = fmul fast <4 x double> %[[V2:.*]], %[[V0]]
524 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
525 define double @fcmp_0_fmult_select2(double* noalias %x, i32 %N) nounwind readonly { ; double-precision variant: conditional fast-math fmul reduction gated by a compare with 0.0
527 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
528 br i1 %cmp.1, label %for.header, label %for.end
530 for.header: ; preds = %entry
531 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
534 for.body: ; preds = %for.body, %for.header
535 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
536 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value; note the start value is 0.0 even though the op is a multiply
537 %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
538 %0 = load double, double* %arrayidx, align 4 ; NOTE(review): align 4 on a double load - confirm the under-alignment is intended
539 %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
540 %mult = fmul fast double %sum.1, %0
541 %sum.2 = select i1 %cmp.2, double %mult, double %sum.1 ; take the updated value only when the compare holds
542 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
543 %exitcond = icmp eq i64 %indvars.iv.next, %zext
544 br i1 %exitcond, label %for.end, label %for.body
546 for.end: ; preds = %for.body, %entry
547 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final value
552 ; Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
554 ; double fcmp_0_fmult_select2_notvectorize(double * restrict x, const int N) {
556 ; for (int i = 0; i < N; ++i)
562 ; CHECK-LABEL: @fcmp_0_fmult_select2_notvectorize(
563 ; CHECK-NOT: <4 x double>
564 define double @fcmp_0_fmult_select2_notvectorize(double* noalias %x, i32 %N) nounwind readonly { ; double-precision conditional fmul reduction whose fp instructions carry no fast-math flags
566 %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
567 br i1 %cmp.1, label %for.header, label %for.end
569 for.header: ; preds = %entry
570 %zext = zext i32 %N to i64 ; trip count widened to the i64 induction type
573 for.body: ; preds = %for.body, %for.header
574 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
575 %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running reduction value, starts at 0.0
576 %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
577 %0 = load double, double* %arrayidx, align 4 ; NOTE(review): align 4 on a double load - confirm the under-alignment is intended
578 %cmp.2 = fcmp ogt double %0, 0.000000e+00 ; no fast-math flags on the compare
579 %mult = fmul double %sum.1, %0 ; no fast-math flags on the reduction op - the point of this negative test
580 %sum.2 = select i1 %cmp.2, double %mult, double %sum.1
581 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
582 %exitcond = icmp eq i64 %indvars.iv.next, %zext
583 br i1 %exitcond, label %for.end, label %for.body
585 for.end: ; preds = %for.body, %entry
586 %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was skipped, otherwise the final value
590 ; Float multi pattern
591 ; Check vectorisation of reduction code with a pair of selects to different
594 ; float fcmp_multi(float *a, int n) {
596 ; for (int i=0;i<n;i++) {
607 ; CHECK-LABEL: @fcmp_multi(
608 ; CHECK: %[[C1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], <float 1.000000e+00,
609 ; CHECK: %[[C2:.*]] = fcmp olt <4 x float> %[[V0]], <float 3.000000e+00,
610 ; CHECK-DAG: %[[M1:.*]] = fmul fast <4 x float> %[[V0]], <float 3.000000e+00,
611 ; CHECK-DAG: %[[M2:.*]] = fmul fast <4 x float> %[[V0]], <float 2.000000e+00,
612 ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
613 ; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
614 ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
615 ; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
616 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]]
617 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]]
618 ; CHECK: fadd fast <4 x float> %[[S2]],
619 define float @fcmp_multi(float* nocapture readonly %a, i32 %n) nounwind readonly { ; fadd reduction whose addend is a[i], a[i]*2 or a[i]*3 depending on two compares
621 %cmp10 = icmp sgt i32 %n, 0 ; the loop is skipped entirely when n <= 0
622 br i1 %cmp10, label %for.body.preheader, label %for.end
624 for.body.preheader: ; preds = %entry
625 %wide.trip.count = zext i32 %n to i64 ; trip count widened to the i64 induction type
628 for.body: ; preds = %for.inc, %for.body.preheader
629 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
630 %sum.011 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] ; running sum, starts at 0.0
631 %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
632 %0 = load float, float* %arrayidx, align 4
633 %cmp1 = fcmp ogt float %0, 1.000000e+00
634 br i1 %cmp1, label %for.inc, label %if.else ; a[i] > 1.0 goes straight to the accumulate with a[i] unchanged
636 if.else: ; preds = %for.body
637 %cmp8 = fcmp olt float %0, 3.000000e+00
638 br i1 %cmp8, label %if.then10, label %if.else14
640 if.then10: ; preds = %if.else
641 %mul = fmul fast float %0, 2.000000e+00 ; addend on the a[i] < 3.0 path
644 if.else14: ; preds = %if.else
645 %mul17 = fmul fast float %0, 3.000000e+00 ; addend on the remaining path
648 for.inc: ; preds = %for.body, %if.else14, %if.then10
649 %.pn = phi float [ %mul, %if.then10 ], [ %mul17, %if.else14 ], [ %0, %for.body ] ; addend chosen by the path taken
650 %sum.1 = fadd fast float %.pn, %sum.011 ; single fadd performs the reduction on every path
651 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
652 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
653 br i1 %exitcond, label %for.end, label %for.body
655 for.end: ; preds = %for.inc, %entry
656 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ] ; 0.0 if the loop was skipped, otherwise the final sum
657 ret float %sum.0.lcssa
660 ; Float fadd + fsub patterns
661 ; Check vectorisation of reduction code with a pair of selects to different
662 ; instructions { fadd, fsub } but equivalent (change in constant).
664 ; float fcmp_fadd_fsub(float *a, int n) {
666 ; for (int i=0;i<n;i++) {
675 ; CHECK-LABEL: @fcmp_fadd_fsub(
676 ; CHECK: %[[C1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], <float 1.000000e+00,
677 ; CHECK: %[[C2:.*]] = fcmp olt <4 x float> %[[V0]], <float 3.000000e+00,
678 ; CHECK-DAG: %[[SUB:.*]] = fsub fast <4 x float>
679 ; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float>
680 ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
681 ; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
682 ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
683 ; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
684 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]]
685 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]]
686 define float @fcmp_fadd_fsub(float* nocapture readonly %a, i32 %n) nounwind readonly { ; reduction that adds a[i], subtracts a[i], or leaves the sum unchanged, depending on two compares
688 %cmp9 = icmp sgt i32 %n, 0 ; the loop is skipped entirely when n <= 0
689 br i1 %cmp9, label %for.body.preheader, label %for.end
691 for.body.preheader: ; preds = %entry
692 %wide.trip.count = zext i32 %n to i64 ; trip count widened to the i64 induction type
695 for.body: ; preds = %for.inc, %for.body.preheader
696 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
697 %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] ; running sum, starts at 0.0
698 %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
699 %0 = load float, float* %arrayidx, align 4
700 %cmp1 = fcmp ogt float %0, 1.000000e+00
701 br i1 %cmp1, label %if.then, label %if.else
703 if.then: ; preds = %for.body
704 %add = fadd fast float %0, %sum.010 ; a[i] > 1.0: sum + a[i]
707 if.else: ; preds = %for.body
708 %cmp8 = fcmp olt float %0, 3.000000e+00
709 br i1 %cmp8, label %if.then10, label %for.inc ; falling through to for.inc leaves the sum unchanged
711 if.then10: ; preds = %if.else
712 %sub = fsub fast float %sum.010, %0 ; a[i] < 3.0 on the else path: sum - a[i]
715 for.inc: ; preds = %if.then, %if.then10, %if.else
716 %sum.1 = phi float [ %add, %if.then ], [ %sub, %if.then10 ], [ %sum.010, %if.else ] ; merged result of the add, sub and unchanged paths
717 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
718 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
719 br i1 %exitcond, label %for.end, label %for.body
721 for.end: ; preds = %for.inc, %entry
722 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ] ; 0.0 if the loop was skipped, otherwise the final sum
723 ret float %sum.0.lcssa
726 ; Float fadd + fmul patterns
727 ; Check lack of vectorisation of reduction code with a pair of non-compatible
728 ; instructions { fadd, fmul }.
730 ; float fcmp_fadd_fmul(float *a, int n) {
732 ; for (int i=0;i<n;i++) {
741 ; CHECK-LABEL: @fcmp_fadd_fmul(
742 ; CHECK-NOT: <4 x float>
743 define float @fcmp_fadd_fmul(float* nocapture readonly %a, i32 %n) nounwind readonly { ; negative test: the same accumulator is updated by fadd on one path and fmul on another
745 %cmp9 = icmp sgt i32 %n, 0 ; the loop is skipped entirely when n <= 0
746 br i1 %cmp9, label %for.body.preheader, label %for.end
748 for.body.preheader: ; preds = %entry
749 %wide.trip.count = zext i32 %n to i64 ; trip count widened to the i64 induction type
752 for.body: ; preds = %for.inc, %for.body.preheader
753 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
754 %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] ; running accumulator, starts at 0.0
755 %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
756 %0 = load float, float* %arrayidx, align 4
757 %cmp1 = fcmp ogt float %0, 1.000000e+00
758 br i1 %cmp1, label %if.then, label %if.else
760 if.then: ; preds = %for.body
761 %add = fadd fast float %0, %sum.010 ; a[i] > 1.0: accumulator + a[i]
764 if.else: ; preds = %for.body
765 %cmp8 = fcmp olt float %0, 3.000000e+00
766 br i1 %cmp8, label %if.then10, label %for.inc ; falling through to for.inc leaves the accumulator unchanged
768 if.then10: ; preds = %if.else
769 %mul = fmul fast float %0, %sum.010 ; a[i] < 3.0 on the else path: accumulator * a[i]
772 for.inc: ; preds = %if.then, %if.then10, %if.else
773 %sum.1 = phi float [ %add, %if.then ], [ %mul, %if.then10 ], [ %sum.010, %if.else ] ; merges an add update, a multiply update and the unchanged value
774 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
775 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
776 br i1 %exitcond, label %for.end, label %for.body
778 for.end: ; preds = %for.inc, %entry
779 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ] ; 0.0 if the loop was skipped, otherwise the final value
780 ret float %sum.0.lcssa
783 ; Float fadd + store patterns
784 ; Check lack of vectorisation of reduction code with a store back, given it
785 ; has loop dependency on a[i].
787 ; float fcmp_store_back(float a[], int LEN) {
789 ; for (int i = 0; i < LEN; i++) {
796 define float @fcmp_store_back(float* nocapture %a, i32 %LEN) nounwind readonly { ; negative test: running fadd sum that is also stored back into a[i] each iteration
797 ; CHECK-LABEL: @fcmp_store_back(
798 ; CHECK-NOT: <4 x float>
801 %cmp7 = icmp sgt i32 %LEN, 0 ; the loop is skipped entirely when LEN <= 0
802 br i1 %cmp7, label %for.body.preheader, label %for.end
804 for.body.preheader: ; preds = %entry
805 %wide.trip.count = zext i32 %LEN to i64 ; trip count widened to the i64 induction type
808 for.body: ; preds = %for.body, %for.body.preheader
809 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
810 %sum.08 = phi float [ 0.000000e+00, %for.body.preheader ], [ %add, %for.body ] ; running sum, starts at 0.0
811 %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
812 %0 = load float, float* %arrayidx, align 4
813 %add = fadd fast float %0, %sum.08
814 store float %add, float* %arrayidx, align 4 ; running sum written back to a[i]; NOTE(review): the function is declared readonly despite this store - confirm the attribute is intended
815 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
816 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
817 br i1 %exitcond, label %for.end, label %for.body
819 for.end: ; preds = %for.body, %entry
820 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] ; 0.0 if the loop was skipped, otherwise the final sum
821 ret float %sum.0.lcssa
824 ; Make sure any check-not directives are not triggered by function declarations.