1 ; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -S | FileCheck %s
3 ; TODO: For now test for the `-epilogue-vectorization-minimum-VF` option. In
4 ; the future we need to replace this with a more meaningful test of the
5 ; epilogue vectorization cost-model.
6 ; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-minimum-VF=4 -S | FileCheck %s --check-prefix=CHECK-MIN-4
7 ; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -S | FileCheck %s --check-prefix=CHECK-MIN-D
9 target datalayout = "e-m:e-i64:64-n32:64" ; little-endian, ELF name mangling, i64 aligned to 64 bits, native integer widths 32 and 64
10 target triple = "powerpc64le-unknown-linux-gnu" ; the RUN lines pass no -mtriple, so the target presumably comes from here
12 ; Do not vectorize epilogues for loops with minsize attribute
14 ; CHECK-NOT: vector.main.loop.iter.check
15 ; CHECK-NOT: vec.epilog.iter.check
16 ; CHECK-NOT: vec.epilog.ph
17 ; CHECK-NOT: vec.epilog.vector.body
18 ; CHECK-NOT: vec.epilog.middle.block
; @f1: element-wise aa[i] = bb[i] + cc[i] for i in [0, N); the loop is bypassed when N <= 0.
; Uses attribute group #0 (minsize), the case the negative checks above are written for.
21 define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #0 {
23 %cmp1 = icmp sgt i32 %N, 0 ; guard: enter the loop only for a positive N
24 br i1 %cmp1, label %for.body.preheader, label %for.end
26 for.body.preheader: ; preds = %entry
27 %wide.trip.count = zext i32 %N to i64 ; N > 0 on this path, so this is the trip count as i64
30 for.body: ; preds = %for.body.preheader, %for.body
31 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
32 %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
33 %0 = load float, float* %arrayidx, align 4 ; bb[i]
34 %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
35 %1 = load float, float* %arrayidx2, align 4 ; cc[i]
36 %add = fadd fast float %0, %1 ; bb[i] + cc[i] with fast-math flags
37 %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
38 store float %add, float* %arrayidx4, align 4 ; aa[i] = bb[i] + cc[i]
39 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i + 1
40 %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count ; true while iterations remain
41 br i1 %exitcond, label %for.body, label %for.end.loopexit
43 for.end.loopexit: ; preds = %for.body
46 for.end: ; preds = %for.end.loopexit, %entry
50 ; Do not vectorize epilogues for loops with optsize attribute
52 ; CHECK-NOT: vector.main.loop.iter.check
53 ; CHECK-NOT: vec.epilog.iter.check
54 ; CHECK-NOT: vec.epilog.ph
55 ; CHECK-NOT: vec.epilog.vector.body
56 ; CHECK-NOT: vec.epilog.middle.block
; @f2: element-wise aa[i] = bb[i] + cc[i] for i in [0, N); the loop is bypassed when N <= 0.
; Uses attribute group #1 (optsize), the case the negative checks above are written for.
59 define dso_local void @f2(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #1 {
61 %cmp1 = icmp sgt i32 %N, 0 ; guard: enter the loop only for a positive N
62 br i1 %cmp1, label %for.body.preheader, label %for.end
64 for.body.preheader: ; preds = %entry
65 %wide.trip.count = zext i32 %N to i64 ; N > 0 on this path, so this is the trip count as i64
68 for.body: ; preds = %for.body.preheader, %for.body
69 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
70 %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
71 %0 = load float, float* %arrayidx, align 4 ; bb[i]
72 %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
73 %1 = load float, float* %arrayidx2, align 4 ; cc[i]
74 %add = fadd fast float %0, %1 ; bb[i] + cc[i] with fast-math flags
75 %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
76 store float %add, float* %arrayidx4, align 4 ; aa[i] = bb[i] + cc[i]
77 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i + 1
78 %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count ; true while iterations remain
79 br i1 %exitcond, label %for.body, label %for.end.loopexit
81 for.end.loopexit: ; preds = %for.body
84 for.end: ; preds = %for.end.loopexit, %entry
88 ; Do not vectorize the epilogue for loops with VF less than the default -epilogue-vectorization-minimum-VF of 16.
89 ; CHECK-MIN-D-LABEL: @f3
90 ; CHECK-MIN-D-NOT: vector.main.loop.iter.check
91 ; CHECK-MIN-D-NOT: vec.epilog.iter.check
92 ; CHECK-MIN-D-NOT: vec.epilog.ph
93 ; CHECK-MIN-D-NOT: vec.epilog.vector.body
94 ; CHECK-MIN-D-NOT: vec.epilog.middle.block
95 ; CHECK-MIN-D: ret void
97 ; Specify a smaller minimum VF (via `-epilogue-vectorization-minimum-VF=4`) and
98 ; make sure the epilogue gets vectorized in that case.
99 ; CHECK-MIN-4-LABEL: @f3
100 ; CHECK-MIN-4: vector.main.loop.iter.check
101 ; CHECK-MIN-4: vec.epilog.iter.check
102 ; CHECK-MIN-4: vec.epilog.ph
103 ; CHECK-MIN-4: vec.epilog.vector.body
104 ; CHECK-MIN-4: vec.epilog.middle.block
105 ; CHECK-MIN-4: ret void
; @f3: element-wise aa[i] = bb[i] + cc[i] for i in [0, N); the loop is bypassed when N <= 0.
; Has no attribute group (neither minsize nor optsize); the two check-prefix groups above
; inspect this function under the default and the lowered minimum epilogue VF.
107 define dso_local void @f3(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) {
109 %cmp1 = icmp sgt i32 %N, 0 ; guard: enter the loop only for a positive N
110 br i1 %cmp1, label %for.body.preheader, label %for.end
112 for.body.preheader: ; preds = %entry
113 %wide.trip.count = zext i32 %N to i64 ; N > 0 on this path, so this is the trip count as i64
116 for.body: ; preds = %for.body.preheader, %for.body
117 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
118 %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
119 %0 = load float, float* %arrayidx, align 4 ; bb[i]
120 %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
121 %1 = load float, float* %arrayidx2, align 4 ; cc[i]
122 %add = fadd fast float %0, %1 ; bb[i] + cc[i] with fast-math flags
123 %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
124 store float %add, float* %arrayidx4, align 4 ; aa[i] = bb[i] + cc[i]
125 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i + 1
126 %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count ; true while iterations remain
127 br i1 %exitcond, label %for.body, label %for.end.loopexit
129 for.end.loopexit: ; preds = %for.body
132 for.end: ; preds = %for.end.loopexit, %entry
136 attributes #0 = { minsize } ; attached to @f1
137 attributes #1 = { optsize } ; attached to @f2