1 ; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck --check-prefixes=CHECK,PREDICATED %s
2 ; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue -S | FileCheck --check-prefixes=CHECK,SCALAR %s
4 ; This file contains the same function but with different trip-count PGO hints
6 ; The function is vectorized if there are no trip-count hints
; Baseline variant: the loop's latch branch below carries no !prof metadata,
; so no profile-based trip-count hint is attached to this loop.
7 define i32 @foo_no_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
8 ; CHECK-LABEL: @foo_no_trip_count(
; With predicated tail folding preferred, a vector loop body is expected.
9 ; PREDICATED: vector.body
; Single-block loop; %idx starts at 0 and is advanced by %inc each iteration.
14 for.body: ; preds = %for.body, %entry
15 %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Load one byte from element %idx of %a and of %b (each indexed as a [32 x i8]).
16 %a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx
17 %0 = load i8, ptr %a.index, align 1
18 %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx
19 %1 = load i8, ptr %b.index, align 1
; NOTE(review): the definition of %2 is not visible in this excerpt (the
; embedded numbering skips a line here); presumably it combines %0 and %1.
; Confirm against the full file.
21 %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx
22 store i8 %2, ptr %c.index, align 1
23 %inc = add nsw i32 %idx, 1
; The exit test compares the pre-increment %idx (not %inc) with %bound, so the
; iteration where %idx == %bound still executes before the loop is left.
24 %exitcond = icmp eq i32 %idx, %bound
25 br i1 %exitcond, label %for.end, label %for.body
27 for.end: ; preds = %for.body
31 ; If trip-count is equal to 4, the function is not vectorized
; Low trip-count variant: same loop as the other functions in this file, but
; the latch branch is annotated with !prof !0 (branch weights 10 and 30).
32 define i32 @foo_low_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
33 ; CHECK-LABEL: @foo_low_trip_count(
; Neither configuration may produce a vector loop body for this function.
34 ; PREDICATED-NOT: vector.body
35 ; SCALAR-NOT: vector.body
; Single-block loop; %idx starts at 0 and is advanced by %inc each iteration.
39 for.body: ; preds = %for.body, %entry
40 %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Load one byte from element %idx of %a and of %b (each indexed as a [32 x i8]).
41 %a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx
42 %0 = load i8, ptr %a.index, align 1
43 %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx
44 %1 = load i8, ptr %b.index, align 1
; NOTE(review): the definition of %2 is not visible in this excerpt (the
; embedded numbering skips a line here); presumably it combines %0 and %1.
; Confirm against the full file.
46 %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx
47 store i8 %2, ptr %c.index, align 1
48 %inc = add nsw i32 %idx, 1
; The exit test compares the pre-increment %idx (not %inc) with %bound.
49 %exitcond = icmp eq i32 %idx, %bound
; The first weight in !0 belongs to the true successor (the exit to %for.end),
; the second to the back-edge to %for.body.
50 br i1 %exitcond, label %for.end, label %for.body, !prof !0
52 for.end: ; preds = %for.body
56 ; If trip-count is equal to 10, the function is vectorized when predicated tail folding is chosen
; Mid trip-count variant: same loop as the other functions in this file, but
; the latch branch is annotated with !prof !1 (branch weights 10 and 90).
57 define i32 @foo_mid_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
58 ; CHECK-LABEL: @foo_mid_trip_count(
; A vector loop body is expected only when predicated tail folding is
; preferred; the scalar-epilogue configuration must not produce one.
59 ; PREDICATED: vector.body
60 ; SCALAR-NOT: vector.body
; Single-block loop; %idx starts at 0 and is advanced by %inc each iteration.
64 for.body: ; preds = %for.body, %entry
65 %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Load one byte from element %idx of %a and of %b (each indexed as a [32 x i8]).
66 %a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx
67 %0 = load i8, ptr %a.index, align 1
68 %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx
69 %1 = load i8, ptr %b.index, align 1
; NOTE(review): the definition of %2 is not visible in this excerpt (the
; embedded numbering skips a line here); presumably it combines %0 and %1.
; Confirm against the full file.
71 %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx
72 store i8 %2, ptr %c.index, align 1
73 %inc = add nsw i32 %idx, 1
; The exit test compares the pre-increment %idx (not %inc) with %bound.
74 %exitcond = icmp eq i32 %idx, %bound
; The first weight in !1 belongs to the true successor (the exit to %for.end),
; the second to the back-edge to %for.body.
75 br i1 %exitcond, label %for.end, label %for.body, !prof !1
77 for.end: ; preds = %for.body
81 ; If trip-count is equal to 40, the function is always vectorized
; High trip-count variant: same loop as the other functions in this file, but
; the latch branch is annotated with !prof !2 (branch weights 10 and 390).
82 define i32 @foo_high_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
83 ; CHECK-LABEL: @foo_high_trip_count(
; With predicated tail folding preferred, a vector loop body is expected.
84 ; PREDICATED: vector.body
; Single-block loop; %idx starts at 0 and is advanced by %inc each iteration.
89 for.body: ; preds = %for.body, %entry
90 %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Load one byte from element %idx of %a and of %b (each indexed as a [32 x i8]).
91 %a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx
92 %0 = load i8, ptr %a.index, align 1
93 %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx
94 %1 = load i8, ptr %b.index, align 1
; NOTE(review): the definition of %2 is not visible in this excerpt (the
; embedded numbering skips a line here); presumably it combines %0 and %1.
; Confirm against the full file.
96 %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx
97 store i8 %2, ptr %c.index, align 1
98 %inc = add nsw i32 %idx, 1
; The exit test compares the pre-increment %idx (not %inc) with %bound.
99 %exitcond = icmp eq i32 %idx, %bound
; The first weight in !2 belongs to the true successor (the exit to %for.end),
; the second to the back-edge to %for.body.
100 br i1 %exitcond, label %for.end, label %for.body, !prof !2
102 for.end: ; preds = %for.body
; Profile metadata attached to the loop latch branches above. In each node the
; first weight applies to the branch's true successor (the loop exit) and the
; second to its false successor (the back-edge).
; The comments on the functions describe !0, !1 and !2 as trip counts of 4, 10
; and 40 respectively, which is consistent with
; (back-edge weight / exit weight) + 1.
106 !0 = !{!"branch_weights", i32 10, i32 30}
107 !1 = !{!"branch_weights", i32 10, i32 90}
108 !2 = !{!"branch_weights", i32 10, i32 390}