2 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
3 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
4 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
5 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW
7 ; Test that the MaxVF for the following loop, that has no dependence distances,
8 ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
9 ; (maximized bandwidth for i8 in the loop).
10 define void @test0(i32* %a, i8* %b, i32* %c) #0 {
; Expected vectorizer debug output, by -scalable-vectorization setting of the run lines:
;   =on        (prefix CHECK_SCALABLE_ON)              scalable VF vscale x 4 is feasible; fixed VF 4 is selected.
;   =preferred (prefix CHECK_SCALABLE_PREFERRED)       scalable VF vscale x 4 is feasible and is selected.
;   =off       (prefix CHECK_SCALABLE_DISABLED)        no feasible scalable VF is reported; fixed VF 4 is selected.
;   =preferred with -vectorizer-maximize-bandwidth
;              (prefix CHECK_SCALABLE_PREFERRED_MAXBW) scalable VF vscale x 16 is feasible and is selected.
11 ; CHECK: LV: Checking a loop in "test0"
12 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
13 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
14 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
15 ; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
16 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
17 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
18 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16
19 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: vscale x 16
; Loop body: a[iv] = zext(b[iv]) + c[iv]. Every access uses the same index %iv;
; the store goes through %a while the loads go through %c and %b.
24 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
25 %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
26 %0 = load i32, i32* %arrayidx, align 4
27 %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
; NOTE(review): align 4 on an i8 load whose index advances by one element per
; iteration looks stronger than can hold for every %iv - confirm it is intentional.
28 %1 = load i8, i8* %arrayidx2, align 4
; The i8 value is widened to i32, so the loop mixes an 8-bit and 32-bit element type.
29 %zext = zext i8 %1 to i32
30 %add = add nsw i32 %zext, %0
31 %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
32 store i32 %add, i32* %arrayidx5, align 4
; %iv starts at 0, steps by 1, and the loop exits once %iv.next reaches 1024,
; so the body runs for %iv = 0 .. 1023.
33 %iv.next = add nuw nsw i64 %iv, 1
34 %exitcond.not = icmp eq i64 %iv.next, 1024
35 br i1 %exitcond.not, label %exit, label %loop
41 ; Test that the MaxVF for the following loop, with a dependence distance
42 ; of 64 elements, is calculated as vscale x 4, i.e. 64 / (maxvscale = 16).
43 define void @test1(i32* %a, i8* %b) #0 {
; Expected vectorizer debug output, by -scalable-vectorization setting of the run lines:
;   =on        (prefix CHECK_SCALABLE_ON)              scalable VF vscale x 4 is feasible; fixed VF 4 is selected.
;   =preferred (prefix CHECK_SCALABLE_PREFERRED)       scalable VF vscale x 4 is feasible and is selected.
;   =off       (prefix CHECK_SCALABLE_DISABLED)        no feasible scalable VF is reported; fixed VF 4 is selected.
;   =preferred with -vectorizer-maximize-bandwidth
;              (prefix CHECK_SCALABLE_PREFERRED_MAXBW) scalable VF vscale x 4 is feasible; fixed VF 16 is selected.
44 ; CHECK: LV: Checking a loop in "test1"
45 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
46 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
47 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
48 ; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
49 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
50 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
51 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4
52 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
; Loop body: a[iv + 64] = zext(b[iv]) + a[iv]. The store to %a is 64 i32
; elements ahead of the load from %a.
57 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
58 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
59 %0 = load i32, i32* %arrayidx, align 4
60 %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
; NOTE(review): align 4 on an i8 load whose index advances by one element per
; iteration looks stronger than can hold for every %iv - confirm it is intentional.
61 %1 = load i8, i8* %arrayidx2, align 4
62 %zext = zext i8 %1 to i32
63 %add = add nsw i32 %zext, %0
; Store index = %iv + 64: this constant is the dependence distance under test.
64 %2 = add nuw nsw i64 %iv, 64
65 %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
66 store i32 %add, i32* %arrayidx5, align 4
; %iv starts at 0, steps by 1, and the loop exits once %iv.next reaches 1024.
67 %iv.next = add nuw nsw i64 %iv, 1
68 %exitcond.not = icmp eq i64 %iv.next, 1024
69 br i1 %exitcond.not, label %exit, label %loop
75 ; Test that the MaxVF for the following loop, with a dependence distance
76 ; of 32 elements, is calculated as vscale x 2, i.e. 32 / (maxvscale = 16).
77 define void @test2(i32* %a, i8* %b) #0 {
; Expected vectorizer debug output, by -scalable-vectorization setting of the run lines:
;   =on        (prefix CHECK_SCALABLE_ON)              scalable VF vscale x 2 is feasible; fixed VF 4 is selected.
;   =preferred (prefix CHECK_SCALABLE_PREFERRED)       scalable VF vscale x 2 is feasible; fixed VF 4 is selected.
;   =off       (prefix CHECK_SCALABLE_DISABLED)        no feasible scalable VF is reported; fixed VF 4 is selected.
;   =preferred with -vectorizer-maximize-bandwidth
;              (prefix CHECK_SCALABLE_PREFERRED_MAXBW) scalable VF vscale x 2 is feasible; fixed VF 16 is selected.
78 ; CHECK: LV: Checking a loop in "test2"
79 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
80 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
81 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
82 ; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
83 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
84 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
85 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2
86 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
; Loop body: a[iv + 32] = zext(b[iv]) + a[iv]. The store to %a is 32 i32
; elements ahead of the load from %a.
91 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
92 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
93 %0 = load i32, i32* %arrayidx, align 4
94 %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
; NOTE(review): align 4 on an i8 load whose index advances by one element per
; iteration looks stronger than can hold for every %iv - confirm it is intentional.
95 %1 = load i8, i8* %arrayidx2, align 4
96 %zext = zext i8 %1 to i32
97 %add = add nsw i32 %zext, %0
; Store index = %iv + 32: this constant is the dependence distance under test.
98 %2 = add nuw nsw i64 %iv, 32
99 %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
100 store i32 %add, i32* %arrayidx5, align 4
; %iv starts at 0, steps by 1, and the loop exits once %iv.next reaches 1024.
101 %iv.next = add nuw nsw i64 %iv, 1
102 %exitcond.not = icmp eq i64 %iv.next, 1024
103 br i1 %exitcond.not, label %exit, label %loop
109 ; Test that the MaxVF for the following loop, with a dependence distance
110 ; of 16 elements, is calculated as vscale x 1, i.e. 16 / (maxvscale = 16).
111 define void @test3(i32* %a, i8* %b) #0 {
; Expected vectorizer debug output, by -scalable-vectorization setting of the run lines:
;   =on        (prefix CHECK_SCALABLE_ON)              scalable VF vscale x 1 is feasible; fixed VF 4 is selected.
;   =preferred (prefix CHECK_SCALABLE_PREFERRED)       scalable VF vscale x 1 is feasible; fixed VF 4 is selected.
;   =off       (prefix CHECK_SCALABLE_DISABLED)        no feasible scalable VF is reported; fixed VF 4 is selected.
;   =preferred with -vectorizer-maximize-bandwidth
;              (prefix CHECK_SCALABLE_PREFERRED_MAXBW) scalable VF vscale x 1 is feasible; fixed VF 16 is selected.
112 ; CHECK: LV: Checking a loop in "test3"
113 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
114 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
115 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
116 ; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
117 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
118 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
119 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1
120 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
; Loop body: a[iv + 16] = zext(b[iv]) + a[iv]. The store to %a is 16 i32
; elements ahead of the load from %a.
125 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
126 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
127 %0 = load i32, i32* %arrayidx, align 4
128 %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
; NOTE(review): align 4 on an i8 load whose index advances by one element per
; iteration looks stronger than can hold for every %iv - confirm it is intentional.
129 %1 = load i8, i8* %arrayidx2, align 4
130 %zext = zext i8 %1 to i32
131 %add = add nsw i32 %zext, %0
; Store index = %iv + 16: this constant is the dependence distance under test.
132 %2 = add nuw nsw i64 %iv, 16
133 %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
134 store i32 %add, i32* %arrayidx5, align 4
; %iv starts at 0, steps by 1, and the loop exits once %iv.next reaches 1024.
135 %iv.next = add nuw nsw i64 %iv, 1
136 %exitcond.not = icmp eq i64 %iv.next, 1024
137 br i1 %exitcond.not, label %exit, label %loop
143 ; Test the fallback to a fixed-width VF when scalable vectors are not feasible,
144 ; e.g. because the dependence distance (8 elements here) is smaller than maxvscale = 16.
145 define void @test4(i32* %a, i32* %b) #0 {
; Expected vectorizer debug output: under all four run-line configurations no
; feasible scalable VF is reported and the fixed VF 4 is selected.
146 ; CHECK: LV: Checking a loop in "test4"
147 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
148 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
149 ; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
150 ; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
151 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
152 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
153 ; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF
154 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 4
; Loop body: a[iv + 8] = b[iv] + a[iv]. Both loads are i32 here (no i8 operand),
; and the store to %a is 8 i32 elements ahead of the load from %a.
159 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
160 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
161 %0 = load i32, i32* %arrayidx, align 4
162 %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
163 %1 = load i32, i32* %arrayidx2, align 4
164 %add = add nsw i32 %1, %0
; Store index = %iv + 8: this constant is the dependence distance under test.
165 %2 = add nuw nsw i64 %iv, 8
166 %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
167 store i32 %add, i32* %arrayidx5, align 4
; %iv starts at 0, steps by 1, and the loop exits once %iv.next reaches 1024.
168 %iv.next = add nuw nsw i64 %iv, 1
169 %exitcond.not = icmp eq i64 %iv.next, 1024
170 br i1 %exitcond.not, label %exit, label %loop
; Attribute group referenced as #0 by test0 through test4. The upper bound of
; vscale_range, 16, is the "maxvscale = 16" that the per-test comments rely on.
; NOTE(review): the lower bound 0 is presumably "no minimum" - confirm against
; the LangRef for the LLVM version this test targets.
176 attributes #0 = { vscale_range(0, 16) }