2 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
3 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
4 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
6 ; Test that the MaxVF for the following loop, which has no dependence distances,
7 ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
8 ; (maximized bandwidth for i8 in the loop).
; Loop body: a[iv] = zext(b[iv]) + c[iv], where a and c hold i32 elements and
; b holds i8 elements. The store and both loads use different pointer arguments.
; Expected: with scalable vectorization on, vscale x 4 is reported feasible and
; a fixed VF of 16 is selected; with bandwidth maximization, vscale x 16 is
; reported feasible and selected; with scalable vectorization off, no scalable
; VF is reported and a fixed VF of 16 is selected.
9 define void @test0(ptr %a, ptr %b, ptr %c) #0 {
10 ; CHECK: LV: Checking a loop in 'test0'
11 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
12 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16
13 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
14 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
15 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
16 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16
21 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable: 0 on entry, %iv.next on the back edge
22 %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv ; &c[iv] (i32 elements)
23 %0 = load i32, ptr %arrayidx, align 4 ; c[iv]
24 %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv ; &b[iv] (i8 elements)
25 %1 = load i8, ptr %arrayidx2, align 4 ; b[iv]; NOTE(review): align 4 on a byte-strided i8 load looks over-stated - confirm
26 %zext = zext i8 %1 to i32 ; widen b[iv] to i32
27 %add = add nsw i32 %zext, %0 ; zext(b[iv]) + c[iv]
28 %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv ; &a[iv]
29 store i32 %add, ptr %arrayidx5, align 4 ; a[iv] = sum
30 %iv.next = add nuw nsw i64 %iv, 1 ; iv + 1
31 %exitcond.not = icmp eq i64 %iv.next, 1024 ; true once iv.next reaches 1024
32 br i1 %exitcond.not, label %exit, label %loop ; leave the loop when the exit condition holds, otherwise iterate again
38 ; Test that the MaxVF for the following loop, with a dependence distance
39 ; of 64 elements, is calculated as (maxvscale = 16) * 4.
; Loop body: a[iv + 64] = zext(b[iv]) + a[iv]. The store writes 64 i32
; elements ahead of the load from the same array.
; Expected: both scalable-on configurations report vscale x 4 as feasible, and
; a fixed VF of 16 is selected in all three configurations.
40 define void @test1(ptr %a, ptr %b) #0 {
41 ; CHECK: LV: Checking a loop in 'test1'
42 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
43 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16
44 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
45 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
46 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
47 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
52 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable: 0 on entry, %iv.next on the back edge
53 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv ; &a[iv] (i32 elements)
54 %0 = load i32, ptr %arrayidx, align 4 ; a[iv]
55 %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv ; &b[iv] (i8 elements)
56 %1 = load i8, ptr %arrayidx2, align 4 ; b[iv]; NOTE(review): align 4 on a byte-strided i8 load looks over-stated - confirm
57 %zext = zext i8 %1 to i32 ; widen b[iv] to i32
58 %add = add nsw i32 %zext, %0 ; zext(b[iv]) + a[iv]
59 %2 = add nuw nsw i64 %iv, 64 ; store index: iv + 64
60 %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 ; &a[iv + 64]
61 store i32 %add, ptr %arrayidx5, align 4 ; a[iv + 64] = sum
62 %iv.next = add nuw nsw i64 %iv, 1 ; iv + 1
63 %exitcond.not = icmp eq i64 %iv.next, 1024 ; true once iv.next reaches 1024
64 br i1 %exitcond.not, label %exit, label %loop ; leave the loop when the exit condition holds, otherwise iterate again
70 ; Test that the MaxVF for the following loop, with a dependence distance
71 ; of 32 elements, is calculated as (maxvscale = 16) * 2.
; Loop body: a[iv + 32] = zext(b[iv]) + a[iv]. The store writes 32 i32
; elements ahead of the load from the same array.
; Expected: both scalable-on configurations report vscale x 2 as feasible, and
; a fixed VF of 16 is selected in all three configurations.
72 define void @test2(ptr %a, ptr %b) #0 {
73 ; CHECK: LV: Checking a loop in 'test2'
74 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
75 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16
76 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
77 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
78 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
79 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
84 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable: 0 on entry, %iv.next on the back edge
85 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv ; &a[iv] (i32 elements)
86 %0 = load i32, ptr %arrayidx, align 4 ; a[iv]
87 %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv ; &b[iv] (i8 elements)
88 %1 = load i8, ptr %arrayidx2, align 4 ; b[iv]; NOTE(review): align 4 on a byte-strided i8 load looks over-stated - confirm
89 %zext = zext i8 %1 to i32 ; widen b[iv] to i32
90 %add = add nsw i32 %zext, %0 ; zext(b[iv]) + a[iv]
91 %2 = add nuw nsw i64 %iv, 32 ; store index: iv + 32
92 %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 ; &a[iv + 32]
93 store i32 %add, ptr %arrayidx5, align 4 ; a[iv + 32] = sum
94 %iv.next = add nuw nsw i64 %iv, 1 ; iv + 1
95 %exitcond.not = icmp eq i64 %iv.next, 1024 ; true once iv.next reaches 1024
96 br i1 %exitcond.not, label %exit, label %loop ; leave the loop when the exit condition holds, otherwise iterate again
102 ; Test that the MaxVF for the following loop, with a dependence distance
103 ; of 16 elements, is calculated as (maxvscale = 16) * 1.
; Loop body: a[iv + 16] = zext(b[iv]) + a[iv]. The store writes 16 i32
; elements ahead of the load from the same array.
; Expected: both scalable-on configurations report vscale x 1 as feasible, and
; a fixed VF of 16 is selected in all three configurations.
104 define void @test3(ptr %a, ptr %b) #0 {
105 ; CHECK: LV: Checking a loop in 'test3'
106 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
107 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16
108 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
109 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
110 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
111 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
116 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable: 0 on entry, %iv.next on the back edge
117 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv ; &a[iv] (i32 elements)
118 %0 = load i32, ptr %arrayidx, align 4 ; a[iv]
119 %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv ; &b[iv] (i8 elements)
120 %1 = load i8, ptr %arrayidx2, align 4 ; b[iv]; NOTE(review): align 4 on a byte-strided i8 load looks over-stated - confirm
121 %zext = zext i8 %1 to i32 ; widen b[iv] to i32
122 %add = add nsw i32 %zext, %0 ; zext(b[iv]) + a[iv]
123 %2 = add nuw nsw i64 %iv, 16 ; store index: iv + 16
124 %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 ; &a[iv + 16]
125 store i32 %add, ptr %arrayidx5, align 4 ; a[iv + 16] = sum
126 %iv.next = add nuw nsw i64 %iv, 1 ; iv + 1
127 %exitcond.not = icmp eq i64 %iv.next, 1024 ; true once iv.next reaches 1024
128 br i1 %exitcond.not, label %exit, label %loop ; leave the loop when the exit condition holds, otherwise iterate again
134 ; Test the fallback mechanism when scalable vectors are not feasible due
135 ; to e.g. dependence distance.
; Loop body: a[iv + 8] = a[iv] + b[iv], all i32. The store writes 8 elements
; ahead of the load from the same array.
; Expected: no scalable VF is reported as feasible in any configuration, and a
; fixed VF of 4 is selected in all three.
136 define void @test4(ptr %a, ptr %b) #0 {
137 ; CHECK: LV: Checking a loop in 'test4'
138 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
140 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
141 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
142 ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
143 ; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
144 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 4
149 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable: 0 on entry, %iv.next on the back edge
150 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv ; &a[iv] (i32 elements)
151 %0 = load i32, ptr %arrayidx, align 4 ; a[iv]
152 %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv ; &b[iv] (i32 elements)
153 %1 = load i32, ptr %arrayidx2, align 4 ; b[iv]
154 %add = add nsw i32 %1, %0 ; b[iv] + a[iv]
155 %2 = add nuw nsw i64 %iv, 8 ; store index: iv + 8
156 %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 ; &a[iv + 8]
157 store i32 %add, ptr %arrayidx5, align 4 ; a[iv + 8] = sum
158 %iv.next = add nuw nsw i64 %iv, 1 ; iv + 1
159 %exitcond.not = icmp eq i64 %iv.next, 1024 ; true once iv.next reaches 1024
160 br i1 %exitcond.not, label %exit, label %loop ; leave the loop when the exit condition holds, otherwise iterate again
; Attribute group #0 is attached to every test function above. It limits
; vscale to the range [1, 16]; the upper bound is the "maxvscale = 16" that
; the per-test comments multiply by to obtain the dependence distance.
166 attributes #0 = { vscale_range(1, 16) }