2 ; RUN: opt -passes=loop-vectorize -mcpu=neoverse-v1 -disable-output %s -debug \
3 ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue 2>&1 | FileCheck %s
5 target triple="aarch64--linux-gnu"
7 ; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index'
8 ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx3, align 4
; Gather with a loaded index: each iteration reads an i64 index from %b[i],
; uses it to address a float in %a, and stores that float contiguously to %c[i].
; The indexed load (%1) is the instruction whose "VF vscale x 4" cost is matched
; by the expectation directly above this function.
9 define void @gather_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 {
13 for.body: ; preds = %entry, %for.body
14 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
15 %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv
16 %0 = load i64, ptr %arrayidx, align 8 ; index value read from %b[i]
17 %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %0 ; address depends on the loaded index
18 %1 = load float, ptr %arrayidx3, align 4 ; the load whose cost is being tested
19 %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %indvars.iv
20 store float %1, ptr %arrayidx5, align 4 ; store is indexed directly by the induction variable
21 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
22 %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; loop exits once the incremented counter equals %n
23 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
25 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
29 ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_loaded_index'
30 ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, ptr %arrayidx5, align 4
; Scatter with a loaded index: each iteration reads an i64 index from %b[i],
; loads a float contiguously from %a[i], and stores it to %c at the loaded index.
; The indexed store is the instruction whose "VF vscale x 4" cost is matched by
; the expectation directly above this function.
31 define void @scatter_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 {
35 for.body: ; preds = %entry, %for.body
36 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
37 %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv
38 %0 = load i64, ptr %arrayidx, align 8 ; index value read from %b[i]
39 %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
40 %1 = load float, ptr %arrayidx3, align 4 ; load is indexed directly by the induction variable
41 %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %0 ; address depends on the loaded index
42 store float %1, ptr %arrayidx5, align 4 ; the store whose cost is being tested
43 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
44 %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; loop exits once the incremented counter equals %n
45 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
47 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
51 ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks
52 ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and a cost of 1.
53 ; CHECK: LV: Checking a loop in 'gather_nxv4i32_unknown_stride'
54 ; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
; Load with a runtime stride: the load address is %b[i * %stride], where %stride
; is a function argument, and the result is stored contiguously to %a[i].
; The expectation directly above this function matches a cost of 1 for the load.
55 define void @gather_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 {
59 for.body: ; preds = %entry, %for.body
60 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
61 %indvars.iv.stride2 = mul i64 %indvars.iv, %stride ; element index scaled by the runtime stride
62 %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2
63 %0 = load float, ptr %arrayidx, align 4 ; the load whose cost is being tested
64 %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
65 store float %0, ptr %arrayidx2, align 4 ; store is indexed directly by the induction variable
66 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
67 %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; loop exits once the incremented counter equals %n
68 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
70 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
74 ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks
75 ; to ensure the stride value is always 1. Therefore, it can assume a contiguous store and a cost of 1.
76 ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_unknown_stride'
77 ; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4
; Store with a runtime stride: a float is loaded contiguously from %b[i] and
; stored to %a[i * %stride], where %stride is a function argument.
; The expectation directly above this function matches a cost of 1 for the store.
78 define void @scatter_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 {
82 for.body: ; preds = %entry, %for.body
83 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
84 %indvars.iv.stride2 = mul i64 %indvars.iv, %stride ; element index scaled by the runtime stride
85 %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
86 %0 = load float, ptr %arrayidx, align 4 ; load is indexed directly by the induction variable
87 %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2
88 store float %0, ptr %arrayidx2, align 4 ; the store whose cost is being tested
89 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
90 %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; loop exits once the incremented counter equals %n
91 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
93 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
97 ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2'
98 ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
; Load with a constant stride of 2: the load address is %b[i * 2] and the result
; is stored contiguously to %a[i]. The strided load is the instruction whose
; "VF vscale x 4" cost is matched by the expectation directly above this function.
99 define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
103 for.body: ; preds = %entry, %for.body
104 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
105 %indvars.iv.stride2 = mul i64 %indvars.iv, 2 ; element index scaled by the constant stride 2
106 %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2
107 %0 = load float, ptr %arrayidx, align 4 ; the load whose cost is being tested
108 %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
109 store float %0, ptr %arrayidx2, align 4 ; store is indexed directly by the induction variable
110 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
111 %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; loop exits once the incremented counter equals %n
112 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
114 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
118 ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride2'
119 ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4
; Store with a constant stride of 2: a float is loaded contiguously from %b[i]
; and stored to %a[i * 2]. The strided store is the instruction whose
; "VF vscale x 4" cost is matched by the expectation directly above this function.
120 define void @scatter_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
124 for.body: ; preds = %entry, %for.body
125 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
126 %indvars.iv.stride2 = mul i64 %indvars.iv, 2 ; element index scaled by the constant stride 2
127 %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
128 %0 = load float, ptr %arrayidx, align 4 ; load is indexed directly by the induction variable
129 %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2
130 store float %0, ptr %arrayidx2, align 4 ; the store whose cost is being tested
131 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
132 %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; loop exits once the incremented counter equals %n
133 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
135 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
140 ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride64'
141 ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
; Load with a constant stride of 64: the load address is %b[i * 64] and the
; result is stored contiguously to %a[i]. The strided load is the instruction
; whose "VF vscale x 4" cost is matched by the expectation directly above this
; function. The scaled index keeps the value name %indvars.iv.stride2 even
; though the multiplier here is 64.
142 define void @gather_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
146 for.body: ; preds = %entry, %for.body
147 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
148 %indvars.iv.stride2 = mul i64 %indvars.iv, 64 ; element index scaled by the constant stride 64
149 %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2
150 %0 = load float, ptr %arrayidx, align 4 ; the load whose cost is being tested
151 %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
152 store float %0, ptr %arrayidx2, align 4 ; store is indexed directly by the induction variable
153 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
154 %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; loop exits once the incremented counter equals %n
155 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
157 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
161 ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride64'
162 ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4
; Store with a constant stride of 64: a float is loaded contiguously from %b[i]
; and stored to %a[i * 64]. The strided store is the instruction whose
; "VF vscale x 4" cost is matched by the expectation directly above this
; function. The scaled index keeps the value name %indvars.iv.stride2 even
; though the multiplier here is 64.
163 define void @scatter_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
167 for.body: ; preds = %entry, %for.body
168 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
169 %indvars.iv.stride2 = mul i64 %indvars.iv, 64 ; element index scaled by the constant stride 64
170 %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
171 %0 = load float, ptr %arrayidx, align 4 ; load is indexed directly by the induction variable
172 %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2
173 store float %0, ptr %arrayidx2, align 4 ; the store whose cost is being tested
174 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
175 %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; loop exits once the incremented counter equals %n
176 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
178 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
; Attribute group #0, referenced by every function definition in this file:
; enables the SVE target feature and constrains vscale to the range [1, 16].
183 attributes #0 = { vscale_range(1, 16) "target-features"="+sve" }
; Loop metadata !0, attached through !llvm.loop to the for.body back-branch of
; each test loop. Its operands are the individual loop hints listed below.
185 !0 = distinct !{!0, !1, !2, !3, !4, !5}
186 !1 = !{!"llvm.loop.mustprogress"}
187 !2 = !{!"llvm.loop.vectorize.width", i32 4} ; requested vectorization width of 4
188 !3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} ; request scalable vectors
189 !4 = !{!"llvm.loop.interleave.count", i32 1} ; interleave count of 1
190 !5 = !{!"llvm.loop.vectorize.enable", i1 true} ; explicitly enable vectorization