1 ; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s
3 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" ; little-endian ("e"), native integer widths of 32 and 64 bits ("n32:64"), 128-bit stack alignment ("S128")
4 target triple = "aarch64-unknown-linux-gnu" ; every function in this test is compiled for 64-bit Arm Linux
4 define noundef i32 @V1(ptr noalias nocapture noundef %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) #0 { ; adds each double read through %1 into the matching double at %0; %2 is the element count; the directives below require the listed vec.epilog.* labels to be absent from the opt output
5 ; CHECK-NOT: vec.epilog.ph:
6 ; CHECK-NOT: vec.epilog.vector.body:
7 ; CHECK-NOT: vec.epilog.middle.block:
8 ; CHECK-NOT: vec.epilog.scalar.ph:
9 %4 = icmp sgt i32 %2, 0 ; signed test: is the element count strictly positive?
10 br i1 %4, label %5, label %8 ; go to %5 when the count is positive, otherwise to %8 (neither label is visible in this excerpt)
11 %6 = zext nneg i32 %2 to i64 ; widen the count to i64; it is the bound compared against below (nneg marks %2 as non-negative)
12 %10 = phi i64 [ 0, %5 ], [ %16, %9 ] ; element index: 0 when arriving from %5, the incremented index %16 when arriving from %9
13 %11 = getelementptr inbounds double, ptr %0, i64 %10 ; address of element %10 in the array at %0
14 %12 = load double, ptr %11, align 8 ; current value of that destination element
15 %13 = getelementptr inbounds double, ptr %1, i64 %10 ; address of element %10 in the read-only array at %1
16 %14 = load double, ptr %13, align 8 ; matching source element
17 %15 = fadd fast double %14, %12 ; sum of the two loaded values, computed with fast-math flags
18 store double %15, ptr %11, align 8 ; write the sum back over the destination element
19 %16 = add nuw nsw i64 %10, 1 ; advance the index by one
20 %17 = icmp eq i64 %16, %6 ; has the index reached the widened count?
21 br i1 %17, label %7, label %9 ; leave via %7 once it has, otherwise branch to %9 (presumably this loop block; its label is not visible here)
40 define noundef i32 @V2(ptr noalias nocapture noundef %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) #1 { ; element-wise accumulation of the doubles at %1 into the doubles at %0 over %2 elements; the directives below require the four vec.epilog.* labels to appear, in this order, in the opt output
43 ; CHECK: vec.epilog.ph:
44 ; CHECK: vec.epilog.vector.body:
45 ; CHECK: vec.epilog.middle.block:
46 ; CHECK: vec.epilog.scalar.ph:
49 %4 = icmp sgt i32 %2, 0 ; guard: true only for a strictly positive (signed) element count
50 br i1 %4, label %5, label %8 ; positive count goes to %5, anything else goes to %8 (labels not visible in this excerpt)
53 %6 = zext nneg i32 %2 to i64 ; i64 copy of the count, used as the bound in the exit comparison below
63 %10 = phi i64 [ 0, %5 ], [ %16, %9 ] ; index of the element being processed: starts at 0 (from %5), then takes %16 (from %9)
64 %11 = getelementptr inbounds double, ptr %0, i64 %10 ; pointer to destination element %10
65 %12 = load double, ptr %11, align 8 ; read the destination element
66 %13 = getelementptr inbounds double, ptr %1, i64 %10 ; pointer to source element %10
67 %14 = load double, ptr %13, align 8 ; read the source element
68 %15 = fadd fast double %14, %12 ; add source and destination values (fast-math flags set)
69 store double %15, ptr %11, align 8 ; store the result into the destination element
70 %16 = add nuw nsw i64 %10, 1 ; next index
71 %17 = icmp eq i64 %16, %6 ; true when the next index equals the count
72 br i1 %17, label %7, label %9 ; exit to %7 when done, otherwise continue at %9 (presumably this loop block; label not visible here)
75 ; TODO: V3 generates a scalable vector body, so it doesn't need an epilogue
76 ; loop, but it must still be checked that this is really the best thing to
79 define noundef i32 @V3(ptr noalias nocapture noundef %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) #2 { ; for each of %2 elements, adds the double at %1 into the double at %0; the directives below require the listed vec.epilog.* labels to be absent from the opt output
82 ; CHECK-NOT: vec.epilog.ph:
83 ; CHECK-NOT: vec.epilog.vector.body:
84 ; CHECK-NOT: vec.epilog.middle.block:
85 ; CHECK-NOT: vec.epilog.scalar.ph:
88 %4 = icmp sgt i32 %2, 0 ; is the signed element count greater than zero?
89 br i1 %4, label %5, label %8 ; %5 is taken for a positive count, %8 otherwise (neither label is visible in this excerpt)
92 %6 = zext nneg i32 %2 to i64 ; zero-extend the count to i64 so it can be compared with the i64 index below
102 %10 = phi i64 [ 0, %5 ], [ %16, %9 ] ; running element index: 0 from %5, %16 from %9
103 %11 = getelementptr inbounds double, ptr %0, i64 %10 ; destination slot for this index
104 %12 = load double, ptr %11, align 8 ; value currently in the destination slot
105 %13 = getelementptr inbounds double, ptr %1, i64 %10 ; source slot for this index
106 %14 = load double, ptr %13, align 8 ; value in the source slot
107 %15 = fadd fast double %14, %12 ; fast-math addition of the two values
108 store double %15, ptr %11, align 8 ; overwrite the destination slot with the sum
109 %16 = add nuw nsw i64 %10, 1 ; index for the following element
110 %17 = icmp eq i64 %16, %6 ; compare the following index against the count
111 br i1 %17, label %7, label %9 ; equal: go to %7; not equal: go to %9 (presumably this loop block; its label is not visible here)
114 attributes #0 = { vscale_range(1,16) "target-cpu"="neoverse-v1" "target-features"="+sve2" } ; attached to @V1: Neoverse V1 CPU with SVE2, vscale bounded to 1..16
116 attributes #1 = { vscale_range(1,16) "target-cpu"="neoverse-v2" "target-features"="+sve2" } ; attached to @V2: same settings, but targeting Neoverse V2
118 attributes #2 = { vscale_range(1,16) "target-cpu"="neoverse-v3" "target-features"="+sve2" } ; attached to @V3: same settings, but targeting Neoverse V3