; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-interleave=1 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF1
; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-interleave=2 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF2

; CHECKUF1: for.body.preheader:
; CHECKUF1-DAG: %wide.trip.count = zext nneg i32 %N to i64
; CHECKUF1-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF1-DAG: %min.iters.check = icmp ugt i64 %[[VSCALEX4]], %wide.trip.count

; CHECKUF1: vector.ph:
; CHECKUF1-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF1-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX4]]
; CHECKUF1: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2

; CHECKUF1: vector.body:
; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
; CHECKUF1: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, splat (double 1.000000e+00)
; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
; CHECKUF1: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
; CHECKUF1: %index.next = add nuw i64 %index, %[[VSCALEX4]]
; CHECKUF1: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec
; CHECKUF1: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0

; For an interleave factor of 2, vscale is scaled by 8 instead of 4 (and thus shifted left by 3 instead of 2).
; In addition, the second part of the interleaved body indexes one vector further along: instead of loading and
; storing at IDXB/IDXA, it uses IDXB/IDXA + vscale * 4 doubles (vscale * 32 bytes, hence the shl by 5 on the
; i8-typed GEPs below).
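; As an illustrative sketch (vscale is a runtime quantity and its concrete value is not checked by this test):
; with vscale = 2, each <vscale x 4 x double> part holds 2 * 4 = 8 doubles, so the second part starts
; 2 << 5 = 64 bytes past the first, and %index.next advances by 2 << 3 = 16 elements per vector iteration.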

; CHECKUF2: for.body.preheader:
; CHECKUF2-DAG: %wide.trip.count = zext nneg i32 %N to i64
; CHECKUF2-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
; CHECKUF2-DAG: %min.iters.check = icmp ugt i64 %[[VSCALEX8]], %wide.trip.count

; CHECKUF2: vector.ph:
; CHECKUF2-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
; CHECKUF2-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX8]]
; CHECKUF2: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3

; CHECKUF2: vector.body:
; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECKUF2: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 5
; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds i8, ptr %[[IDXB]], i64 %[[VSCALE2]]
; CHECKUF2: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, ptr %[[IDXB_NEXT]], align 8
; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, splat (double 1.000000e+00)
; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, splat (double 1.000000e+00)
; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 5
; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds i8, ptr %[[IDXA]], i64 %[[VSCALE2]]
; CHECKUF2: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
; CHECKUF2: store <vscale x 4 x double> %[[FADD_NEXT]], ptr %[[IDXA_NEXT]], align 8
; CHECKUF2: %index.next = add nuw i64 %index, %[[VSCALEX8]]
; CHECKUF2: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec
; CHECKUF2: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0

define void @loop(i32 %N, ptr nocapture %a, ptr nocapture readonly %b) {
entry:
  %cmp7 = icmp sgt i32 %N, 0
  br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 8
  %add = fadd double %0, 1.000000e+00
  %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  store double %add, ptr %arrayidx2, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
}

!1 = distinct !{!1, !2}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}