llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll

   1 ; REQUIRES: asserts
   2 ; RUN: opt < %s -mcpu=neoverse-v2 -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
     ; The expectations in this file match the loop vectorizer's debug trace rather
     ; than generated IR: the opt invocation above discards its IR output and pipes
     ; stderr into FileCheck. The -debug-only option is only available in builds
     ; with assertions enabled, hence the asserts requirement.
   3
   4 target triple="aarch64--linux-gnu"
   5
   6 ; This test shows that the exit comparison and the next-iteration IV
   7 ; (%i.iv.next) have zero cost if the vector loop gets executed exactly once
     ; with the given VF.
     ; The loop runs 16 iterations (IV starts at 0, steps by 1, exits when the
     ; incremented IV equals 16). The VF 8 expectations list a cost of 1 for both
     ; %i.iv.next and %exitcond.not; the VF 16 expectations list neither of them.
   8 define i64 @test(ptr %a, ptr %b) #0 {
   9 ; CHECK-LABEL: LV: Checking a loop in 'test'
  10 ; CHECK: Cost of 1 for VF 8: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
  11 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  12 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
  13 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
  14 ; CHECK: Cost for VF 8: 26
  15 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  16 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
  17 ; CHECK: Cost for VF 16: 48
  18 ; CHECK: LV: Selecting VF: 16
  19 entry:
  20   br label %for.body
  21
  22 exit:                                 ; preds = %for.body
  23   ret i64 %add
  24
  25 for.body:                                         ; preds = %entry, %for.body
     ; Reduction body: sum += zext(a[i]) * zext(b[i]), with i8 elements widened to
     ; i64. The only users of %i.iv.next are the IV phi and the exit compare.
  26   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  27   %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
  28   %arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv
  29   %0 = load i8, ptr %arrayidx, align 1
  30   %conv = zext i8 %0 to i64
  31   %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %i.iv
  32   %1 = load i8, ptr %arrayidx2, align 1
  33   %conv3 = zext i8 %1 to i64
  34   %mul = mul nuw nsw i64 %conv3, %conv
  35   %add = add i64 %mul, %sum
  36   %i.iv.next = add nuw nsw i64 %i.iv, 1
  37   %exitcond.not = icmp eq i64 %i.iv.next, 16
  38   br i1 %exitcond.not, label %exit, label %for.body
  39 }
  40
  41 ; Same as @test, but the next-iteration IV (%i.iv.next) has an extra user, and thus its cost is not zero.
     ; The extra user is the address computation for the load from %b, which is
     ; indexed by %i.iv.next instead of %i.iv. The VF 16 expectations therefore
     ; still list a cost of 1 for %i.iv.next, while no line is expected for
     ; %exitcond.not. The expected final choice is a scalable VF (vscale x 2).
  42 define i64 @test_external_iv_user(ptr %a, ptr %b) #0 {
  43 ; CHECK-LABEL: LV: Checking a loop in 'test_external_iv_user'
  44 ; CHECK: Cost of 1 for VF 8: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
  45 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  46 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
  47 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
  48 ; CHECK: Cost for VF 8: 26
  49 ; CHECK-NEXT: Cost of 1 for VF 16: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
  50 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  51 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
  52 ; CHECK: Cost for VF 16: 49
  53 ; CHECK: LV: Selecting VF: vscale x 2
  54 entry:
  55   br label %for.body
  56
  57 for.body:                                         ; preds = %entry, %for.body
     ; Reduction body: sum += zext(a[i]) * zext(b[i + 1]). The increment is placed
     ; before the second address computation because that GEP consumes it.
  58   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  59   %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
  60   %arrayidx = getelementptr inbounds nuw i8, ptr %a, i64 %i.iv
  61   %0 = load i8, ptr %arrayidx, align 1
  62   %conv = zext i8 %0 to i64
  63   %i.iv.next = add nuw nsw i64 %i.iv, 1
  64   %arrayidx2 = getelementptr inbounds nuw i8, ptr %b, i64 %i.iv.next
  65   %1 = load i8, ptr %arrayidx2, align 1
  66   %conv3 = zext i8 %1 to i64
  67   %mul = mul nuw nsw i64 %conv3, %conv
  68   %add = add i64 %sum, %mul
  69   %exitcond.not = icmp eq i64 %i.iv.next, 16
  70   br i1 %exitcond.not, label %exit, label %for.body
  71
  72 exit:                                 ; preds = %for.body
  73   ret i64 %add
  74 }
  75
  76 ; Same as @test, but with two IVs whose increments have no extra users. Both increments have zero cost when VF equals the number of iterations.
     ; %i.iv counts 0..15 and drives the exit compare; %j.iv starts at %start and
     ; indexes %b. %j.iv.next is used only by its phi, and %i.iv.next only by its
     ; phi and the exit compare. The VF 8 expectations list a cost of 1 for each
     ; increment and for the compare; the VF 16 expectations list none of the three.
  77 define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
  78 ; CHECK-LABEL: LV: Checking a loop in 'test_two_ivs'
  79 ; CHECK: Cost of 1 for VF 8: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
  80 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  81 ; CHECK-NEXT: Cost of 1 for VF 8: induction instruction   %j.iv.next = add nuw nsw i64 %j.iv, 1
  82 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
  83 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
  84 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
  85 ; CHECK: Cost for VF 8: 27
  86 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  87 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
  88 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
  89 ; CHECK: Cost for VF 16: 48
  90 ; CHECK: LV: Selecting VF: 16
  91 entry:
  92   br label %for.body
  93
  94 exit:                                 ; preds = %for.body
  95   ret i64 %add
  96
  97 for.body:                                         ; preds = %entry, %for.body
     ; Reduction body: sum += zext(a[i]) * zext(b[j]), with j = %start + i.
  98   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  99   %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
 100   %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
 101   %arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv
 102   %0 = load i8, ptr %arrayidx, align 1
 103   %conv = zext i8 %0 to i64
 104   %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %j.iv
 105   %1 = load i8, ptr %arrayidx2, align 1
 106   %conv3 = zext i8 %1 to i64
 107   %mul = mul nuw nsw i64 %conv3, %conv
 108   %add = add i64 %mul, %sum
 109   %i.iv.next = add nuw nsw i64 %i.iv, 1
 110   %j.iv.next = add nuw nsw i64 %j.iv, 1
 111   %exitcond.not = icmp eq i64 %i.iv.next, 16
 112   br i1 %exitcond.not, label %exit, label %for.body
 113 }
 114
 115 define i1 @test_extra_cmp_user(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src) {
     ; Here the exit compare has an extra user outside the loop: %exitcond.not is
     ; the value returned from the exit block. The loop still runs 16 iterations,
     ; and the VF 16 expectations below list no cost line for %indvars.iv.next or
     ; %exitcond.not, whereas the VF 8 expectations list a cost of 4 for each.
     ; Unlike the other functions in this file, this one carries no attribute group.
 116 ; CHECK-LABEL: LV: Checking a loop in 'test_extra_cmp_user'
 117 ; CHECK: Cost of 4 for VF 8: induction instruction   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 118 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
 119 ; CHECK-NEXT: Cost of 4 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %indvars.iv.next, 16
 120 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 121 ; CHECK: Cost for VF 8: 12
 122 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
 123 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 124 ; CHECK: Cost for VF 16: 4
 125 ; CHECK: LV: Selecting VF: 16
 126 entry:
 127   br label %for.body
 128
 129 for.body:
     ; Loop body: dst[i] = dst[i] + src[i] on i8 elements.
 130   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
 131   %arrayidx = getelementptr inbounds nuw i8, ptr %src, i64 %indvars.iv
 132   %0 = load i8, ptr %arrayidx, align 4
 133   %arrayidx2 = getelementptr inbounds nuw i8, ptr %dst, i64 %indvars.iv
 134   %1 = load i8, ptr %arrayidx2, align 4
 135   %add = add nsw i8 %1, %0
 136   store i8 %add, ptr %arrayidx2, align 4
 137   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 138   %exitcond.not = icmp eq i64 %indvars.iv.next, 16
 139   br i1 %exitcond.not, label %exit, label %for.body
 140
 141 exit:
     ; The compare result escapes the loop through this return.
 142   ret i1 %exitcond.not
 143 }
 144
 145 attributes #0 = { vscale_range(1, 16) "target-features"="+sve" }
     ; Attribute group #0 enables the SVE target feature and bounds vscale to
     ; [1, 16]. It is attached to @test, @test_external_iv_user and @test_two_ivs;
     ; @test_extra_cmp_user is defined without it.