llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll

   1 ; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
   2 ; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
   3 ; REQUIRES: asserts
   4
   5 @a = global [1024 x i8] zeroinitializer, align 16 ; first byte array read by @foo and @goo
   6 @b = global [1024 x i8] zeroinitializer, align 16 ; second byte array read by @foo and @goo
   7
   8 define i32 @foo() {
   9 ; CHECK-LABEL: foo
  10 ; Reduction loop: returns the i32 sum of |a[i] - b[i]| over all 1024 bytes of @a and @b.
  11 ; CHECK-PWR8: Setting best plan to VF=16, UF=4
  12 ; The pwr9 run expects a different VF/UF pair than the pwr8 run for this same loop.
  13 ; CHECK-PWR9: Setting best plan to VF=8, UF=8
  14
  15
  16 entry:
  17   br label %for.body
  18
  19 for.cond.cleanup:
  20   %add.lcssa = phi i32 [ %add, %for.body ]
  21   ret i32 %add.lcssa
  22
  23 for.body:
  24   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  25   %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ] ; running sum, starts at 0
  26   %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
  27   %0 = load i8, i8* %arrayidx, align 1
  28   %conv = zext i8 %0 to i32 ; a[i] widened to i32
  29   %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
  30   %1 = load i8, i8* %arrayidx2, align 1
  31   %conv3 = zext i8 %1 to i32 ; b[i] widened to i32
  32   %sub = sub nsw i32 %conv, %conv3
  33   %ispos = icmp sgt i32 %sub, -1 ; true when the difference is >= 0
  34   %neg = sub nsw i32 0, %sub
  35   %2 = select i1 %ispos, i32 %sub, i32 %neg ; absolute value of the difference
  36   %add = add nsw i32 %2, %s.015
  37   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  38   %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; leave after 1024 iterations
  39   br i1 %exitcond, label %for.cond.cleanup, label %for.body
  40 }
  41
  42 define i32 @goo() {
  43 ; For indvars.iv used in a computation chain only feeding into getelementptr or cmp,
  44 ; it will not have vector version and the vector register usage will not exceed the
  45 ; available vector register number.
  46 ; Same absolute-difference reduction as @foo, but @a is read at i+3 and @b at i+2.
  47 ; CHECK-LABEL: goo
  48
  49 ; CHECK: Setting best plan to VF=16, UF=4
  50
  51 entry:
  52   br label %for.body
  53
  54 for.cond.cleanup:                                 ; preds = %for.body
  55   %add.lcssa = phi i32 [ %add, %for.body ]
  56   ret i32 %add.lcssa
  57
  58 for.body:                                         ; preds = %for.body, %entry
  59   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  60   %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ] ; running sum, starts at 0
  61   %tmp1 = add nsw i64 %indvars.iv, 3 ; index into @a; only feeds the getelementptr below
  62   %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
  63   %tmp = load i8, i8* %arrayidx, align 1
  64   %conv = zext i8 %tmp to i32
  65   %tmp2 = add nsw i64 %indvars.iv, 2 ; index into @b; only feeds the getelementptr below
  66   %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
  67   %tmp3 = load i8, i8* %arrayidx2, align 1
  68   %conv3 = zext i8 %tmp3 to i32
  69   %sub = sub nsw i32 %conv, %conv3
  70   %ispos = icmp sgt i32 %sub, -1 ; true when the difference is >= 0
  71   %neg = sub nsw i32 0, %sub
  72   %tmp4 = select i1 %ispos, i32 %sub, i32 %neg ; absolute value of the difference
  73   %add = add nsw i32 %tmp4, %s.015
  74   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  75   %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; leave after 1024 iterations
  76   br i1 %exitcond, label %for.cond.cleanup, label %for.body
  77 }
  78
  79 define i64 @bar(i64* nocapture %a) {
  80 ; CHECK-LABEL: bar
  81 ; For i in [0, 1024): rewrites a[i] as a[i] + i and returns the i64 sum of the stored values.
  82 ; CHECK: Setting best plan to VF=2, UF=12
  83
  84 entry:
  85   br label %for.body
  86
  87 for.cond.cleanup:
  88   %add2.lcssa = phi i64 [ %add2, %for.body ]
  89   ret i64 %add2.lcssa
  90
  91 for.body:
  92   %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  93   %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ] ; running sum, starts at 0
  94   %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
  95   %0 = load i64, i64* %arrayidx, align 8
  96   %add = add nsw i64 %0, %i.012 ; the induction variable is used as data here, not only as an index
  97   store i64 %add, i64* %arrayidx, align 8
  98   %add2 = add nsw i64 %add, %s.011
  99   %inc = add nuw nsw i64 %i.012, 1
 100   %exitcond = icmp eq i64 %inc, 1024 ; leave after 1024 iterations
 101   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 102 }
 103
 104 @d = external global [0 x i64], align 8 ; i64 index array loaded by @hoo
 105 @e = external global [0 x i32], align 4 ; i32 table that @hoo reads at the index taken from @d
 106 @c = external global [0 x i32], align 4 ; i32 destination array written by @hoo
 107
 108 define void @hoo(i32 %n) {
 109 ; CHECK-LABEL: hoo
 110 ; CHECK: Setting best plan to VF=1, UF=12
 111 ; For i in [0, 10000): c[i] = e[d[i]], a load whose address comes from another load. %n is unused.
 112 entry:
 113   br label %for.body
 114
 115 for.body:                                         ; preds = %for.body, %entry
 116   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
 117   %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv
 118   %tmp = load i64, i64* %arrayidx, align 8 ; d[i], used as the index into @e
 119   %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp
 120   %tmp1 = load i32, i32* %arrayidx1, align 4 ; e[d[i]]
 121   %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv
 122   store i32 %tmp1, i32* %arrayidx3, align 4
 123   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 124   %exitcond = icmp eq i64 %indvars.iv.next, 10000 ; leave after 10000 iterations
 125   br i1 %exitcond, label %for.end, label %for.body
 126
 127 for.end:                                          ; preds = %for.body
 128   ret void
 129 }
 130
 131 define float @float_(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) {
 132 ;CHECK-LABEL: float_
 133 ;CHECK: LV(REG): VF = 1
 134 ;CHECK: LV(REG): Found max usage: 2 item
 135 ;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
 136 ;CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 3 registers
 137 ;CHECK: LV(REG): Found invariant usage: 1 item
 138 ;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
 139 ; Fast-math float reduction of a[i] + b[i] with the index stepping by 32; returns 0.0 when %n <= 0.
 140 entry:
 141   %cmp = icmp sgt i32 %n, 0 ; skip the loop entirely unless %n > 0
 142   br i1 %cmp, label %preheader, label %for.end
 143
 144 preheader:
 145   %t0 = sext i32 %n to i64 ; loop bound, computed once outside the loop
 146   br label %for
 147
 148 for:
 149   %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
 150   %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ] ; running sum, starts at 0.0
 151   %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
 152   %t1 = load float, float* %arrayidx, align 4
 153   %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv
 154   %t2 = load float, float* %arrayidx3, align 4
 155   %add = fadd fast float %t1, %s.02
 156   %add4 = fadd fast float %add, %t2
 157   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32 ; stride of 32 elements per iteration
 158   %cmp1 = icmp slt i64 %indvars.iv.next, %t0 ; signed compare against the sign-extended %n
 159   br i1 %cmp1, label %for, label %loopexit
 160
 161 loopexit:
 162   %add4.lcssa = phi float [ %add4, %for ]
 163   br label %for.end
 164
 165 for.end:
 166   %s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
 167   ret float %s.0.lcssa
 168 }
 169
 170
 171 define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp {
 172 ;CHECK-LABEL: double_
 173 ;CHECK-PWR8: LV(REG): VF = 2
 174 ;CHECK-PWR8: LV(REG): Found max usage: 2 item
 175 ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
 176 ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
 177 ;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
 178 ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers
 179 ; The pwr9 expectations below examine VF = 1, whereas the pwr8 ones above examine VF = 2.
 180 ;CHECK-PWR9: LV(REG): VF = 1
 181 ;CHECK-PWR9: LV(REG): Found max usage: 2 item
 182 ;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
 183 ;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
 184 ;CHECK-PWR9: LV(REG): Found invariant usage: 1 item
 185 ;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
 186 ; Counts the index down from sext(%n); each iteration rewrites A[i] through a long chain of double ops.
 187   %1 = sext i32 %n to i64 ; starting index
 188   br label %2
 189
 190 ; <label>:2                                       ; preds = %2, %0
 191   %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
 192   %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv
 193   %4 = load double, double* %3, align 8 ; A[i]; reused by several operations below
 194   %5 = fadd double %4, 3.000000e+00
 195   %6 = fmul double %4, 2.000000e+00
 196   %7 = fadd double %5, %6
 197   %8 = fadd double %7, 2.000000e+00
 198   %9 = fmul double %8, 5.000000e-01
 199   %10 = fadd double %6, %9
 200   %11 = fsub double %10, %5
 201   %12 = fadd double %4, %11
 202   %13 = fdiv double %8, %12
 203   %14 = fmul double %13, %8
 204   %15 = fmul double %6, %14
 205   %16 = fmul double %5, %15
 206   %17 = fadd double %16, -3.000000e+00
 207   %18 = fsub double %4, %5
 208   %19 = fadd double %6, %18
 209   %20 = fadd double %13, %19
 210   %21 = fadd double %20, %17
 211   %22 = fadd double %21, 3.000000e+00
 212   %23 = fmul double %4, %22
 213   store double %23, double* %3, align 8 ; result written back to the slot it was loaded from
 214   %indvars.iv.next = add i64 %indvars.iv, -1
 215   %24 = trunc i64 %indvars.iv to i32 ; exit test uses the current index, not the decremented one
 216   %25 = icmp eq i32 %24, 0
 217   br i1 %25, label %26, label %2
 218
 219 ; <label>:26                                      ; preds = %2
 220   ret void
 221 }
 222
 223 define ppc_fp128 @fp128_(ppc_fp128* nocapture %n, ppc_fp128 %d) nounwind readonly {
 224 ;CHECK-LABEL: fp128_
 225 ;CHECK: LV(REG): VF = 1
 226 ;CHECK: LV(REG): Found max usage: 2 item
 227 ;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
 228 ;CHECK: LV(REG): RegisterClass: PPC::VRRC, 2 registers
 229 entry:
 230   br label %for.body
 231 ; Starting from %d, subtracts n[0] .. n[2047] in turn and returns the final ppc_fp128 value.
 232 for.body:                                         ; preds = %for.body, %entry
 233   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
 234   %x.05 = phi ppc_fp128 [ %d, %entry ], [ %sub, %for.body ] ; running value, seeded with %d
 235   %arrayidx = getelementptr inbounds ppc_fp128, ppc_fp128* %n, i32 %i.06
 236   %0 = load ppc_fp128, ppc_fp128* %arrayidx, align 8
 237   %sub = fsub fast ppc_fp128 %x.05, %0
 238   %inc = add nsw i32 %i.06, 1
 239   %exitcond = icmp eq i32 %inc, 2048 ; leave after 2048 iterations
 240   br i1 %exitcond, label %for.end, label %for.body
 241
 242 for.end:                                          ; preds = %for.body
 243   ret ppc_fp128 %sub
 244 }
 245
 246
 247 define void @fp16_(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
 248 ;CHECK-LABEL: fp16_
 249 ;CHECK: LV(REG): VF = 1
 250 ;CHECK: LV(REG): Found max usage: 2 item
 251 ;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 4 registers
 252 ;CHECK: LV(REG): RegisterClass: PPC::VSXRC, 2 registers
 253 entry:
 254   %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16 ; low 16 bits carry the half scale factor
 255   %0 = bitcast i16 %tmp.0.extract.trunc to half
 256   %mul = mul i32 %numCols, %numRows
 257   %shr = lshr i32 %mul, 2 ; iteration count = (numCols * numRows) >> 2
 258   %cmp26 = icmp eq i32 %shr, 0 ; nothing to do when the count is zero
 259   br i1 %cmp26, label %while.end, label %while.body
 260 ; Each iteration scales two consecutive halfs from %pIn into %pOut, then advances both pointers by 2.
 261 while.body:                                       ; preds = %entry, %while.body
 262   %pIn.addr.029 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ]
 263   %pOut.addr.028 = phi half* [ %add.ptr7, %while.body ], [ %pOut, %entry ]
 264   %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ] ; iterations remaining
 265   %1 = load half, half* %pIn.addr.029, align 2
 266   %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.029, i32 1
 267   %2 = load half, half* %arrayidx2, align 2
 268   %mul3 = fmul half %1, %0
 269   %mul4 = fmul half %2, %0
 270   store half %mul3, half* %pOut.addr.028, align 2
 271   %arrayidx6 = getelementptr inbounds half, half* %pOut.addr.028, i32 1
 272   store half %mul4, half* %arrayidx6, align 2
 273   %add.ptr = getelementptr inbounds half, half* %pIn.addr.029, i32 2
 274   %add.ptr7 = getelementptr inbounds half, half* %pOut.addr.028, i32 2
 275   %dec = add nsw i32 %blkCnt.027, -1
 276   %cmp = icmp eq i32 %dec, 0 ; stop once the counter reaches zero
 277   br i1 %cmp, label %while.end, label %while.body
 278
 279 while.end:                                        ; preds = %while.body, %entry
 280   ret void
 281 }