test/Transforms/LoopVectorize/tbaa-nodep.ll

   1 ; RUN: opt < %s  -tbaa -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s
   2 ; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA
     ; The first invocation enables type-based alias analysis (-tbaa) and is
     ; verified with the default check prefix; the second omits -tbaa and is
     ; verified with the NOTBAA check prefix. Both force a vector width of 4 and an
     ; interleave count of 1, so the expected vector types are <4 x ...>.
   3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
   4
   5 ; Function Attrs: nounwind uwtable
     ; test1: for i in [0, 1600), load b[i] as a float, convert it to i32 with
     ; fptosi, and store the result to a[i]. The load carries the !tbaa !0 tag and
     ; the store carries the !tbaa !4 tag (the "float" and "int" type nodes in the
     ; metadata at the end of this file).
   6 define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 {
   7 entry:
   8   br label %for.body
   9
  10 for.body:                                         ; preds = %for.body, %entry
  11   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  12   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
  13   %0 = load float, float* %arrayidx, align 4, !tbaa !0
  14   %conv = fptosi float %0 to i32
  15   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  16   store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4
  17   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  18   %exitcond = icmp eq i64 %indvars.iv.next, 1600
  19   br i1 %exitcond, label %for.end, label %for.body
  20
  21 for.end:                                          ; preds = %for.body
  22   ret i32 0
  23
  24 ; TBAA partitions the accesses in this loop, so it can be vectorized without
  25 ; runtime checks.
  26
     ; With -tbaa: the entry block must branch directly to vector.body (nothing is
     ; allowed between them), and the widened load and store must keep a !tbaa tag.
  27 ; CHECK-LABEL: @test1
  28 ; CHECK: entry:
  29 ; CHECK-NEXT: br label %vector.body
  30 ; CHECK: vector.body:
  31
  32 ; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
  33 ; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
  34
  35 ; CHECK: ret i32 0
  36
     ; Without -tbaa: an i32* pointer comparison is expected ahead of the widened
     ; load and store (presumably the runtime overlap check that TBAA makes
     ; unnecessary in the run above).
  37 ; CHECK-NOTBAA-LABEL: @test1
  38 ; CHECK-NOTBAA: icmp ugt i32*
  39
  40 ; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
  41 ; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
  42
  43 ; CHECK-NOTBAA: ret i32 0
  44 }
  45
  46 ; Function Attrs: nounwind uwtable
     ; test2: for i in [0, 1600), compute c[i] = b[i] * (float)a[i]. The loads from
     ; b and the stores to c carry the !tbaa !0 tag ("float"); the loads from a
     ; carry the !tbaa !4 tag ("int"). Only c is written; a and b are readonly.
  47 define i32 @test2(i32* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c) #0 {
  48 entry:
  49   br label %for.body
  50
  51 for.body:                                         ; preds = %for.body, %entry
  52   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  53   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
  54   %0 = load float, float* %arrayidx, align 4, !tbaa !0
  55   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  56   %1 = load i32, i32* %arrayidx2, align 4, !tbaa !4
  57   %conv = sitofp i32 %1 to float
  58   %mul = fmul float %0, %conv
  59   %arrayidx4 = getelementptr inbounds float, float* %c, i64 %indvars.iv
  60   store float %mul, float* %arrayidx4, align 4, !tbaa !0
  61   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  62   %exitcond = icmp eq i64 %indvars.iv.next, 1600
  63   br i1 %exitcond, label %for.end, label %for.body
  64
  65 for.end:                                          ; preds = %for.body
  66   ret i32 0
  67
  68 ; This test is like the first, except here there is still one runtime check
  69 ; required. Without TBAA, however, two checks are required.
  70
     ; With -tbaa: two float* comparisons are expected (the remaining check on the
     ; float accesses), and no comparison on an i32* pointer may appear before the
     ; widened load. The negative pattern uses the same `icmp ugt` form that the
     ; no-TBAA run matches positively further down, so it cannot pass vacuously.
  71 ; CHECK-LABEL: @test2
  72 ; CHECK: icmp ugt float*
  73 ; CHECK: icmp ugt float*
  74 ; CHECK-NOT: icmp ugt i32*
  75
  76 ; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
  77 ; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
  78
  79 ; CHECK: ret i32 0
  80
     ; Without -tbaa: in addition to the two float* comparisons, one more float*
     ; comparison and one i32* comparison are expected, in either order.
  81 ; CHECK-NOTBAA-LABEL: @test2
  82 ; CHECK-NOTBAA: icmp ugt float*
  83 ; CHECK-NOTBAA: icmp ugt float*
  84 ; CHECK-NOTBAA-DAG: icmp ugt float*
  85 ; CHECK-NOTBAA-DAG: icmp ugt i32*
  86
  87 ; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
  88 ; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
  89
  90 ; CHECK-NOTBAA: ret i32 0
  91 }
  92
  93 attributes #0 = { nounwind uwtable }
  94
     ; TBAA metadata. !3 is the root node ("Simple C/C++ TBAA"); !2 ("omnipotent
     ; char") is its child; the scalar type nodes !1 ("float") and !5 ("int") are
     ; both children of !2. !0 and !4 are the access tags (base type, access type,
     ; offset 0) attached to the float and int memory accesses respectively.
  95 !0 = !{!1, !1, i64 0}
  96 !1 = !{!"float", !2, i64 0}
  97 !2 = !{!"omnipotent char", !3, i64 0}
  98 !3 = !{!"Simple C/C++ TBAA"}
  99 !4 = !{!5, !5, i64 0}
 100 !5 = !{!"int", !2, i64 0}
 101