test/Transforms/LoopVectorize/scalar_after_vectorization.ll

   1 ; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=2 -loop-vectorize -instcombine -S | FileCheck %s
   2 ; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=2 -loop-vectorize -S | FileCheck %s --check-prefix=NO-IC
   3 ; Both invocations above force a vector width of 4 and an interleave count
     ; of 2. They differ only in whether -instcombine runs after -loop-vectorize,
     ; and each one's output is verified against its own FileCheck prefix (the
     ; default prefix for the first, the --check-prefix value for the second).
   4 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
   5
   6 ; CHECK-LABEL: @scalar_after_vectorization_0
   7 ;
   8 ; CHECK: vector.body:
   9 ; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  10 ; CHECK:   %offset.idx = or i64 %index, 1
  11 ; CHECK:   %[[T2:.+]] = add nuw nsw i64 %offset.idx, %tmp0
  12 ; CHECK:   %[[T3:.+]] = sub nsw i64 %[[T2]], %x
  13 ; CHECK:   %[[T4:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T3]]
  14 ; CHECK:   %[[T5:.+]] = bitcast i32* %[[T4]] to <4 x i32>*
  15 ; CHECK:   load <4 x i32>, <4 x i32>* %[[T5]], align 4
  16 ; CHECK:   %[[T6:.+]] = getelementptr inbounds i32, i32* %[[T4]], i64 4
  17 ; CHECK:   %[[T7:.+]] = bitcast i32* %[[T6]] to <4 x i32>*
  18 ; CHECK:   load <4 x i32>, <4 x i32>* %[[T7]], align 4
  19 ; CHECK:   br {{.*}}, label %middle.block, label %vector.body
  20 ;
  21 ; NO-IC-LABEL: @scalar_after_vectorization_0
  22 ;
  23 ; NO-IC: vector.body:
  24 ; NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  25 ; NO-IC:   %offset.idx = add i64 1, %index
  26 ; NO-IC:   %[[T2:.+]] = add i64 %offset.idx, 0
  27 ; NO-IC:   %[[T3:.+]] = add i64 %offset.idx, 4
  28 ; NO-IC:   %[[T4:.+]] = add nuw nsw i64 %[[T2]], %tmp0
  29 ; NO-IC:   %[[T5:.+]] = add nuw nsw i64 %[[T3]], %tmp0
  30 ; NO-IC:   %[[T6:.+]] = sub nsw i64 %[[T4]], %x
  31 ; NO-IC:   %[[T7:.+]] = sub nsw i64 %[[T5]], %x
  32 ; NO-IC:   %[[T8:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T6]]
  33 ; NO-IC:   %[[T9:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T7]]
  34 ; NO-IC:   %[[T10:.+]] = getelementptr inbounds i32, i32* %[[T8]], i32 0
  35 ; NO-IC:   %[[T11:.+]] = bitcast i32* %[[T10]] to <4 x i32>*
  36 ; NO-IC:   load <4 x i32>, <4 x i32>* %[[T11]], align 4
  37 ; NO-IC:   %[[T12:.+]] = getelementptr inbounds i32, i32* %[[T8]], i32 4
  38 ; NO-IC:   %[[T13:.+]] = bitcast i32* %[[T12]] to <4 x i32>*
  39 ; NO-IC:   load <4 x i32>, <4 x i32>* %[[T13]], align 4
  40 ; NO-IC:   br {{.*}}, label %middle.block, label %vector.body
  41 ;
  42 define void @scalar_after_vectorization_0(i32* noalias %a, i32* noalias %b, i64 %x, i64 %y) {
  43 ; Input for the vectorizer: a two-level loop nest that copies 32-bit elements
     ; from %a to %b (both declared noalias). With i the outer and j the inner
     ; induction variable, each inner iteration performs
     ;     b[i*x + j] = a[i*x + j - x]
     ; Both loops start at 1, step by 1, and are tested at the bottom with a
     ; signed compare against %y, so each loop body executes at least once.
     ; The expected vector.body in the FileCheck lines above uses %tmp0, which is
     ; defined in outer.body, i.e. the inner loop is the one expected to be
     ; vectorized.
  44 outer.ph:
  45   br label %outer.body
  46
  47 outer.body:
  48   %i = phi i64 [ 1, %outer.ph ], [ %i.next, %inner.end ]      ; outer induction variable, starts at 1
  49   %tmp0 = mul nuw nsw i64 %i, %x                               ; i * x, computed once per outer iteration (outside inner.body)
  50   br label %inner.ph
  51
  52 inner.ph:
  53   br label %inner.body
  54
  55 inner.body:
  56   %j = phi i64 [ 1, %inner.ph ], [ %j.next, %inner.body ]     ; inner induction variable, starts at 1
  57   %tmp1 = add nuw nsw i64 %j, %tmp0                            ; j + i*x: element index used for the store into %b
  58   %tmp2 = sub nsw i64 %tmp1, %x                                ; j + i*x - x: element index used for the load from %a
  59   %tmp3 = getelementptr inbounds i32, i32* %a, i64 %tmp2       ; address of the source element in %a
  60   %tmp4 = load i32, i32* %tmp3, align 4
  61   %tmp5 = getelementptr inbounds i32, i32* %b, i64 %tmp1       ; address of the destination element in %b
  62   store i32 %tmp4, i32* %tmp5, align 4
  63   %j.next = add i64 %j, 1                                      ; note: no nuw/nsw flags on the increment
  64   %cond.j = icmp slt i64 %j.next, %y                           ; signed: keep looping while j + 1 < y
  65   br i1 %cond.j, label %inner.body, label %inner.end
  66
  67 inner.end:
  68   %i.next = add i64 %i, 1                                      ; note: no nuw/nsw flags on the increment
  69   %cond.i = icmp slt i64 %i.next, %y                           ; signed: keep looping while i + 1 < y
  70   br i1 %cond.i, label %outer.body, label %outer.end
  71
  72 outer.end:
  73   ret void
  74 }