llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll

   1 ; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s
   2
   3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
   4 target triple = "x86_64-unknown-linux-gnu"
   5
   6 ; PR34965/D39346
   7
   8 ; LV retains the original scalar loop intact as remainder loop. However,
   9 ; after this transformation, analysis information concerning the remainder
  10 ; loop may differ from the original scalar loop. This test is an example of
  11 ; that behaviour, where values inside the remainder loop which SCEV could
  12 ; originally analyze now require flow-sensitive analysis currently not
  13 ; supported in SCEV. In particular, during LV code generation, after turning
  14 ; the original scalar loop into the remainder loop, LV expected
  15 ; Legal->isConsecutivePtr() to be consistent and return the same output as
  16 ; during legal/cost model phases (original scalar loop). Unfortunately, that
  17 ; condition was not satisfied because of the aforementioned SCEV limitation.
  18 ; After D39346, LV code generation doesn't rely on Legal->isConsecutivePtr(),
  19 ; i.e., SCEV. This test verifies that LV is able to handle the described cases.
  20 ;
  21 ; TODO: The SCEV limitation described before may affect plans to further
  22 ; optimize the remainder loop of this particular test case. One tentative
  23 ; solution is to detect the problematic IVs in LV (%7 and %8) and perform an
  24 ; in-place IV optimization by replacing:
   25 ;   %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
  26 ; with
  27 ;   %8 = sub i32 %7, 1.
  28
  29
   30 ; Verify that the store is vectorized as a stride-1 memory access (see @test_02).
  31
  32 ; CHECK-LABEL: @test_01(
  33 ; CHECK-NOT: vector.body:
  34
   35 ; This test was originally vectorized, but now SCEV is smart enough to prove
   36 ; that its trip count is 1, so it gets ignored by the vectorizer.
  37 ; Function Attrs: uwtable
   38 define void @test_01() {                          ; loop nest whose inner loop (%6) runs exactly once per outer iteration
   39   br label %.outer
   40
   41 ; <label>:1:                                      ; preds = %2
   42   ret void
   43
   44 ; <label>:2:                                      ; preds = %._crit_edge.loopexit
   45   %3 = add nsw i32 %.ph, -2                       ; outer latch: next value of %.ph
   46   br i1 undef, label %1, label %.outer            ; undef condition: return or re-enter the outer loop
   47
   48 .outer:                                           ; preds = %2, %0
   49   %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]          ; starts at 336, decreases by 2 per outer iteration
   50   %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]         ; 110 on entry, 62 on every later outer iteration
   51   %4 = and i32 %.ph, 30                           ; shift amount for %12; invariant in the inner loop
   52   %5 = add i32 %.ph2, 1                           ; initial value of %7 (111 or 63)
   53   br label %6
   54
   55 ; <label>:6:                                      ; preds = %6, %.outer
   56   %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]       ; inner IV, advanced by 1 through %13
   57   %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]     ; previous iteration's %7 (initially %5 - 1)
   58   %9 = add i32 %8, 2
   59   %10 = zext i32 %9 to i64                        ; store index widened to i64
   60   %11 = getelementptr inbounds i32, ptr addrspace(1) undef, i64 %10
   61   %12 = ashr i32 undef, %4                        ; stored value; does not depend on the inner IV
   62   store i32 %12, ptr addrspace(1) %11, align 4
   63   %13 = add i32 %7, 1
   64   %14 = icmp sgt i32 %13, 61                      ; %13 is 112 or 64 on the first iteration, so the exit is taken immediately
   65   br i1 %14, label %._crit_edge.loopexit, label %6
   66
   67 ._crit_edge.loopexit:                             ; preds = %._crit_edge.loopexit, %6
   68   br i1 undef, label %2, label %._crit_edge.loopexit  ; undef condition: loop on this block or go to the outer latch
   69 }
  70
  71 ; After trip count is increased, the test gets vectorized.
  72 ; CHECK-LABEL: @test_02(
  73 ; CHECK: vector.body:
  74 ; CHECK: store <4 x i32>
  75
  76 ; Function Attrs: uwtable
   77 define void @test_02() {                          ; same loop nest as @test_01, but the inner exit bound is 610 instead of 61
   78   br label %.outer
   79
   80 ; <label>:1:                                      ; preds = %2
   81   ret void
   82
   83 ; <label>:2:                                      ; preds = %._crit_edge.loopexit
   84   %3 = add nsw i32 %.ph, -2                       ; outer latch: next value of %.ph
   85   br i1 undef, label %1, label %.outer            ; undef condition: return or re-enter the outer loop
   86
   87 .outer:                                           ; preds = %2, %0
   88   %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]          ; starts at 336, decreases by 2 per outer iteration
   89   %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]         ; 110 on entry, 62 on every later outer iteration
   90   %4 = and i32 %.ph, 30                           ; shift amount for %12; invariant in the inner loop
   91   %5 = add i32 %.ph2, 1                           ; initial value of %7 (111 or 63)
   92   br label %6
   93
   94 ; <label>:6:                                      ; preds = %6, %.outer
   95   %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]       ; inner IV, advanced by 1 through %13
   96   %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]     ; previous iteration's %7 (initially %5 - 1)
   97   %9 = add i32 %8, 2
   98   %10 = zext i32 %9 to i64                        ; store index widened to i64
   99   %11 = getelementptr inbounds i32, ptr addrspace(1) undef, i64 %10
  100   %12 = ashr i32 undef, %4                        ; stored value; does not depend on the inner IV
  101   store i32 %12, ptr addrspace(1) %11, align 4
  102   %13 = add i32 %7, 1
  103   %14 = icmp sgt i32 %13, 610                     ; exit once %13 exceeds 610: 500 iterations from 111, 548 from 63
  104   br i1 %14, label %._crit_edge.loopexit, label %6
  105
  106 ._crit_edge.loopexit:                             ; preds = %._crit_edge.loopexit, %6
  107   br i1 undef, label %2, label %._crit_edge.loopexit  ; undef condition: loop on this block or go to the outer latch
  108 }