1 ; RUN: opt -loop-vectorize -S < %s | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
4 target triple = "x86_64-unknown-linux-gnu"
8 ; LV retains the original scalar loop intact as the remainder loop. However,
9 ; after this transformation, analysis information concerning the remainder
10 ; loop may differ from that of the original scalar loop. This test is an
11 ; example of that behaviour: values inside the remainder loop which SCEV could
12 ; originally analyze now require flow-sensitive analysis that SCEV currently
13 ; does not support. In particular, during LV code generation, after turning
14 ; the original scalar loop into the remainder loop, LV expected
15 ; Legal->isConsecutivePtr() to be consistent and return the same result as
16 ; during the legality/cost-model phases (on the original scalar loop). That
17 ; condition was not satisfied because of the aforementioned SCEV limitation.
18 ; After D39346, LV code generation doesn't rely on Legal->isConsecutivePtr(),
19 ; i.e., on SCEV. This test verifies that LV is able to handle the described cases.
21 ; TODO: The SCEV limitation described above may affect plans to further
22 ; optimize the remainder loop of this particular test case. One tentative
23 ; solution is to detect the problematic IVs in LV (%7 and %8) and perform an
24 ; in-place IV optimization by replacing:
25 ; %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ] with
30 ; Verify that the store is vectorized as a stride-1 memory access.
32 ; CHECK-LABEL: @test_01(
33 ; CHECK-NOT: vector.body:
35 ; This test was originally vectorized, but SCEV is now smart enough to prove
36 ; that its trip count is 1, so it is ignored by the vectorizer.
37 ; Function Attrs: uwtable
38 define void @test_01() { ; loop nest driven by undef branches; the self-loop at %6 stores through %11
41 ; <label>:1: ; preds = %2
44 ; <label>:2: ; preds = %._crit_edge.loopexit
45 %3 = add nsw i32 %.ph, -2 ; %.ph minus 2; fed back into the %.ph phi in %.outer
46 br i1 undef, label %1, label %.outer
48 .outer: ; preds = %2, %0
49 %.ph = phi i32 [ %3, %2 ], [ 336, %0 ] ; 336 on entry from %0, %3 when re-entered from %2
50 %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ] ; 110 on entry from %0, 62 when re-entered from %2
55 ; <label>:6: ; preds = %6, %.outer
56 %7 = phi i32 [ %5, %.outer ], [ %13, %6 ] ; starts at %5, takes %13 on the back edge from %6
57 %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ] ; one iteration behind %7: previous %7, or %.ph2 on loop entry
59 %10 = zext i32 %9 to i64
60 %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10 ; store address: i32 element %10 off an undef addrspace(1) base
61 %12 = ashr i32 undef, %4
62 store i32 %12, i32 addrspace(1)* %11, align 4 ; only memory access in the visible loop body
64 %14 = icmp sgt i32 %13, 61 ; leave the self-loop once %13 > 61 (signed compare)
65 br i1 %14, label %._crit_edge.loopexit, label %6
67 ._crit_edge.loopexit: ; preds = %._crit_edge.loopexit, %6
68 br i1 undef, label %2, label %._crit_edge.loopexit
71 ; After the trip count is increased (exit bound 610 instead of 61), the test gets vectorized.
72 ; CHECK-LABEL: @test_02(
74 ; CHECK: store <4 x i32>
76 ; Function Attrs: uwtable
77 define void @test_02() {
80 ; <label>:1: ; preds = %2
83 ; <label>:2: ; preds = %._crit_edge.loopexit
84 %3 = add nsw i32 %.ph, -2
85 br i1 undef, label %1, label %.outer
87 .outer: ; preds = %2, %0
88 %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]
89 %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]
94 ; <label>:6: ; preds = %6, %.outer
95 %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]
96 %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
98 %10 = zext i32 %9 to i64
99 %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
100 %12 = ashr i32 undef, %4
101 store i32 %12, i32 addrspace(1)* %11, align 4
103 %14 = icmp sgt i32 %13, 610
104 br i1 %14, label %._crit_edge.loopexit, label %6
106 ._crit_edge.loopexit: ; preds = %._crit_edge.loopexit, %6
107 br i1 undef, label %2, label %._crit_edge.loopexit