1 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -S -o - %s | FileCheck %s
2 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
4 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
6 ; Verify that subsets of load/store chains are still vectorized when loads
7 ; and stores to the same addresses are interleaved.
; Test case: two scalar i32 loads, two scalar i32 zero-stores, then two more
; scalar i32 loads, all addressing adjacent elements of one noalias pointer.
; The directives below expect, in order, a <2 x i32> load, a <2 x i32> store
; and a second <2 x i32> load in the pass output.
9 ; CHECK-LABEL: @interleave_2L_2S(
10 ; CHECK: load <2 x i32>
11 ; CHECK: store <2 x i32>
12 ; CHECK: load <2 x i32>
13 define void @interleave_2L_2S(ptr noalias %ptr) {
; Addresses of i32 elements 1 and 2; element 0 is %ptr itself.
14 %next.gep1 = getelementptr i32, ptr %ptr, i64 1
15 %next.gep2 = getelementptr i32, ptr %ptr, i64 2
; Leading loads: element 1 first, then element 0.
17 %l1 = load i32, ptr %next.gep1, align 4
18 %l2 = load i32, ptr %ptr, align 4
; Zero-stores to elements 1 and 0, placed between the two groups of loads.
19 store i32 0, ptr %next.gep1, align 4
20 store i32 0, ptr %ptr, align 4
; Trailing loads: elements 1 and 2.
21 %l3 = load i32, ptr %next.gep1, align 4
22 %l4 = load i32, ptr %next.gep2, align 4
; Test case: loads of elements 0 and 1, zero-stores to elements 1 and 0, then
; loads of elements 1 and 2, all relative to one noalias pointer.
; The directives below expect, in order, a <2 x i32> load, a <2 x i32> store
; and a second <2 x i32> load in the pass output.
; NOTE(review): the function name reads as "3 loads, 2 stores, 1 load", but the
; visible body contains 2 loads, 2 stores and 2 loads -- confirm whether the
; name is stale or intentional.
27 ; CHECK-LABEL: @interleave_3L_2S_1L(
28 ; CHECK: load <2 x i32>
29 ; CHECK: store <2 x i32>
30 ; CHECK: load <2 x i32>
32 define void @interleave_3L_2S_1L(ptr noalias %ptr) {
; Addresses of i32 elements 1 and 2; element 0 is %ptr itself.
33 %next.gep1 = getelementptr i32, ptr %ptr, i64 1
34 %next.gep2 = getelementptr i32, ptr %ptr, i64 2
; Leading loads: element 0 first, then element 1.
36 %l2 = load i32, ptr %ptr, align 4
37 %l1 = load i32, ptr %next.gep1, align 4
; Zero-stores to elements 1 and 0, placed between the two groups of loads.
38 store i32 0, ptr %next.gep1, align 4
39 store i32 0, ptr %ptr, align 4
; Trailing loads: elements 1 and 2.
40 %l3 = load i32, ptr %next.gep1, align 4
41 %l4 = load i32, ptr %next.gep2, align 4
; Test case: a single leading load of element 0, zero-stores to elements 1 and
; 0, then loads of elements 1 and 2, all relative to one noalias pointer.
; The directives below expect a <2 x i32> store followed by a <2 x i32> load
; in the pass output.
46 ; CHECK-LABEL: @chain_suffix(
48 ; CHECK: store <2 x i32>
49 ; CHECK: load <2 x i32>
50 define void @chain_suffix(ptr noalias %ptr) {
; Addresses of i32 elements 1 and 2; element 0 is %ptr itself.
51 %next.gep1 = getelementptr i32, ptr %ptr, i64 1
52 %next.gep2 = getelementptr i32, ptr %ptr, i64 2
; The only load that precedes the stores: element 0.
54 %l2 = load i32, ptr %ptr, align 4
; Zero-stores to elements 1 and 0.
55 store i32 0, ptr %next.gep1, align 4
56 store i32 0, ptr %ptr, align 4
; Trailing loads: elements 1 and 2.
57 %l3 = load i32, ptr %next.gep1, align 4
58 %l4 = load i32, ptr %next.gep2, align 4
; Test case: loads of elements 0 and 1, zero-stores to elements 1 and 2, then
; loads of elements 1, 2 and 3, all relative to one noalias pointer.
; The directives below expect, in order, a <2 x i32> load, a <2 x i32> store
; and a <3 x i32> load in the pass output.
64 ; CHECK-LABEL: @chain_prefix_suffix(
65 ; CHECK: load <2 x i32>
66 ; CHECK: store <2 x i32>
67 ; CHECK: load <3 x i32>
68 define void @chain_prefix_suffix(ptr noalias %ptr) {
; Addresses of i32 elements 1, 2 and 3; element 0 is %ptr itself.
69 %next.gep1 = getelementptr i32, ptr %ptr, i64 1
70 %next.gep2 = getelementptr i32, ptr %ptr, i64 2
71 %next.gep3 = getelementptr i32, ptr %ptr, i64 3
; Leading loads: elements 0 and 1.
73 %l1 = load i32, ptr %ptr, align 4
74 %l2 = load i32, ptr %next.gep1, align 4
; Zero-stores to elements 1 and 2, placed between the two groups of loads.
75 store i32 0, ptr %next.gep1, align 4
76 store i32 0, ptr %next.gep2, align 4
; Trailing loads: elements 1, 2 and 3.
77 %l3 = load i32, ptr %next.gep1, align 4
78 %l4 = load i32, ptr %next.gep2, align 4
79 %l5 = load i32, ptr %next.gep3, align 4
; Test case: loads of elements 1 and 0, zero-stores to elements 1 and 0, then
; loads of elements 1, 2, 3 and 4 (element 4 is loaded twice), all relative to
; one noalias pointer and all with align 4.
; The directives below expect, in order, a <2 x i32> load, a
; <2 x i32> zeroinitializer store and a <3 x i32> load in the pass output.
; NOTE(review): unlike the earlier labels in this file, the label below has no
; trailing "(", so its text is also a prefix of the name
; interleave_get_longest_aligned -- consider adding "(" for consistency.
84 ; CHECK-LABEL: @interleave_get_longest
85 ; CHECK: load <2 x i32>
86 ; CHECK: store <2 x i32> zeroinitializer
87 ; CHECK: load <3 x i32>
91 define void @interleave_get_longest(ptr noalias %ptr) {
; Addresses of i32 elements 1 through 4; element 0 is %ptr itself.
92 %tmp2 = getelementptr i32, ptr %ptr, i64 1
93 %tmp3 = getelementptr i32, ptr %ptr, i64 2
94 %tmp4 = getelementptr i32, ptr %ptr, i64 3
95 %tmp5 = getelementptr i32, ptr %ptr, i64 4
; Leading loads: element 1 first, then element 0.
97 %l1 = load i32, ptr %tmp2, align 4
98 %l2 = load i32, ptr %ptr, align 4
; Zero-stores to elements 1 and 0, placed between the two groups of loads.
99 store i32 0, ptr %tmp2, align 4
100 store i32 0, ptr %ptr, align 4
; Trailing loads: elements 1, 2, 3 and 4.
101 %l3 = load i32, ptr %tmp2, align 4
102 %l4 = load i32, ptr %tmp3, align 4
103 %l5 = load i32, ptr %tmp4, align 4
104 %l6 = load i32, ptr %tmp5, align 4
; Second load of element 4 (same address as %l6).
; NOTE(review): presumably a deliberate duplicate -- confirm.
105 %l7 = load i32, ptr %tmp5, align 4
; Test case: same access pattern as @interleave_get_longest (loads of elements
; 1 and 0, zero-stores to elements 1 and 0, then loads of elements 1, 2, 3, 4
; and 4 again), except that the trailing load of element 1 is marked align 16
; and the trailing load of element 3 is marked align 8.
; The directives below expect, in order, a <2 x i32> load, a
; <2 x i32> zeroinitializer store and a <4 x i32> load in the pass output.
; NOTE(review): presumably the larger alignment is what permits the wider
; <4 x i32> load here -- confirm against the pass implementation.
110 ; CHECK-LABEL: @interleave_get_longest_aligned
111 ; CHECK: load <2 x i32>
112 ; CHECK: store <2 x i32> zeroinitializer
113 ; CHECK: load <4 x i32>
115 define void @interleave_get_longest_aligned(ptr noalias %ptr) {
; Addresses of i32 elements 1 through 4; element 0 is %ptr itself.
116 %tmp2 = getelementptr i32, ptr %ptr, i64 1
117 %tmp3 = getelementptr i32, ptr %ptr, i64 2
118 %tmp4 = getelementptr i32, ptr %ptr, i64 3
119 %tmp5 = getelementptr i32, ptr %ptr, i64 4
; Leading loads: element 1 first, then element 0 (both align 4).
121 %l1 = load i32, ptr %tmp2, align 4
122 %l2 = load i32, ptr %ptr, align 4
; Zero-stores to elements 1 and 0, placed between the two groups of loads.
123 store i32 0, ptr %tmp2, align 4
124 store i32 0, ptr %ptr, align 4
; Trailing loads: elements 1 (align 16), 2 (align 4), 3 (align 8), 4 (align 4).
125 %l3 = load i32, ptr %tmp2, align 16
126 %l4 = load i32, ptr %tmp3, align 4
127 %l5 = load i32, ptr %tmp4, align 8
128 %l6 = load i32, ptr %tmp5, align 4
; Second load of element 4 (same address as %l6).
129 %l7 = load i32, ptr %tmp5, align 4