1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
;void Distance(float *p1, int p2, unsigned long p3[], float p4[]) {
13 ; p4[0] += p1[p3[0] & a];
16 define void @_Z8DistanceIlLi5EEvPfiPmS0_(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
17 ; CHECK-LABEL: @_Z8DistanceIlLi5EEvPfiPmS0_(
19 ; CHECK-NEXT: store i64 5, ptr [[P3:%.*]], align 8
20 ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
21 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
22 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
23 ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
24 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
25 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
26 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
27 ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
28 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
29 ; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
30 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8
31 ; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
32 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
33 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP6]], 5
34 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR11]], i64 [[AND]]
35 ; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
36 ; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[P4]], align 4
37 ; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[TMP7]], [[TMP8]]
38 ; CHECK-NEXT: store float [[ADD15]], ptr [[P4]], align 4
39 ; CHECK-NEXT: ret void
42 store i64 5, ptr %p3, align 8
43 %idx.ext = sext i32 %p2 to i64
44 %add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
45 %arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
46 %0 = load float, ptr %arrayidx1, align 4
47 %arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
48 %1 = load float, ptr %arrayidx2, align 4
49 %add = fadd float %0, %1
50 store float %add, ptr %arrayidx2, align 4
51 %2 = load i64, ptr %p3, align 8
53 store i64 %shr, ptr %p3, align 8
54 %arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
55 %3 = load i64, ptr %arrayidx4, align 8
56 %shr5 = lshr i64 %3, 5
57 store i64 %shr5, ptr %arrayidx4, align 8
58 %arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
59 %4 = load i64, ptr %arrayidx6, align 8
60 %shr7 = lshr i64 %4, 5
61 store i64 %shr7, ptr %arrayidx6, align 8
62 %arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
63 %5 = load i64, ptr %arrayidx8, align 8
64 %shr9 = lshr i64 %5, 5
65 store i64 %shr9, ptr %arrayidx8, align 8
66 %add.ptr11 = getelementptr inbounds float, ptr %add.ptr, i64 %idx.ext
67 %and = and i64 %shr, 5
68 %arrayidx13 = getelementptr inbounds float, ptr %add.ptr11, i64 %and
69 %6 = load float, ptr %arrayidx13, align 4
70 %7 = load float, ptr %p4, align 4
71 %add15 = fadd float %6, %7
72 store float %add15, ptr %p4, align 4
76 define void @store_reverse(ptr %p3) {
77 ; CHECK-LABEL: @store_reverse(
79 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[P3:%.*]], i64 8
80 ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 4
81 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P3]], align 8
82 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[ARRAYIDX1]], align 8
83 ; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
84 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
85 ; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], ptr [[ARRAYIDX14]], align 8
86 ; CHECK-NEXT: ret void
89 %0 = load i64, ptr %p3, align 8
90 %arrayidx1 = getelementptr inbounds i64, ptr %p3, i64 8
91 %1 = load i64, ptr %arrayidx1, align 8
93 %arrayidx2 = getelementptr inbounds i64, ptr %p3, i64 7
94 store i64 %shl, ptr %arrayidx2, align 8
95 %arrayidx3 = getelementptr inbounds i64, ptr %p3, i64 1
96 %2 = load i64, ptr %arrayidx3, align 8
97 %arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 9
98 %3 = load i64, ptr %arrayidx4, align 8
99 %shl5 = shl i64 %2, %3
100 %arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 6
101 store i64 %shl5, ptr %arrayidx6, align 8
102 %arrayidx7 = getelementptr inbounds i64, ptr %p3, i64 2
103 %4 = load i64, ptr %arrayidx7, align 8
104 %arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 10
105 %5 = load i64, ptr %arrayidx8, align 8
106 %shl9 = shl i64 %4, %5
107 %arrayidx10 = getelementptr inbounds i64, ptr %p3, i64 5
108 store i64 %shl9, ptr %arrayidx10, align 8
109 %arrayidx11 = getelementptr inbounds i64, ptr %p3, i64 3
110 %6 = load i64, ptr %arrayidx11, align 8
111 %arrayidx12 = getelementptr inbounds i64, ptr %p3, i64 11
112 %7 = load i64, ptr %arrayidx12, align 8
113 %shl13 = shl i64 %6, %7
114 %arrayidx14 = getelementptr inbounds i64, ptr %p3, i64 4
115 store i64 %shl13, ptr %arrayidx14, align 8
119 define void @store15(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
120 ; CHECK-LABEL: @store15(
122 ; CHECK-NEXT: store i64 5, ptr [[P3:%.*]], align 8
123 ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
124 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
125 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
126 ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
127 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
128 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
129 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
130 ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
131 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 5
132 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
133 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
134 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
135 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
136 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
137 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
138 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
139 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
140 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
141 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
142 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
143 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
144 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
145 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
146 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
147 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
148 ; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
149 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8
150 ; CHECK-NEXT: ret void
153 store i64 5, ptr %p3, align 8
154 %idx.ext = sext i32 %p2 to i64
155 %add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
156 %arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
157 %0 = load float, ptr %arrayidx1, align 4
158 %arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
159 %1 = load float, ptr %arrayidx2, align 4
160 %add = fadd float %0, %1
161 store float %add, ptr %arrayidx2, align 4
162 %2 = load i64, ptr %p3, align 8
163 %shr = lshr i64 %2, 5
164 store i64 %shr, ptr %p3, align 8
165 %arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
166 %3 = load i64, ptr %arrayidx4, align 8
167 %shr5 = lshr i64 %3, 5
168 store i64 %shr5, ptr %arrayidx4, align 8
169 %arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
170 %4 = load i64, ptr %arrayidx6, align 8
171 %shr7 = lshr i64 %4, 5
172 store i64 %shr7, ptr %arrayidx6, align 8
173 %arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
174 %5 = load i64, ptr %arrayidx8, align 8
175 %shr9 = lshr i64 %5, 5
176 %arrayidx9 = getelementptr inbounds i64, ptr %p3, i64 5
177 store i64 5, ptr %arrayidx9, align 8
178 store i64 5, ptr %arrayidx9, align 8
179 store i64 5, ptr %arrayidx9, align 8
180 store i64 5, ptr %arrayidx9, align 8
181 store i64 5, ptr %arrayidx9, align 8
182 store i64 5, ptr %arrayidx9, align 8
183 store i64 5, ptr %arrayidx9, align 8
184 store i64 5, ptr %arrayidx9, align 8
185 store i64 5, ptr %arrayidx9, align 8
186 store i64 5, ptr %arrayidx9, align 8
187 store i64 5, ptr %arrayidx9, align 8
188 store i64 5, ptr %arrayidx9, align 8
189 store i64 5, ptr %arrayidx9, align 8
190 store i64 5, ptr %arrayidx9, align 8
191 store i64 5, ptr %arrayidx9, align 8
192 store i64 %shr9, ptr %arrayidx8, align 8
196 define void @store16(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
197 ; CHECK-LABEL: @store16(
199 ; CHECK-NEXT: store i64 5, ptr [[P3:%.*]], align 8
200 ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
201 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
202 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
203 ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
204 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
205 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
206 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
207 ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
208 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 5
209 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
210 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
211 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
212 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
213 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
214 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
215 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
216 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
217 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
218 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
219 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
220 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
221 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
222 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
223 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
224 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
225 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
226 ; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
227 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8
228 ; CHECK-NEXT: ret void
231 store i64 5, ptr %p3, align 8
232 %idx.ext = sext i32 %p2 to i64
233 %add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
234 %arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
235 %0 = load float, ptr %arrayidx1, align 4
236 %arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
237 %1 = load float, ptr %arrayidx2, align 4
238 %add = fadd float %0, %1
239 store float %add, ptr %arrayidx2, align 4
240 %2 = load i64, ptr %p3, align 8
241 %shr = lshr i64 %2, 5
242 store i64 %shr, ptr %p3, align 8
243 %arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
244 %3 = load i64, ptr %arrayidx4, align 8
245 %shr5 = lshr i64 %3, 5
246 store i64 %shr5, ptr %arrayidx4, align 8
247 %arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
248 %4 = load i64, ptr %arrayidx6, align 8
249 %shr7 = lshr i64 %4, 5
250 store i64 %shr7, ptr %arrayidx6, align 8
251 %arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
252 %5 = load i64, ptr %arrayidx8, align 8
253 %shr9 = lshr i64 %5, 5
254 %arrayidx9 = getelementptr inbounds i64, ptr %p3, i64 5
255 store i64 5, ptr %arrayidx9, align 8
256 store i64 5, ptr %arrayidx9, align 8
257 store i64 5, ptr %arrayidx9, align 8
258 store i64 5, ptr %arrayidx9, align 8
259 store i64 5, ptr %arrayidx9, align 8
260 store i64 5, ptr %arrayidx9, align 8
261 store i64 5, ptr %arrayidx9, align 8
262 store i64 5, ptr %arrayidx9, align 8
263 store i64 5, ptr %arrayidx9, align 8
264 store i64 5, ptr %arrayidx9, align 8
265 store i64 5, ptr %arrayidx9, align 8
266 store i64 5, ptr %arrayidx9, align 8
267 store i64 5, ptr %arrayidx9, align 8
268 store i64 5, ptr %arrayidx9, align 8
269 store i64 5, ptr %arrayidx9, align 8
270 store i64 5, ptr %arrayidx9, align 8
271 store i64 %shr9, ptr %arrayidx8, align 8