1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
4 ;void Distance(float *p1, int p2, unsigned long p3[], float p4[]) {
13 ; p4[0] += p1[p3[0] & a];
; Test that SLP vectorizes the four consecutive scalar i64 "p3[i] >>= 5"
; load/lshr/store chains (i = 0..3) into a single <4 x i64> load, vector
; lshr by a splat of 5, and a <4 x i64> store, while the surrounding scalar
; float adds stay scalar. The first lane's shifted value is then re-used via
; an extractelement (see [[TMP6]]/[[AND]] below) to index p4's source.
16 define void @_Z8DistanceIlLi5EEvPfiPmS0_(float* %p1, i32 %p2, i64* %p3, float* %p4) {
17 ; CHECK-LABEL: @_Z8DistanceIlLi5EEvPfiPmS0_(
19 ; CHECK-NEXT: store i64 5, i64* [[P3:%.*]], align 8
20 ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
21 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
22 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
23 ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
24 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
25 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
26 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
27 ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4
28 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1
29 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2
30 ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3
31 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
32 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
33 ; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
34 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
35 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
36 ; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 [[IDX_EXT]]
37 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
38 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP6]], 5
39 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[ADD_PTR11]], i64 [[AND]]
40 ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX13]], align 4
41 ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[P4]], align 4
42 ; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[TMP7]], [[TMP8]]
43 ; CHECK-NEXT: store float [[ADD15]], float* [[P4]], align 4
44 ; CHECK-NEXT: ret void
; Scalar input IR: four independent p3[i] >>= 5 chains that SLP is expected
; to combine into the <4 x i64> operations checked above.
47 store i64 5, i64* %p3, align 8
48 %idx.ext = sext i32 %p2 to i64
49 %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
50 %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
51 %0 = load float, float* %arrayidx1, align 4
52 %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
53 %1 = load float, float* %arrayidx2, align 4
54 %add = fadd float %0, %1
55 store float %add, float* %arrayidx2, align 4
56 %2 = load i64, i64* %p3, align 8
58 store i64 %shr, i64* %p3, align 8
59 %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
60 %3 = load i64, i64* %arrayidx4, align 8
61 %shr5 = lshr i64 %3, 5
62 store i64 %shr5, i64* %arrayidx4, align 8
63 %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
64 %4 = load i64, i64* %arrayidx6, align 8
65 %shr7 = lshr i64 %4, 5
66 store i64 %shr7, i64* %arrayidx6, align 8
67 %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
68 %5 = load i64, i64* %arrayidx8, align 8
69 %shr9 = lshr i64 %5, 5
70 store i64 %shr9, i64* %arrayidx8, align 8
; %shr (lane 0's shifted value) is re-used below as an index, which forces
; the extractelement from the vectorized shift seen in the CHECK lines.
71 %add.ptr11 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext
72 %and = and i64 %shr, 5
73 %arrayidx13 = getelementptr inbounds float, float* %add.ptr11, i64 %and
74 %6 = load float, float* %arrayidx13, align 4
75 %7 = load float, float* %p4, align 4
76 %add15 = fadd float %6, %7
77 store float %add15, float* %p4, align 4
; Test vectorization of stores issued in reverse address order: the scalar
; code loads p3[0..3] and p3[8..11], shifts pairwise, and stores the results
; to p3[7], p3[6], p3[5], p3[4] (descending). SLP is expected to build the
; <4 x i64> shl and emit a reversing shufflevector (<3,2,1,0> mask) so a
; single contiguous <4 x i64> store at p3[4] can be used.
81 define void @store_reverse(i64* %p3) {
82 ; CHECK-LABEL: @store_reverse(
84 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[P3:%.*]], i64 8
85 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 7
86 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1
87 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 9
88 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 6
89 ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2
90 ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 10
91 ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
92 ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3
93 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
94 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8
95 ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 11
96 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[ARRAYIDX1]] to <4 x i64>*
97 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
98 ; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
99 ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 4
100 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
101 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>*
102 ; CHECK-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* [[TMP6]], align 8
103 ; CHECK-NEXT: ret void
; Scalar input IR: p3[7-i] = p3[i] << p3[8+i] for i = 0..3.
106 %0 = load i64, i64* %p3, align 8
107 %arrayidx1 = getelementptr inbounds i64, i64* %p3, i64 8
108 %1 = load i64, i64* %arrayidx1, align 8
109 %shl = shl i64 %0, %1
110 %arrayidx2 = getelementptr inbounds i64, i64* %p3, i64 7
111 store i64 %shl, i64* %arrayidx2, align 8
112 %arrayidx3 = getelementptr inbounds i64, i64* %p3, i64 1
113 %2 = load i64, i64* %arrayidx3, align 8
114 %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 9
115 %3 = load i64, i64* %arrayidx4, align 8
116 %shl5 = shl i64 %2, %3
117 %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 6
118 store i64 %shl5, i64* %arrayidx6, align 8
119 %arrayidx7 = getelementptr inbounds i64, i64* %p3, i64 2
120 %4 = load i64, i64* %arrayidx7, align 8
121 %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 10
122 %5 = load i64, i64* %arrayidx8, align 8
123 %shl9 = shl i64 %4, %5
124 %arrayidx10 = getelementptr inbounds i64, i64* %p3, i64 5
125 store i64 %shl9, i64* %arrayidx10, align 8
126 %arrayidx11 = getelementptr inbounds i64, i64* %p3, i64 3
127 %6 = load i64, i64* %arrayidx11, align 8
128 %arrayidx12 = getelementptr inbounds i64, i64* %p3, i64 11
129 %7 = load i64, i64* %arrayidx12, align 8
130 %shl13 = shl i64 %6, %7
131 %arrayidx14 = getelementptr inbounds i64, i64* %p3, i64 4
132 store i64 %shl13, i64* %arrayidx14, align 8
; Same p3[0..3] >>= 5 pattern as the Distance test, but with 15 unrelated
; stores of the constant 5 to p3[5] interleaved before the last scalar store.
; The CHECK lines show the <4 x i64> load/lshr/store is still formed with the
; 15 constant stores scheduled in between.
; NOTE(review): presumably this (together with store16 below, which uses 16
; stores) probes a scheduling-window / store-count limit in the SLP
; vectorizer -- confirm against the pass's current thresholds.
136 define void @store15(float* %p1, i32 %p2, i64* %p3, float* %p4) {
137 ; CHECK-LABEL: @store15(
139 ; CHECK-NEXT: store i64 5, i64* [[P3:%.*]], align 8
140 ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
141 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
142 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
143 ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
144 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
145 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
146 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
147 ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4
148 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1
149 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2
150 ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3
151 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
152 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
153 ; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
154 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
155 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
156 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
157 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
158 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
159 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
160 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
161 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
162 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
163 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
164 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
165 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
166 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
167 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
168 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
169 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
170 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
171 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
172 ; CHECK-NEXT: ret void
; Scalar input IR: the fourth chain's store (of %shr9 to p3[3]) is deferred
; until after the 15 constant stores to p3[5].
175 store i64 5, i64* %p3, align 8
176 %idx.ext = sext i32 %p2 to i64
177 %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
178 %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
179 %0 = load float, float* %arrayidx1, align 4
180 %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
181 %1 = load float, float* %arrayidx2, align 4
182 %add = fadd float %0, %1
183 store float %add, float* %arrayidx2, align 4
184 %2 = load i64, i64* %p3, align 8
185 %shr = lshr i64 %2, 5
186 store i64 %shr, i64* %p3, align 8
187 %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
188 %3 = load i64, i64* %arrayidx4, align 8
189 %shr5 = lshr i64 %3, 5
190 store i64 %shr5, i64* %arrayidx4, align 8
191 %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
192 %4 = load i64, i64* %arrayidx6, align 8
193 %shr7 = lshr i64 %4, 5
194 store i64 %shr7, i64* %arrayidx6, align 8
195 %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
196 %5 = load i64, i64* %arrayidx8, align 8
197 %shr9 = lshr i64 %5, 5
198 %arrayidx9 = getelementptr inbounds i64, i64* %p3, i64 5
199 store i64 5, i64* %arrayidx9, align 8
200 store i64 5, i64* %arrayidx9, align 8
201 store i64 5, i64* %arrayidx9, align 8
202 store i64 5, i64* %arrayidx9, align 8
203 store i64 5, i64* %arrayidx9, align 8
204 store i64 5, i64* %arrayidx9, align 8
205 store i64 5, i64* %arrayidx9, align 8
206 store i64 5, i64* %arrayidx9, align 8
207 store i64 5, i64* %arrayidx9, align 8
208 store i64 5, i64* %arrayidx9, align 8
209 store i64 5, i64* %arrayidx9, align 8
210 store i64 5, i64* %arrayidx9, align 8
211 store i64 5, i64* %arrayidx9, align 8
212 store i64 5, i64* %arrayidx9, align 8
213 store i64 5, i64* %arrayidx9, align 8
214 store i64 %shr9, i64* %arrayidx8, align 8
; Identical to @store15 except there are 16 (not 15) constant stores to p3[5]
; between the last scalar shift and its store. The CHECK lines show the
; <4 x i64> load/lshr/store is still formed with all 16 stores in between.
; NOTE(review): the 15-vs-16 pairing presumably brackets a scheduling-window /
; store-count threshold in the SLP vectorizer -- confirm against the pass's
; current limits.
218 define void @store16(float* %p1, i32 %p2, i64* %p3, float* %p4) {
219 ; CHECK-LABEL: @store16(
221 ; CHECK-NEXT: store i64 5, i64* [[P3:%.*]], align 8
222 ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
223 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
224 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
225 ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
226 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
227 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
228 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
229 ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4
230 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1
231 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2
232 ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3
233 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
234 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
235 ; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
236 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
237 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
238 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
239 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
240 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
241 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
242 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
243 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
244 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
245 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
246 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
247 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
248 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
249 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
250 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
251 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
252 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8
253 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
254 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
255 ; CHECK-NEXT: ret void
; Scalar input IR: the fourth chain's store (of %shr9 to p3[3]) is deferred
; until after the 16 constant stores to p3[5].
258 store i64 5, i64* %p3, align 8
259 %idx.ext = sext i32 %p2 to i64
260 %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
261 %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
262 %0 = load float, float* %arrayidx1, align 4
263 %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
264 %1 = load float, float* %arrayidx2, align 4
265 %add = fadd float %0, %1
266 store float %add, float* %arrayidx2, align 4
267 %2 = load i64, i64* %p3, align 8
268 %shr = lshr i64 %2, 5
269 store i64 %shr, i64* %p3, align 8
270 %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
271 %3 = load i64, i64* %arrayidx4, align 8
272 %shr5 = lshr i64 %3, 5
273 store i64 %shr5, i64* %arrayidx4, align 8
274 %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
275 %4 = load i64, i64* %arrayidx6, align 8
276 %shr7 = lshr i64 %4, 5
277 store i64 %shr7, i64* %arrayidx6, align 8
278 %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
279 %5 = load i64, i64* %arrayidx8, align 8
280 %shr9 = lshr i64 %5, 5
281 %arrayidx9 = getelementptr inbounds i64, i64* %p3, i64 5
282 store i64 5, i64* %arrayidx9, align 8
283 store i64 5, i64* %arrayidx9, align 8
284 store i64 5, i64* %arrayidx9, align 8
285 store i64 5, i64* %arrayidx9, align 8
286 store i64 5, i64* %arrayidx9, align 8
287 store i64 5, i64* %arrayidx9, align 8
288 store i64 5, i64* %arrayidx9, align 8
289 store i64 5, i64* %arrayidx9, align 8
290 store i64 5, i64* %arrayidx9, align 8
291 store i64 5, i64* %arrayidx9, align 8
292 store i64 5, i64* %arrayidx9, align 8
293 store i64 5, i64* %arrayidx9, align 8
294 store i64 5, i64* %arrayidx9, align 8
295 store i64 5, i64* %arrayidx9, align 8
296 store i64 5, i64* %arrayidx9, align 8
297 store i64 5, i64* %arrayidx9, align 8
298 store i64 %shr9, i64* %arrayidx8, align 8