1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64 -S | FileCheck %s --check-prefixes=CHECK,SSE
3 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v2 -S | FileCheck %s --check-prefixes=CHECK,SSE
4 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v3 -S | FileCheck %s --check-prefixes=CHECK,AVX
5 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v4 -S | FileCheck %s --check-prefixes=CHECK,AVX
8 ; void baz(unsigned char *dst, unsigned char *src) {
9 ; for( int x = 0; x < 8; x++ ) {
10 ; dst[x] = src[x]&(~63);
; and4: element-wise byte operation over four consecutive bytes, reading
; %src[0..3] and writing %dst[0..3]. Both pointers are marked noalias, and
; every scalar access uses align 1.
; NOTE(review): the definitions of the stored values (%1, %3, %5, %7) are not
; visible in this excerpt; presumably each is `and i8 <loaded byte>, -64`
; (i.e. src[x] & ~63, clearing the low six bits), which is what the expected
; vector `and` below implies -- confirm against the full test file.
14 define void @and4(ptr noalias nocapture noundef writeonly %dst, ptr noalias nocapture noundef readonly %src) {
; Expected result (shared by all four opt invocations at the top of the file):
; one <4 x i8> load, one `and` with a splat of -64, and one <4 x i8> store.
17 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
18 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64>
19 ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
20 ; CHECK-NEXT: ret void
; Element 0 uses %src / %dst directly; elements 1-3 go through explicit GEPs.
23 %0 = load i8, ptr %src, align 1
25 store i8 %1, ptr %dst, align 1
26 %arrayidx.1 = getelementptr inbounds i8, ptr %src, i64 1
27 %2 = load i8, ptr %arrayidx.1, align 1
29 %arrayidx3.1 = getelementptr inbounds i8, ptr %dst, i64 1
30 store i8 %3, ptr %arrayidx3.1, align 1
31 %arrayidx.2 = getelementptr inbounds i8, ptr %src, i64 2
32 %4 = load i8, ptr %arrayidx.2, align 1
34 %arrayidx3.2 = getelementptr inbounds i8, ptr %dst, i64 2
35 store i8 %5, ptr %arrayidx3.2, align 1
36 %arrayidx.3 = getelementptr inbounds i8, ptr %src, i64 3
37 %6 = load i8, ptr %arrayidx.3, align 1
39 %arrayidx3.3 = getelementptr inbounds i8, ptr %dst, i64 3
40 store i8 %7, ptr %arrayidx3.3, align 1
; and8: element-wise byte operation over eight consecutive bytes, reading
; %src[0..7] and writing %dst[0..7]. Both pointers are marked noalias, and
; every scalar access uses align 1.
; NOTE(review): the definitions of the stored values (%1, %3, ..., %15) are
; not visible in this excerpt; presumably each is `and i8 <loaded byte>, -64`
; (i.e. src[x] & ~63), matching the expected vector `and` below -- confirm
; against the full test file.
44 define void @and8(ptr noalias nocapture noundef writeonly %dst, ptr noalias nocapture noundef readonly %src) {
; Expected result (shared by all four opt invocations at the top of the file):
; one <8 x i8> load, one `and` with a splat of -64, and one <8 x i8> store.
47 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
48 ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
49 ; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
50 ; CHECK-NEXT: ret void
; Element 0 uses %src / %dst directly; elements 1-7 go through explicit GEPs.
53 %0 = load i8, ptr %src, align 1
55 store i8 %1, ptr %dst, align 1
56 %arrayidx.1 = getelementptr inbounds i8, ptr %src, i64 1
57 %2 = load i8, ptr %arrayidx.1, align 1
59 %arrayidx3.1 = getelementptr inbounds i8, ptr %dst, i64 1
60 store i8 %3, ptr %arrayidx3.1, align 1
61 %arrayidx.2 = getelementptr inbounds i8, ptr %src, i64 2
62 %4 = load i8, ptr %arrayidx.2, align 1
64 %arrayidx3.2 = getelementptr inbounds i8, ptr %dst, i64 2
65 store i8 %5, ptr %arrayidx3.2, align 1
66 %arrayidx.3 = getelementptr inbounds i8, ptr %src, i64 3
67 %6 = load i8, ptr %arrayidx.3, align 1
69 %arrayidx3.3 = getelementptr inbounds i8, ptr %dst, i64 3
70 store i8 %7, ptr %arrayidx3.3, align 1
71 %arrayidx.4 = getelementptr inbounds i8, ptr %src, i64 4
72 %8 = load i8, ptr %arrayidx.4, align 1
74 %arrayidx3.4 = getelementptr inbounds i8, ptr %dst, i64 4
75 store i8 %9, ptr %arrayidx3.4, align 1
76 %arrayidx.5 = getelementptr inbounds i8, ptr %src, i64 5
77 %10 = load i8, ptr %arrayidx.5, align 1
79 %arrayidx3.5 = getelementptr inbounds i8, ptr %dst, i64 5
80 store i8 %11, ptr %arrayidx3.5, align 1
81 %arrayidx.6 = getelementptr inbounds i8, ptr %src, i64 6
82 %12 = load i8, ptr %arrayidx.6, align 1
84 %arrayidx3.6 = getelementptr inbounds i8, ptr %dst, i64 6
85 store i8 %13, ptr %arrayidx3.6, align 1
86 %arrayidx.7 = getelementptr inbounds i8, ptr %src, i64 7
87 %14 = load i8, ptr %arrayidx.7, align 1
89 %arrayidx3.7 = getelementptr inbounds i8, ptr %dst, i64 7
90 store i8 %15, ptr %arrayidx3.7, align 1
; and16: element-wise byte operation over sixteen consecutive bytes, reading
; %src[0..15] and writing %dst[0..15]. Both pointers are marked noalias, and
; every scalar access uses align 1.
; For elements 5-15 the visible scalar pattern is load -> `and i8 x, -64` ->
; store (-64 as i8 is ~63, so the low six bits are cleared).
; NOTE(review): the definitions of %1, %3, %5, %7 and %9 (elements 0-4) are
; not visible in this excerpt; presumably they follow the same `and` pattern
; -- confirm against the full test file.
94 define void @and16(ptr noalias nocapture noundef writeonly %dst, ptr noalias nocapture noundef readonly %src) {
95 ; CHECK-LABEL: @and16(
; Expected result (shared by all four opt invocations at the top of the file):
; one <16 x i8> load, one `and` with a splat of -64, and one <16 x i8> store.
97 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
98 ; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
99 ; CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
100 ; CHECK-NEXT: ret void
; Element 0 uses %src / %dst directly; elements 1-15 go through explicit GEPs.
103 %0 = load i8, ptr %src, align 1
105 store i8 %1, ptr %dst, align 1
106 %arrayidx.1 = getelementptr inbounds i8, ptr %src, i64 1
107 %2 = load i8, ptr %arrayidx.1, align 1
109 %arrayidx3.1 = getelementptr inbounds i8, ptr %dst, i64 1
110 store i8 %3, ptr %arrayidx3.1, align 1
111 %arrayidx.2 = getelementptr inbounds i8, ptr %src, i64 2
112 %4 = load i8, ptr %arrayidx.2, align 1
114 %arrayidx3.2 = getelementptr inbounds i8, ptr %dst, i64 2
115 store i8 %5, ptr %arrayidx3.2, align 1
116 %arrayidx.3 = getelementptr inbounds i8, ptr %src, i64 3
117 %6 = load i8, ptr %arrayidx.3, align 1
119 %arrayidx3.3 = getelementptr inbounds i8, ptr %dst, i64 3
120 store i8 %7, ptr %arrayidx3.3, align 1
121 %arrayidx.4 = getelementptr inbounds i8, ptr %src, i64 4
122 %8 = load i8, ptr %arrayidx.4, align 1
124 %arrayidx3.4 = getelementptr inbounds i8, ptr %dst, i64 4
125 store i8 %9, ptr %arrayidx3.4, align 1
126 %arrayidx.5 = getelementptr inbounds i8, ptr %src, i64 5
127 %10 = load i8, ptr %arrayidx.5, align 1
128 %11 = and i8 %10, -64
129 %arrayidx3.5 = getelementptr inbounds i8, ptr %dst, i64 5
130 store i8 %11, ptr %arrayidx3.5, align 1
131 %arrayidx.6 = getelementptr inbounds i8, ptr %src, i64 6
132 %12 = load i8, ptr %arrayidx.6, align 1
133 %13 = and i8 %12, -64
134 %arrayidx3.6 = getelementptr inbounds i8, ptr %dst, i64 6
135 store i8 %13, ptr %arrayidx3.6, align 1
136 %arrayidx.7 = getelementptr inbounds i8, ptr %src, i64 7
137 %14 = load i8, ptr %arrayidx.7, align 1
138 %15 = and i8 %14, -64
139 %arrayidx3.7 = getelementptr inbounds i8, ptr %dst, i64 7
140 store i8 %15, ptr %arrayidx3.7, align 1
141 %arrayidx.8 = getelementptr inbounds i8, ptr %src, i64 8
142 %16 = load i8, ptr %arrayidx.8, align 1
143 %17 = and i8 %16, -64
144 %arrayidx3.8 = getelementptr inbounds i8, ptr %dst, i64 8
145 store i8 %17, ptr %arrayidx3.8, align 1
146 %arrayidx.9 = getelementptr inbounds i8, ptr %src, i64 9
147 %18 = load i8, ptr %arrayidx.9, align 1
148 %19 = and i8 %18, -64
149 %arrayidx3.9 = getelementptr inbounds i8, ptr %dst, i64 9
150 store i8 %19, ptr %arrayidx3.9, align 1
151 %arrayidx.10 = getelementptr inbounds i8, ptr %src, i64 10
152 %20 = load i8, ptr %arrayidx.10, align 1
153 %21 = and i8 %20, -64
154 %arrayidx3.10 = getelementptr inbounds i8, ptr %dst, i64 10
155 store i8 %21, ptr %arrayidx3.10, align 1
156 %arrayidx.11 = getelementptr inbounds i8, ptr %src, i64 11
157 %22 = load i8, ptr %arrayidx.11, align 1
158 %23 = and i8 %22, -64
159 %arrayidx3.11 = getelementptr inbounds i8, ptr %dst, i64 11
160 store i8 %23, ptr %arrayidx3.11, align 1
161 %arrayidx.12 = getelementptr inbounds i8, ptr %src, i64 12
162 %24 = load i8, ptr %arrayidx.12, align 1
163 %25 = and i8 %24, -64
164 %arrayidx3.12 = getelementptr inbounds i8, ptr %dst, i64 12
165 store i8 %25, ptr %arrayidx3.12, align 1
166 %arrayidx.13 = getelementptr inbounds i8, ptr %src, i64 13
167 %26 = load i8, ptr %arrayidx.13, align 1
168 %27 = and i8 %26, -64
169 %arrayidx3.13 = getelementptr inbounds i8, ptr %dst, i64 13
170 store i8 %27, ptr %arrayidx3.13, align 1
171 %arrayidx.14 = getelementptr inbounds i8, ptr %src, i64 14
172 %28 = load i8, ptr %arrayidx.14, align 1
173 %29 = and i8 %28, -64
174 %arrayidx3.14 = getelementptr inbounds i8, ptr %dst, i64 14
175 store i8 %29, ptr %arrayidx3.14, align 1
176 %arrayidx.15 = getelementptr inbounds i8, ptr %src, i64 15
177 %30 = load i8, ptr %arrayidx.15, align 1
178 %31 = and i8 %30, -64
179 %arrayidx3.15 = getelementptr inbounds i8, ptr %dst, i64 15
180 store i8 %31, ptr %arrayidx3.15, align 1
; and32: element-wise byte operation over thirty-two consecutive bytes,
; reading %src[0..31] and writing %dst[0..31]. Both pointers are marked
; noalias, and every scalar access uses align 1.
; For elements 5-31 the visible scalar pattern is load -> `and i8 x, -64` ->
; store (-64 as i8 is ~63, so the low six bits are cleared).
; NOTE(review): the definitions of %1, %3, %5, %7 and %9 (elements 0-4) are
; not visible in this excerpt; presumably they follow the same `and` pattern
; -- confirm against the full test file.
; This is the only function here whose expectations differ by target CPU.
184 define void @and32(ptr noalias nocapture noundef writeonly %dst, ptr noalias nocapture noundef readonly %src) {
; SSE prefix (the x86-64 and x86-64-v2 invocations): two <16 x i8>
; load/and/store groups, the second one based at byte offset 16 of
; %src / %dst.
187 ; SSE-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
188 ; SSE-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
189 ; SSE-NEXT: store <16 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
190 ; SSE-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 16
191 ; SSE-NEXT: [[ARRAYIDX3_16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 16
192 ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX_16]], align 1
193 ; SSE-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
194 ; SSE-NEXT: store <16 x i8> [[TMP3]], ptr [[ARRAYIDX3_16]], align 1
; AVX prefix (the x86-64-v3 and x86-64-v4 invocations): a single <32 x i8>
; load/and/store covering all thirty-two bytes.
199 ; AVX-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
200 ; AVX-NEXT: [[TMP1:%.*]] = and <32 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
201 ; AVX-NEXT: store <32 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
; Element 0 uses %src / %dst directly; elements 1-31 go through explicit GEPs.
205 %0 = load i8, ptr %src, align 1
207 store i8 %1, ptr %dst, align 1
208 %arrayidx.1 = getelementptr inbounds i8, ptr %src, i64 1
209 %2 = load i8, ptr %arrayidx.1, align 1
211 %arrayidx3.1 = getelementptr inbounds i8, ptr %dst, i64 1
212 store i8 %3, ptr %arrayidx3.1, align 1
213 %arrayidx.2 = getelementptr inbounds i8, ptr %src, i64 2
214 %4 = load i8, ptr %arrayidx.2, align 1
216 %arrayidx3.2 = getelementptr inbounds i8, ptr %dst, i64 2
217 store i8 %5, ptr %arrayidx3.2, align 1
218 %arrayidx.3 = getelementptr inbounds i8, ptr %src, i64 3
219 %6 = load i8, ptr %arrayidx.3, align 1
221 %arrayidx3.3 = getelementptr inbounds i8, ptr %dst, i64 3
222 store i8 %7, ptr %arrayidx3.3, align 1
223 %arrayidx.4 = getelementptr inbounds i8, ptr %src, i64 4
224 %8 = load i8, ptr %arrayidx.4, align 1
226 %arrayidx3.4 = getelementptr inbounds i8, ptr %dst, i64 4
227 store i8 %9, ptr %arrayidx3.4, align 1
228 %arrayidx.5 = getelementptr inbounds i8, ptr %src, i64 5
229 %10 = load i8, ptr %arrayidx.5, align 1
230 %11 = and i8 %10, -64
231 %arrayidx3.5 = getelementptr inbounds i8, ptr %dst, i64 5
232 store i8 %11, ptr %arrayidx3.5, align 1
233 %arrayidx.6 = getelementptr inbounds i8, ptr %src, i64 6
234 %12 = load i8, ptr %arrayidx.6, align 1
235 %13 = and i8 %12, -64
236 %arrayidx3.6 = getelementptr inbounds i8, ptr %dst, i64 6
237 store i8 %13, ptr %arrayidx3.6, align 1
238 %arrayidx.7 = getelementptr inbounds i8, ptr %src, i64 7
239 %14 = load i8, ptr %arrayidx.7, align 1
240 %15 = and i8 %14, -64
241 %arrayidx3.7 = getelementptr inbounds i8, ptr %dst, i64 7
242 store i8 %15, ptr %arrayidx3.7, align 1
243 %arrayidx.8 = getelementptr inbounds i8, ptr %src, i64 8
244 %16 = load i8, ptr %arrayidx.8, align 1
245 %17 = and i8 %16, -64
246 %arrayidx3.8 = getelementptr inbounds i8, ptr %dst, i64 8
247 store i8 %17, ptr %arrayidx3.8, align 1
248 %arrayidx.9 = getelementptr inbounds i8, ptr %src, i64 9
249 %18 = load i8, ptr %arrayidx.9, align 1
250 %19 = and i8 %18, -64
251 %arrayidx3.9 = getelementptr inbounds i8, ptr %dst, i64 9
252 store i8 %19, ptr %arrayidx3.9, align 1
253 %arrayidx.10 = getelementptr inbounds i8, ptr %src, i64 10
254 %20 = load i8, ptr %arrayidx.10, align 1
255 %21 = and i8 %20, -64
256 %arrayidx3.10 = getelementptr inbounds i8, ptr %dst, i64 10
257 store i8 %21, ptr %arrayidx3.10, align 1
258 %arrayidx.11 = getelementptr inbounds i8, ptr %src, i64 11
259 %22 = load i8, ptr %arrayidx.11, align 1
260 %23 = and i8 %22, -64
261 %arrayidx3.11 = getelementptr inbounds i8, ptr %dst, i64 11
262 store i8 %23, ptr %arrayidx3.11, align 1
263 %arrayidx.12 = getelementptr inbounds i8, ptr %src, i64 12
264 %24 = load i8, ptr %arrayidx.12, align 1
265 %25 = and i8 %24, -64
266 %arrayidx3.12 = getelementptr inbounds i8, ptr %dst, i64 12
267 store i8 %25, ptr %arrayidx3.12, align 1
268 %arrayidx.13 = getelementptr inbounds i8, ptr %src, i64 13
269 %26 = load i8, ptr %arrayidx.13, align 1
270 %27 = and i8 %26, -64
271 %arrayidx3.13 = getelementptr inbounds i8, ptr %dst, i64 13
272 store i8 %27, ptr %arrayidx3.13, align 1
273 %arrayidx.14 = getelementptr inbounds i8, ptr %src, i64 14
274 %28 = load i8, ptr %arrayidx.14, align 1
275 %29 = and i8 %28, -64
276 %arrayidx3.14 = getelementptr inbounds i8, ptr %dst, i64 14
277 store i8 %29, ptr %arrayidx3.14, align 1
278 %arrayidx.15 = getelementptr inbounds i8, ptr %src, i64 15
279 %30 = load i8, ptr %arrayidx.15, align 1
280 %31 = and i8 %30, -64
281 %arrayidx3.15 = getelementptr inbounds i8, ptr %dst, i64 15
282 store i8 %31, ptr %arrayidx3.15, align 1
283 %arrayidx.16 = getelementptr inbounds i8, ptr %src, i64 16
284 %32 = load i8, ptr %arrayidx.16, align 1
285 %33 = and i8 %32, -64
286 %arrayidx3.16 = getelementptr inbounds i8, ptr %dst, i64 16
287 store i8 %33, ptr %arrayidx3.16, align 1
288 %arrayidx.17 = getelementptr inbounds i8, ptr %src, i64 17
289 %34 = load i8, ptr %arrayidx.17, align 1
290 %35 = and i8 %34, -64
291 %arrayidx3.17 = getelementptr inbounds i8, ptr %dst, i64 17
292 store i8 %35, ptr %arrayidx3.17, align 1
293 %arrayidx.18 = getelementptr inbounds i8, ptr %src, i64 18
294 %36 = load i8, ptr %arrayidx.18, align 1
295 %37 = and i8 %36, -64
296 %arrayidx3.18 = getelementptr inbounds i8, ptr %dst, i64 18
297 store i8 %37, ptr %arrayidx3.18, align 1
298 %arrayidx.19 = getelementptr inbounds i8, ptr %src, i64 19
299 %38 = load i8, ptr %arrayidx.19, align 1
300 %39 = and i8 %38, -64
301 %arrayidx3.19 = getelementptr inbounds i8, ptr %dst, i64 19
302 store i8 %39, ptr %arrayidx3.19, align 1
303 %arrayidx.20 = getelementptr inbounds i8, ptr %src, i64 20
304 %40 = load i8, ptr %arrayidx.20, align 1
305 %41 = and i8 %40, -64
306 %arrayidx3.20 = getelementptr inbounds i8, ptr %dst, i64 20
307 store i8 %41, ptr %arrayidx3.20, align 1
308 %arrayidx.21 = getelementptr inbounds i8, ptr %src, i64 21
309 %42 = load i8, ptr %arrayidx.21, align 1
310 %43 = and i8 %42, -64
311 %arrayidx3.21 = getelementptr inbounds i8, ptr %dst, i64 21
312 store i8 %43, ptr %arrayidx3.21, align 1
313 %arrayidx.22 = getelementptr inbounds i8, ptr %src, i64 22
314 %44 = load i8, ptr %arrayidx.22, align 1
315 %45 = and i8 %44, -64
316 %arrayidx3.22 = getelementptr inbounds i8, ptr %dst, i64 22
317 store i8 %45, ptr %arrayidx3.22, align 1
318 %arrayidx.23 = getelementptr inbounds i8, ptr %src, i64 23
319 %46 = load i8, ptr %arrayidx.23, align 1
320 %47 = and i8 %46, -64
321 %arrayidx3.23 = getelementptr inbounds i8, ptr %dst, i64 23
322 store i8 %47, ptr %arrayidx3.23, align 1
323 %arrayidx.24 = getelementptr inbounds i8, ptr %src, i64 24
324 %48 = load i8, ptr %arrayidx.24, align 1
325 %49 = and i8 %48, -64
326 %arrayidx3.24 = getelementptr inbounds i8, ptr %dst, i64 24
327 store i8 %49, ptr %arrayidx3.24, align 1
328 %arrayidx.25 = getelementptr inbounds i8, ptr %src, i64 25
329 %50 = load i8, ptr %arrayidx.25, align 1
330 %51 = and i8 %50, -64
331 %arrayidx3.25 = getelementptr inbounds i8, ptr %dst, i64 25
332 store i8 %51, ptr %arrayidx3.25, align 1
333 %arrayidx.26 = getelementptr inbounds i8, ptr %src, i64 26
334 %52 = load i8, ptr %arrayidx.26, align 1
335 %53 = and i8 %52, -64
336 %arrayidx3.26 = getelementptr inbounds i8, ptr %dst, i64 26
337 store i8 %53, ptr %arrayidx3.26, align 1
338 %arrayidx.27 = getelementptr inbounds i8, ptr %src, i64 27
339 %54 = load i8, ptr %arrayidx.27, align 1
340 %55 = and i8 %54, -64
341 %arrayidx3.27 = getelementptr inbounds i8, ptr %dst, i64 27
342 store i8 %55, ptr %arrayidx3.27, align 1
343 %arrayidx.28 = getelementptr inbounds i8, ptr %src, i64 28
344 %56 = load i8, ptr %arrayidx.28, align 1
345 %57 = and i8 %56, -64
346 %arrayidx3.28 = getelementptr inbounds i8, ptr %dst, i64 28
347 store i8 %57, ptr %arrayidx3.28, align 1
348 %arrayidx.29 = getelementptr inbounds i8, ptr %src, i64 29
349 %58 = load i8, ptr %arrayidx.29, align 1
350 %59 = and i8 %58, -64
351 %arrayidx3.29 = getelementptr inbounds i8, ptr %dst, i64 29
352 store i8 %59, ptr %arrayidx3.29, align 1
353 %arrayidx.30 = getelementptr inbounds i8, ptr %src, i64 30
354 %60 = load i8, ptr %arrayidx.30, align 1
355 %61 = and i8 %60, -64
356 %arrayidx3.30 = getelementptr inbounds i8, ptr %dst, i64 30
357 store i8 %61, ptr %arrayidx3.30, align 1
358 %arrayidx.31 = getelementptr inbounds i8, ptr %src, i64 31
359 %62 = load i8, ptr %arrayidx.31, align 1
360 %63 = and i8 %62, -64
361 %arrayidx3.31 = getelementptr inbounds i8, ptr %dst, i64 31
362 store i8 %63, ptr %arrayidx3.31, align 1