1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
8 ; Widened shuffle broadcast loads
; Load a <4 x float> and repeat its elements 0,1 (the low 64 bits) in both
; halves of the result. The checks expect a load plus movlhps on SSE2 and a
; single (v)movddup from memory on SSE4.2 and on the AVX configurations.
; NOTE(review): the function is marked readnone although it loads from %ptr;
; the same attribute set recurs on later functions - confirm it is intentional.
10 define <4 x float> @load_splat_4f32_4f32_0101(<4 x float>* %ptr) nounwind uwtable readnone ssp {
11 ; SSE2-LABEL: load_splat_4f32_4f32_0101:
12 ; SSE2: # %bb.0: # %entry
13 ; SSE2-NEXT: movaps (%rdi), %xmm0
14 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
17 ; SSE42-LABEL: load_splat_4f32_4f32_0101:
18 ; SSE42: # %bb.0: # %entry
19 ; SSE42-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
22 ; AVX-LABEL: load_splat_4f32_4f32_0101:
23 ; AVX: # %bb.0: # %entry
24 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
27 %ld = load <4 x float>, <4 x float>* %ptr
28 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Load a <4 x float> and widen it to <8 x float> by repeating elements 0,1
; (the low 64 bits) four times. The checks expect a 128-bit splat copied or
; inserted into the upper half on SSE2, SSE4.2 and AVX1, and a single
; vbroadcastsd from memory on AVX2 and AVX512.
32 define <8 x float> @load_splat_8f32_4f32_01010101(<4 x float>* %ptr) nounwind uwtable readnone ssp {
33 ; SSE2-LABEL: load_splat_8f32_4f32_01010101:
34 ; SSE2: # %bb.0: # %entry
35 ; SSE2-NEXT: movaps (%rdi), %xmm0
36 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
37 ; SSE2-NEXT: movaps %xmm0, %xmm1
40 ; SSE42-LABEL: load_splat_8f32_4f32_01010101:
41 ; SSE42: # %bb.0: # %entry
42 ; SSE42-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
43 ; SSE42-NEXT: movapd %xmm0, %xmm1
46 ; AVX1-LABEL: load_splat_8f32_4f32_01010101:
47 ; AVX1: # %bb.0: # %entry
48 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
49 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
52 ; AVX2-LABEL: load_splat_8f32_4f32_01010101:
53 ; AVX2: # %bb.0: # %entry
54 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
57 ; AVX512-LABEL: load_splat_8f32_4f32_01010101:
58 ; AVX512: # %bb.0: # %entry
59 ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
62 %ld = load <4 x float>, <4 x float>* %ptr
63 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a full <8 x float> and repeat its elements 0,1 (the low 64 bits) four
; times. Unlike the <4 x float> source variant, the checks expect a single
; vbroadcastsd from memory on every AVX configuration, AVX1 included.
67 define <8 x float> @load_splat_8f32_8f32_01010101(<8 x float>* %ptr) nounwind uwtable readnone ssp {
68 ; SSE2-LABEL: load_splat_8f32_8f32_01010101:
69 ; SSE2: # %bb.0: # %entry
70 ; SSE2-NEXT: movaps (%rdi), %xmm0
71 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
72 ; SSE2-NEXT: movaps %xmm0, %xmm1
75 ; SSE42-LABEL: load_splat_8f32_8f32_01010101:
76 ; SSE42: # %bb.0: # %entry
77 ; SSE42-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
78 ; SSE42-NEXT: movapd %xmm0, %xmm1
81 ; AVX-LABEL: load_splat_8f32_8f32_01010101:
82 ; AVX: # %bb.0: # %entry
83 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
86 %ld = load <8 x float>, <8 x float>* %ptr
87 %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Integer counterpart of the 4f32 case. Load a <4 x i32> and repeat elements
; 0,1 (the low 64 bits) twice. The checks expect a shuffle folded with the load
; (pshufd / vpermilps) on SSE and AVX1, and vmovddup from memory on AVX2 and
; AVX512.
91 define <4 x i32> @load_splat_4i32_4i32_0101(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
92 ; SSE-LABEL: load_splat_4i32_4i32_0101:
93 ; SSE: # %bb.0: # %entry
94 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
97 ; AVX1-LABEL: load_splat_4i32_4i32_0101:
98 ; AVX1: # %bb.0: # %entry
99 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
102 ; AVX2-LABEL: load_splat_4i32_4i32_0101:
103 ; AVX2: # %bb.0: # %entry
104 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
107 ; AVX512-LABEL: load_splat_4i32_4i32_0101:
108 ; AVX512: # %bb.0: # %entry
109 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
112 %ld = load <4 x i32>, <4 x i32>* %ptr
113 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Load a <4 x i32> and widen it to <8 x i32> by repeating elements 0,1 (the
; low 64 bits) four times. The checks expect a 128-bit shuffle duplicated into
; the upper half on SSE and AVX1, and a single vbroadcastsd from memory on
; AVX2 and AVX512.
117 define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
118 ; SSE-LABEL: load_splat_8i32_4i32_01010101:
119 ; SSE: # %bb.0: # %entry
120 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
121 ; SSE-NEXT: movdqa %xmm0, %xmm1
124 ; AVX1-LABEL: load_splat_8i32_4i32_01010101:
125 ; AVX1: # %bb.0: # %entry
126 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
127 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
130 ; AVX2-LABEL: load_splat_8i32_4i32_01010101:
131 ; AVX2: # %bb.0: # %entry
132 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
135 ; AVX512-LABEL: load_splat_8i32_4i32_01010101:
136 ; AVX512: # %bb.0: # %entry
137 ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
140 %ld = load <4 x i32>, <4 x i32>* %ptr
141 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a full <8 x i32> and repeat its elements 0,1 (the low 64 bits) four
; times. The checks expect pshufd plus a register copy on SSE and a single
; vbroadcastsd from memory on every AVX configuration.
145 define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
146 ; SSE-LABEL: load_splat_8i32_8i32_01010101:
147 ; SSE: # %bb.0: # %entry
148 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
149 ; SSE-NEXT: movdqa %xmm0, %xmm1
152 ; AVX-LABEL: load_splat_8i32_8i32_01010101:
153 ; AVX: # %bb.0: # %entry
154 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
157 %ld = load <8 x i32>, <8 x i32>* %ptr
158 %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a <8 x i16> and repeat elements 0,1 (a 32-bit pair) four times. The
; checks expect the splat to be handled as a 32-bit element splat: pshufd /
; vpermilps with mask [0,0,0,0] on SSE and AVX1, vbroadcastss from memory on
; AVX2 and AVX512.
162 define <8 x i16> @load_splat_8i16_8i16_01010101(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
163 ; SSE-LABEL: load_splat_8i16_8i16_01010101:
164 ; SSE: # %bb.0: # %entry
165 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
168 ; AVX1-LABEL: load_splat_8i16_8i16_01010101:
169 ; AVX1: # %bb.0: # %entry
170 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
173 ; AVX2-LABEL: load_splat_8i16_8i16_01010101:
174 ; AVX2: # %bb.0: # %entry
175 ; AVX2-NEXT: vbroadcastss (%rdi), %xmm0
178 ; AVX512-LABEL: load_splat_8i16_8i16_01010101:
179 ; AVX512: # %bb.0: # %entry
180 ; AVX512-NEXT: vbroadcastss (%rdi), %xmm0
183 %ld = load <8 x i16>, <8 x i16>* %ptr
184 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a <8 x i16> and repeat elements 0..3 (the low 64 bits) twice. The
; checks expect a 64-bit splat: pshufd / vpermilps with mask [0,1,0,1] on SSE
; and AVX1, vmovddup from memory on AVX2 and AVX512.
188 define <8 x i16> @load_splat_8i16_8i16_01230123(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
189 ; SSE-LABEL: load_splat_8i16_8i16_01230123:
190 ; SSE: # %bb.0: # %entry
191 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
194 ; AVX1-LABEL: load_splat_8i16_8i16_01230123:
195 ; AVX1: # %bb.0: # %entry
196 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
199 ; AVX2-LABEL: load_splat_8i16_8i16_01230123:
200 ; AVX2: # %bb.0: # %entry
201 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
204 ; AVX512-LABEL: load_splat_8i16_8i16_01230123:
205 ; AVX512: # %bb.0: # %entry
206 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
209 %ld = load <8 x i16>, <8 x i16>* %ptr
210 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Load a <8 x i16> and widen it to <16 x i16> by repeating elements 0,1 (a
; 32-bit pair) eight times. The checks expect a 128-bit 32-bit-element splat
; duplicated into the upper half on SSE and AVX1, and a single vbroadcastss
; into ymm0 from memory on AVX2 and AVX512.
214 define <16 x i16> @load_splat_16i16_8i16_0101010101010101(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
215 ; SSE-LABEL: load_splat_16i16_8i16_0101010101010101:
216 ; SSE: # %bb.0: # %entry
217 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
218 ; SSE-NEXT: movdqa %xmm0, %xmm1
221 ; AVX1-LABEL: load_splat_16i16_8i16_0101010101010101:
222 ; AVX1: # %bb.0: # %entry
223 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
224 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
227 ; AVX2-LABEL: load_splat_16i16_8i16_0101010101010101:
228 ; AVX2: # %bb.0: # %entry
229 ; AVX2-NEXT: vbroadcastss (%rdi), %ymm0
232 ; AVX512-LABEL: load_splat_16i16_8i16_0101010101010101:
233 ; AVX512: # %bb.0: # %entry
234 ; AVX512-NEXT: vbroadcastss (%rdi), %ymm0
237 %ld = load <8 x i16>, <8 x i16>* %ptr
238 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a <8 x i16> and widen it to <16 x i16> by repeating elements 0..3 (the
; low 64 bits) four times. The checks expect a 128-bit 64-bit-element splat
; duplicated into the upper half on SSE and AVX1, and a single vbroadcastsd
; from memory on AVX2 and AVX512.
242 define <16 x i16> @load_splat_16i16_8i16_0123012301230123(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
243 ; SSE-LABEL: load_splat_16i16_8i16_0123012301230123:
244 ; SSE: # %bb.0: # %entry
245 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
246 ; SSE-NEXT: movdqa %xmm0, %xmm1
249 ; AVX1-LABEL: load_splat_16i16_8i16_0123012301230123:
250 ; AVX1: # %bb.0: # %entry
251 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
252 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
255 ; AVX2-LABEL: load_splat_16i16_8i16_0123012301230123:
256 ; AVX2: # %bb.0: # %entry
257 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
260 ; AVX512-LABEL: load_splat_16i16_8i16_0123012301230123:
261 ; AVX512: # %bb.0: # %entry
262 ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
265 %ld = load <8 x i16>, <8 x i16>* %ptr
266 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Load a full <16 x i16> and repeat its elements 0,1 (a 32-bit pair) eight
; times. The checks expect pshufd plus a register copy on SSE and a single
; vbroadcastss into ymm0 from memory on every AVX configuration.
270 define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
271 ; SSE-LABEL: load_splat_16i16_16i16_0101010101010101:
272 ; SSE: # %bb.0: # %entry
273 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
274 ; SSE-NEXT: movdqa %xmm0, %xmm1
277 ; AVX-LABEL: load_splat_16i16_16i16_0101010101010101:
278 ; AVX: # %bb.0: # %entry
279 ; AVX-NEXT: vbroadcastss (%rdi), %ymm0
282 %ld = load <16 x i16>, <16 x i16>* %ptr
283 %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a full <16 x i16> and repeat its elements 0..3 (the low 64 bits) four
; times. The checks expect pshufd plus a register copy on SSE and a single
; vbroadcastsd from memory on every AVX configuration.
287 define <16 x i16> @load_splat_16i16_16i16_0123012301230123(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
288 ; SSE-LABEL: load_splat_16i16_16i16_0123012301230123:
289 ; SSE: # %bb.0: # %entry
290 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
291 ; SSE-NEXT: movdqa %xmm0, %xmm1
294 ; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
295 ; AVX: # %bb.0: # %entry
296 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
299 %ld = load <16 x i16>, <16 x i16>* %ptr
300 %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Load a <16 x i8> and repeat elements 0,1 (a 16-bit pair) eight times. The
; checks expect a two-step pshuflw + pshufd splat on SSE and AVX1, and a
; single vpbroadcastw from memory on AVX2 and AVX512.
304 define <16 x i8> @load_splat_16i8_16i8_0101010101010101(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
305 ; SSE-LABEL: load_splat_16i8_16i8_0101010101010101:
306 ; SSE: # %bb.0: # %entry
307 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
308 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
311 ; AVX1-LABEL: load_splat_16i8_16i8_0101010101010101:
312 ; AVX1: # %bb.0: # %entry
313 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
314 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
317 ; AVX2-LABEL: load_splat_16i8_16i8_0101010101010101:
318 ; AVX2: # %bb.0: # %entry
319 ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
322 ; AVX512-LABEL: load_splat_16i8_16i8_0101010101010101:
323 ; AVX512: # %bb.0: # %entry
324 ; AVX512-NEXT: vpbroadcastw (%rdi), %xmm0
327 %ld = load <16 x i8>, <16 x i8>* %ptr
328 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a <16 x i8> and repeat elements 0..3 (the low 32 bits) four times. The
; checks expect a 32-bit element splat: pshufd / vpermilps with mask [0,0,0,0]
; on SSE and AVX1, vbroadcastss from memory on AVX2 and AVX512.
332 define <16 x i8> @load_splat_16i8_16i8_0123012301230123(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
333 ; SSE-LABEL: load_splat_16i8_16i8_0123012301230123:
334 ; SSE: # %bb.0: # %entry
335 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
338 ; AVX1-LABEL: load_splat_16i8_16i8_0123012301230123:
339 ; AVX1: # %bb.0: # %entry
340 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
343 ; AVX2-LABEL: load_splat_16i8_16i8_0123012301230123:
344 ; AVX2: # %bb.0: # %entry
345 ; AVX2-NEXT: vbroadcastss (%rdi), %xmm0
348 ; AVX512-LABEL: load_splat_16i8_16i8_0123012301230123:
349 ; AVX512: # %bb.0: # %entry
350 ; AVX512-NEXT: vbroadcastss (%rdi), %xmm0
353 %ld = load <16 x i8>, <16 x i8>* %ptr
354 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Load a <16 x i8> and repeat elements 0..7 (the low 64 bits) twice. The
; checks expect a 64-bit splat: pshufd / vpermilps with mask [0,1,0,1] on SSE
; and AVX1, vmovddup from memory on AVX2 and AVX512.
358 define <16 x i8> @load_splat_16i8_16i8_0123456701234567(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
359 ; SSE-LABEL: load_splat_16i8_16i8_0123456701234567:
360 ; SSE: # %bb.0: # %entry
361 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
364 ; AVX1-LABEL: load_splat_16i8_16i8_0123456701234567:
365 ; AVX1: # %bb.0: # %entry
366 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
369 ; AVX2-LABEL: load_splat_16i8_16i8_0123456701234567:
370 ; AVX2: # %bb.0: # %entry
371 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
374 ; AVX512-LABEL: load_splat_16i8_16i8_0123456701234567:
375 ; AVX512: # %bb.0: # %entry
376 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
379 %ld = load <16 x i8>, <16 x i8>* %ptr
380 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Load a <16 x i8> and widen it to <32 x i8> by repeating elements 0,1 (a
; 16-bit pair) sixteen times. The checks expect a 128-bit pshuflw + pshufd
; splat duplicated into the upper half on SSE and AVX1, and a single
; vpbroadcastw into ymm0 from memory on AVX2 and AVX512.
384 define <32 x i8> @load_splat_32i8_16i8_01010101010101010101010101010101(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
385 ; SSE-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
386 ; SSE: # %bb.0: # %entry
387 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
388 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
389 ; SSE-NEXT: movdqa %xmm0, %xmm1
392 ; AVX1-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
393 ; AVX1: # %bb.0: # %entry
394 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
395 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
396 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
399 ; AVX2-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
400 ; AVX2: # %bb.0: # %entry
401 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
404 ; AVX512-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
405 ; AVX512: # %bb.0: # %entry
406 ; AVX512-NEXT: vpbroadcastw (%rdi), %ymm0
409 %ld = load <16 x i8>, <16 x i8>* %ptr
410 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a <16 x i8> and widen it to <32 x i8> by repeating elements 0..3 (the
; low 32 bits) eight times. The checks expect a 128-bit 32-bit-element splat
; duplicated into the upper half on SSE and AVX1, and a single vbroadcastss
; into ymm0 from memory on AVX2 and AVX512.
414 define <32 x i8> @load_splat_32i8_16i8_01230123012301230123012301230123(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
415 ; SSE-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
416 ; SSE: # %bb.0: # %entry
417 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
418 ; SSE-NEXT: movdqa %xmm0, %xmm1
421 ; AVX1-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
422 ; AVX1: # %bb.0: # %entry
423 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
424 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
427 ; AVX2-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
428 ; AVX2: # %bb.0: # %entry
429 ; AVX2-NEXT: vbroadcastss (%rdi), %ymm0
432 ; AVX512-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
433 ; AVX512: # %bb.0: # %entry
434 ; AVX512-NEXT: vbroadcastss (%rdi), %ymm0
437 %ld = load <16 x i8>, <16 x i8>* %ptr
438 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Load a <16 x i8> and widen it to <32 x i8> by repeating elements 0..7 (the
; low 64 bits) four times. The checks expect a 128-bit 64-bit-element splat
; duplicated into the upper half on SSE and AVX1, and a single vbroadcastsd
; from memory on AVX2 and AVX512.
442 define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
443 ; SSE-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
444 ; SSE: # %bb.0: # %entry
445 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
446 ; SSE-NEXT: movdqa %xmm0, %xmm1
449 ; AVX1-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
450 ; AVX1: # %bb.0: # %entry
451 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
452 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
455 ; AVX2-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
456 ; AVX2: # %bb.0: # %entry
457 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
460 ; AVX512-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
461 ; AVX512: # %bb.0: # %entry
462 ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
465 %ld = load <16 x i8>, <16 x i8>* %ptr
466 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Load a full <32 x i8> and repeat its elements 0,1 (a 16-bit pair) sixteen
; times. The checks match the <16 x i8> source variant: a pshuflw + pshufd
; splat duplicated into the upper half on SSE and AVX1, and a single
; vpbroadcastw into ymm0 from memory on AVX2 and AVX512.
470 define <32 x i8> @load_splat_32i8_32i8_01010101010101010101010101010101(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
471 ; SSE-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
472 ; SSE: # %bb.0: # %entry
473 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
474 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
475 ; SSE-NEXT: movdqa %xmm0, %xmm1
478 ; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
479 ; AVX1: # %bb.0: # %entry
480 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
481 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
482 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
485 ; AVX2-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
486 ; AVX2: # %bb.0: # %entry
487 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
490 ; AVX512-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
491 ; AVX512: # %bb.0: # %entry
492 ; AVX512-NEXT: vpbroadcastw (%rdi), %ymm0
495 %ld = load <32 x i8>, <32 x i8>* %ptr
496 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a full <32 x i8> and repeat its elements 0..3 (the low 32 bits) eight
; times. The checks expect pshufd plus a register copy on SSE and a single
; vbroadcastss into ymm0 from memory on every AVX configuration.
500 define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
501 ; SSE-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
502 ; SSE: # %bb.0: # %entry
503 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
504 ; SSE-NEXT: movdqa %xmm0, %xmm1
507 ; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
508 ; AVX: # %bb.0: # %entry
509 ; AVX-NEXT: vbroadcastss (%rdi), %ymm0
512 %ld = load <32 x i8>, <32 x i8>* %ptr
513 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Load a full <32 x i8> and repeat its elements 0..7 (the low 64 bits) four
; times. The checks expect pshufd plus a register copy on SSE and a single
; vbroadcastsd from memory on every AVX configuration.
517 define <32 x i8> @load_splat_32i8_32i8_01234567012345670123456701234567(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
518 ; SSE-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
519 ; SSE: # %bb.0: # %entry
520 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
521 ; SSE-NEXT: movdqa %xmm0, %xmm1
524 ; AVX-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
525 ; AVX: # %bb.0: # %entry
526 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
529 %ld = load <32 x i8>, <32 x i8>* %ptr
530 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Narrowing case. Load a <8 x float> but produce only a <4 x float> in which
; every lane is element 0 (zeroinitializer mask). The checks expect a load
; plus shufps on SSE and a single vbroadcastss from memory on the AVX
; configurations.
534 define <4 x float> @load_splat_4f32_8f32_0000(<8 x float>* %ptr) nounwind uwtable readnone ssp {
535 ; SSE-LABEL: load_splat_4f32_8f32_0000:
536 ; SSE: # %bb.0: # %entry
537 ; SSE-NEXT: movaps (%rdi), %xmm0
538 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
541 ; AVX-LABEL: load_splat_4f32_8f32_0000:
542 ; AVX: # %bb.0: # %entry
543 ; AVX-NEXT: vbroadcastss (%rdi), %xmm0
546 %ld = load <8 x float>, <8 x float>* %ptr
547 %ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
; Splat from a non-zero offset. Load a <16 x float> and repeat elements 8,9
; four times into a <8 x float>. Element 8 of a float vector sits 32 bytes
; into the loaded value, which matches the 32(%rdi) address in the SSE2 and
; AVX checks.
551 define <8 x float> @load_splat_8f32_16f32_89898989(<16 x float>* %ptr) nounwind uwtable readnone ssp {
552 ; SSE2-LABEL: load_splat_8f32_16f32_89898989:
553 ; SSE2: # %bb.0: # %entry
554 ; SSE2-NEXT: movaps 32(%rdi), %xmm0
555 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
556 ; SSE2-NEXT: movaps %xmm0, %xmm1
559 ; SSE42-LABEL: load_splat_8f32_16f32_89898989:
560 ; SSE42: # %bb.0: # %entry
561 ; SSE42-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
562 ; SSE42-NEXT: movapd %xmm0, %xmm1
565 ; AVX-LABEL: load_splat_8f32_16f32_89898989:
566 ; AVX: # %bb.0: # %entry
567 ; AVX-NEXT: vbroadcastsd 32(%rdi), %ymm0
570 %ld = load <16 x float>, <16 x float>* %ptr
571 %ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
; Load a 64-bit <2 x i32> and repeat both elements twice to form a <4 x i32>.
; The checks expect a 64-bit movq load followed by pshufd on SSE and a single
; vmovddup from memory on the AVX configurations.
576 define <4 x i32> @load_splat_4i32_2i32_0101(<2 x i32>* %vp) {
577 ; SSE-LABEL: load_splat_4i32_2i32_0101:
579 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
580 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
583 ; AVX-LABEL: load_splat_4i32_2i32_0101:
585 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
587 %vec = load <2 x i32>, <2 x i32>* %vp
588 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Load a 64-bit <2 x i32> and repeat both elements four times to form a
; <8 x i32>. The checks expect movq + pshufd plus a register copy on SSE and
; a single vbroadcastsd from memory on the AVX configurations.
592 define <8 x i32> @load_splat_8i32_2i32_0101(<2 x i32>* %vp) {
593 ; SSE-LABEL: load_splat_8i32_2i32_0101:
595 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
596 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
597 ; SSE-NEXT: movdqa %xmm0, %xmm1
600 ; AVX-LABEL: load_splat_8i32_2i32_0101:
602 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
604 %vec = load <2 x i32>, <2 x i32>* %vp
605 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Load a 64-bit <2 x i32> and repeat both elements eight times to form a
; <16 x i32>. The checks expect the 128-bit splat copied into xmm1..xmm3 on
; SSE, a 256-bit vbroadcastsd copied into ymm1 on AVX1 and AVX2, and a single
; vbroadcastsd into zmm0 on AVX512.
609 define <16 x i32> @load_splat_16i32_2i32_0101(<2 x i32>* %vp) {
610 ; SSE-LABEL: load_splat_16i32_2i32_0101:
612 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
613 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
614 ; SSE-NEXT: movdqa %xmm0, %xmm1
615 ; SSE-NEXT: movdqa %xmm0, %xmm2
616 ; SSE-NEXT: movdqa %xmm0, %xmm3
619 ; AVX1-LABEL: load_splat_16i32_2i32_0101:
621 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
622 ; AVX1-NEXT: vmovaps %ymm0, %ymm1
625 ; AVX2-LABEL: load_splat_16i32_2i32_0101:
627 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
628 ; AVX2-NEXT: vmovaps %ymm0, %ymm1
631 ; AVX512-LABEL: load_splat_16i32_2i32_0101:
633 ; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
635 %vec = load <2 x i32>, <2 x i32>* %vp
636 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>