1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
8 ; Widened shuffle broadcast loads
; Loads a <4 x float> from %ptr and shuffles it with mask <0,1,0,1>, i.e. the
; low two floats (64 bits) are repeated across the 128-bit result.
; Expected code per the checks below -- sse2 run, a full movaps load followed by
; movlhps; sse4.2 run, a single movddup from memory; all AVX runs, a single
; vmovddup from memory.
; NOTE(review) the signature carries `readnone` although the body loads through
; %ptr -- confirm the attribute is intentional.
10 define <4 x float> @load_splat_4f32_4f32_0101(<4 x float>* %ptr) nounwind uwtable readnone ssp {
11 ; SSE2-LABEL: load_splat_4f32_4f32_0101:
12 ; SSE2: # %bb.0: # %entry
13 ; SSE2-NEXT: movaps (%rdi), %xmm0
14 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
17 ; SSE42-LABEL: load_splat_4f32_4f32_0101:
18 ; SSE42: # %bb.0: # %entry
19 ; SSE42-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
22 ; AVX-LABEL: load_splat_4f32_4f32_0101:
23 ; AVX: # %bb.0: # %entry
24 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
27 %ld = load <4 x float>, <4 x float>* %ptr
28 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Loads a <4 x float> from %ptr and widens it to <8 x float> by repeating the
; low two floats (mask 0,1 four times).
; Expected code per the checks below -- sse2 run, movaps load + movlhps, then a
; copy of xmm0 into xmm1; sse4.2 run, movddup from memory, then a movapd copy
; into xmm1; all AVX runs, a single vbroadcastsd from (%rdi) into ymm0.
32 define <8 x float> @load_splat_8f32_4f32_01010101(<4 x float>* %ptr) nounwind uwtable readnone ssp {
33 ; SSE2-LABEL: load_splat_8f32_4f32_01010101:
34 ; SSE2: # %bb.0: # %entry
35 ; SSE2-NEXT: movaps (%rdi), %xmm0
36 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
37 ; SSE2-NEXT: movaps %xmm0, %xmm1
40 ; SSE42-LABEL: load_splat_8f32_4f32_01010101:
41 ; SSE42: # %bb.0: # %entry
42 ; SSE42-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
43 ; SSE42-NEXT: movapd %xmm0, %xmm1
46 ; AVX-LABEL: load_splat_8f32_4f32_01010101:
47 ; AVX: # %bb.0: # %entry
48 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
51 %ld = load <4 x float>, <4 x float>* %ptr
52 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads a full <8 x float> from %ptr but only uses its low two floats, which the
; shuffle (mask 0,1 four times) repeats across the <8 x float> result.
; Expected code per the checks below -- sse2 run, a 16-byte movaps load +
; movlhps, then a copy into xmm1; sse4.2 run, movddup from memory + movapd copy;
; all AVX runs, a single vbroadcastsd from (%rdi) into ymm0.
56 define <8 x float> @load_splat_8f32_8f32_01010101(<8 x float>* %ptr) nounwind uwtable readnone ssp {
57 ; SSE2-LABEL: load_splat_8f32_8f32_01010101:
58 ; SSE2: # %bb.0: # %entry
59 ; SSE2-NEXT: movaps (%rdi), %xmm0
60 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
61 ; SSE2-NEXT: movaps %xmm0, %xmm1
64 ; SSE42-LABEL: load_splat_8f32_8f32_01010101:
65 ; SSE42: # %bb.0: # %entry
66 ; SSE42-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
67 ; SSE42-NEXT: movapd %xmm0, %xmm1
70 ; AVX-LABEL: load_splat_8f32_8f32_01010101:
71 ; AVX: # %bb.0: # %entry
72 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
75 %ld = load <8 x float>, <8 x float>* %ptr
76 %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Integer counterpart of the 64-bit splat -- loads a <4 x i32> from %ptr and
; repeats its low two elements (mask <0,1,0,1>).
; Expected code per the checks below -- both SSE runs, pshufd with a memory
; operand; avx run, vpermilps with a memory operand; avx2 and avx512f runs,
; vmovddup from memory.
80 define <4 x i32> @load_splat_4i32_4i32_0101(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
81 ; SSE-LABEL: load_splat_4i32_4i32_0101:
82 ; SSE: # %bb.0: # %entry
83 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
86 ; AVX1-LABEL: load_splat_4i32_4i32_0101:
87 ; AVX1: # %bb.0: # %entry
88 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
91 ; AVX2-LABEL: load_splat_4i32_4i32_0101:
92 ; AVX2: # %bb.0: # %entry
93 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
96 ; AVX512-LABEL: load_splat_4i32_4i32_0101:
97 ; AVX512: # %bb.0: # %entry
98 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
101 %ld = load <4 x i32>, <4 x i32>* %ptr
102 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Loads a <4 x i32> from %ptr and widens it to <8 x i32> by repeating the low two
; elements (mask 0,1 four times).
; Expected code per the checks below -- both SSE runs, pshufd with a memory
; operand followed by a movdqa copy of xmm0 into xmm1; all AVX runs, a single
; vbroadcastsd from (%rdi) into ymm0.
106 define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
107 ; SSE-LABEL: load_splat_8i32_4i32_01010101:
108 ; SSE: # %bb.0: # %entry
109 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
110 ; SSE-NEXT: movdqa %xmm0, %xmm1
113 ; AVX-LABEL: load_splat_8i32_4i32_01010101:
114 ; AVX: # %bb.0: # %entry
115 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
118 %ld = load <4 x i32>, <4 x i32>* %ptr
119 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads a full <8 x i32> from %ptr but only uses its low two elements, which the
; shuffle (mask 0,1 four times) repeats across the <8 x i32> result.
; Expected code per the checks below -- both SSE runs, pshufd with a memory
; operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastsd from (%rdi) into ymm0.
123 define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
124 ; SSE-LABEL: load_splat_8i32_8i32_01010101:
125 ; SSE: # %bb.0: # %entry
126 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
127 ; SSE-NEXT: movdqa %xmm0, %xmm1
130 ; AVX-LABEL: load_splat_8i32_8i32_01010101:
131 ; AVX: # %bb.0: # %entry
132 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
135 %ld = load <8 x i32>, <8 x i32>* %ptr
136 %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads an <8 x i16> from %ptr and repeats its low two i16 elements (32 bits in
; total) across the result (mask 0,1 four times).
; Expected code per the checks below -- both SSE runs, pshufd [0,0,0,0] with a
; memory operand; avx run, vpermilps [0,0,0,0] with a memory operand; avx2 and
; avx512f runs, vbroadcastss from (%rdi) into xmm0.
140 define <8 x i16> @load_splat_8i16_8i16_01010101(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
141 ; SSE-LABEL: load_splat_8i16_8i16_01010101:
142 ; SSE: # %bb.0: # %entry
143 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
146 ; AVX1-LABEL: load_splat_8i16_8i16_01010101:
147 ; AVX1: # %bb.0: # %entry
148 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
151 ; AVX2-LABEL: load_splat_8i16_8i16_01010101:
152 ; AVX2: # %bb.0: # %entry
153 ; AVX2-NEXT: vbroadcastss (%rdi), %xmm0
156 ; AVX512-LABEL: load_splat_8i16_8i16_01010101:
157 ; AVX512: # %bb.0: # %entry
158 ; AVX512-NEXT: vbroadcastss (%rdi), %xmm0
161 %ld = load <8 x i16>, <8 x i16>* %ptr
162 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads an <8 x i16> from %ptr and repeats its low four i16 elements (64 bits in
; total) twice (mask 0,1,2,3,0,1,2,3).
; Expected code per the checks below -- both SSE runs, pshufd [0,1,0,1] with a
; memory operand; avx run, vpermilps [0,1,0,1] with a memory operand; avx2 and
; avx512f runs, vmovddup from memory.
166 define <8 x i16> @load_splat_8i16_8i16_01230123(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
167 ; SSE-LABEL: load_splat_8i16_8i16_01230123:
168 ; SSE: # %bb.0: # %entry
169 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
172 ; AVX1-LABEL: load_splat_8i16_8i16_01230123:
173 ; AVX1: # %bb.0: # %entry
174 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
177 ; AVX2-LABEL: load_splat_8i16_8i16_01230123:
178 ; AVX2: # %bb.0: # %entry
179 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
182 ; AVX512-LABEL: load_splat_8i16_8i16_01230123:
183 ; AVX512: # %bb.0: # %entry
184 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
187 %ld = load <8 x i16>, <8 x i16>* %ptr
188 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Loads an <8 x i16> from %ptr and widens it to <16 x i16> by repeating the low
; two i16 elements (32 bits in total; mask 0,1 eight times).
; Expected code per the checks below -- both SSE runs, pshufd [0,0,0,0] with a
; memory operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastss from (%rdi) into ymm0.
192 define <16 x i16> @load_splat_16i16_8i16_0101010101010101(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
193 ; SSE-LABEL: load_splat_16i16_8i16_0101010101010101:
194 ; SSE: # %bb.0: # %entry
195 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
196 ; SSE-NEXT: movdqa %xmm0, %xmm1
199 ; AVX-LABEL: load_splat_16i16_8i16_0101010101010101:
200 ; AVX: # %bb.0: # %entry
201 ; AVX-NEXT: vbroadcastss (%rdi), %ymm0
204 %ld = load <8 x i16>, <8 x i16>* %ptr
205 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads an <8 x i16> from %ptr and widens it to <16 x i16> by repeating the low
; four i16 elements (64 bits in total; mask 0,1,2,3 four times).
; Expected code per the checks below -- both SSE runs, pshufd [0,1,0,1] with a
; memory operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastsd from (%rdi) into ymm0.
209 define <16 x i16> @load_splat_16i16_8i16_0123012301230123(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
210 ; SSE-LABEL: load_splat_16i16_8i16_0123012301230123:
211 ; SSE: # %bb.0: # %entry
212 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
213 ; SSE-NEXT: movdqa %xmm0, %xmm1
216 ; AVX-LABEL: load_splat_16i16_8i16_0123012301230123:
217 ; AVX: # %bb.0: # %entry
218 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
221 %ld = load <8 x i16>, <8 x i16>* %ptr
222 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Loads a full <16 x i16> from %ptr but only uses its low two i16 elements
; (32 bits in total), repeated across the <16 x i16> result (mask 0,1 eight
; times).
; Expected code per the checks below -- both SSE runs, pshufd [0,0,0,0] with a
; memory operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastss from (%rdi) into ymm0.
226 define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
227 ; SSE-LABEL: load_splat_16i16_16i16_0101010101010101:
228 ; SSE: # %bb.0: # %entry
229 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
230 ; SSE-NEXT: movdqa %xmm0, %xmm1
233 ; AVX-LABEL: load_splat_16i16_16i16_0101010101010101:
234 ; AVX: # %bb.0: # %entry
235 ; AVX-NEXT: vbroadcastss (%rdi), %ymm0
238 %ld = load <16 x i16>, <16 x i16>* %ptr
239 %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads a full <16 x i16> from %ptr but only uses its low four i16 elements
; (64 bits in total), repeated across the <16 x i16> result (mask 0,1,2,3 four
; times).
; Expected code per the checks below -- both SSE runs, pshufd [0,1,0,1] with a
; memory operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastsd from (%rdi) into ymm0.
243 define <16 x i16> @load_splat_16i16_16i16_0123012301230123(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
244 ; SSE-LABEL: load_splat_16i16_16i16_0123012301230123:
245 ; SSE: # %bb.0: # %entry
246 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
247 ; SSE-NEXT: movdqa %xmm0, %xmm1
250 ; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
251 ; AVX: # %bb.0: # %entry
252 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
255 %ld = load <16 x i16>, <16 x i16>* %ptr
256 %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Loads a <16 x i8> from %ptr and repeats its low two bytes (16 bits in total)
; across the result (mask 0,1 eight times).
; Expected code per the checks below -- both SSE runs, pshuflw with a memory
; operand followed by pshufd [0,0,0,0]; avx run, the VEX forms of the same pair
; (vpshuflw + vpshufd); avx2 and avx512f runs, a single vpbroadcastw from
; (%rdi) into xmm0.
260 define <16 x i8> @load_splat_16i8_16i8_0101010101010101(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
261 ; SSE-LABEL: load_splat_16i8_16i8_0101010101010101:
262 ; SSE: # %bb.0: # %entry
263 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
264 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
267 ; AVX1-LABEL: load_splat_16i8_16i8_0101010101010101:
268 ; AVX1: # %bb.0: # %entry
269 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
270 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
273 ; AVX2-LABEL: load_splat_16i8_16i8_0101010101010101:
274 ; AVX2: # %bb.0: # %entry
275 ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
278 ; AVX512-LABEL: load_splat_16i8_16i8_0101010101010101:
279 ; AVX512: # %bb.0: # %entry
280 ; AVX512-NEXT: vpbroadcastw (%rdi), %xmm0
283 %ld = load <16 x i8>, <16 x i8>* %ptr
284 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads a <16 x i8> from %ptr and repeats its low four bytes (32 bits in total)
; across the result (mask 0,1,2,3 four times).
; Expected code per the checks below -- both SSE runs, pshufd [0,0,0,0] with a
; memory operand; avx run, vpermilps [0,0,0,0] with a memory operand; avx2 and
; avx512f runs, vbroadcastss from (%rdi) into xmm0.
288 define <16 x i8> @load_splat_16i8_16i8_0123012301230123(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
289 ; SSE-LABEL: load_splat_16i8_16i8_0123012301230123:
290 ; SSE: # %bb.0: # %entry
291 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
294 ; AVX1-LABEL: load_splat_16i8_16i8_0123012301230123:
295 ; AVX1: # %bb.0: # %entry
296 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
299 ; AVX2-LABEL: load_splat_16i8_16i8_0123012301230123:
300 ; AVX2: # %bb.0: # %entry
301 ; AVX2-NEXT: vbroadcastss (%rdi), %xmm0
304 ; AVX512-LABEL: load_splat_16i8_16i8_0123012301230123:
305 ; AVX512: # %bb.0: # %entry
306 ; AVX512-NEXT: vbroadcastss (%rdi), %xmm0
309 %ld = load <16 x i8>, <16 x i8>* %ptr
310 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Loads a <16 x i8> from %ptr and repeats its low eight bytes (64 bits in total)
; twice (mask 0..7, 0..7).
; Expected code per the checks below -- both SSE runs, pshufd [0,1,0,1] with a
; memory operand; avx run, vpermilps [0,1,0,1] with a memory operand; avx2 and
; avx512f runs, vmovddup from memory.
314 define <16 x i8> @load_splat_16i8_16i8_0123456701234567(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
315 ; SSE-LABEL: load_splat_16i8_16i8_0123456701234567:
316 ; SSE: # %bb.0: # %entry
317 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
320 ; AVX1-LABEL: load_splat_16i8_16i8_0123456701234567:
321 ; AVX1: # %bb.0: # %entry
322 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
325 ; AVX2-LABEL: load_splat_16i8_16i8_0123456701234567:
326 ; AVX2: # %bb.0: # %entry
327 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
330 ; AVX512-LABEL: load_splat_16i8_16i8_0123456701234567:
331 ; AVX512: # %bb.0: # %entry
332 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
335 %ld = load <16 x i8>, <16 x i8>* %ptr
336 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Loads a <16 x i8> from %ptr and widens it to <32 x i8> by repeating the low two
; bytes (16 bits in total; mask 0,1 sixteen times).
; Expected code per the checks below -- both SSE runs, pshuflw with a memory
; operand + pshufd [0,0,0,0], then a movdqa copy into xmm1; avx run, vpshuflw +
; vpshufd followed by vinsertf128 to fill the upper ymm half; avx2 and avx512f
; runs, a single vpbroadcastw from (%rdi) into ymm0.
340 define <32 x i8> @load_splat_32i8_16i8_01010101010101010101010101010101(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
341 ; SSE-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
342 ; SSE: # %bb.0: # %entry
343 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
344 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
345 ; SSE-NEXT: movdqa %xmm0, %xmm1
348 ; AVX1-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
349 ; AVX1: # %bb.0: # %entry
350 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
351 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
352 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
355 ; AVX2-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
356 ; AVX2: # %bb.0: # %entry
357 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
360 ; AVX512-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
361 ; AVX512: # %bb.0: # %entry
362 ; AVX512-NEXT: vpbroadcastw (%rdi), %ymm0
365 %ld = load <16 x i8>, <16 x i8>* %ptr
366 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads a <16 x i8> from %ptr and widens it to <32 x i8> by repeating the low
; four bytes (32 bits in total; mask 0,1,2,3 eight times).
; Expected code per the checks below -- both SSE runs, pshufd [0,0,0,0] with a
; memory operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastss from (%rdi) into ymm0.
370 define <32 x i8> @load_splat_32i8_16i8_01230123012301230123012301230123(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
371 ; SSE-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
372 ; SSE: # %bb.0: # %entry
373 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
374 ; SSE-NEXT: movdqa %xmm0, %xmm1
377 ; AVX-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
378 ; AVX: # %bb.0: # %entry
379 ; AVX-NEXT: vbroadcastss (%rdi), %ymm0
382 %ld = load <16 x i8>, <16 x i8>* %ptr
383 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Loads a <16 x i8> from %ptr and widens it to <32 x i8> by repeating the low
; eight bytes (64 bits in total; mask 0..7 four times).
; Expected code per the checks below -- both SSE runs, pshufd [0,1,0,1] with a
; memory operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastsd from (%rdi) into ymm0.
387 define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
388 ; SSE-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
389 ; SSE: # %bb.0: # %entry
390 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
391 ; SSE-NEXT: movdqa %xmm0, %xmm1
394 ; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
395 ; AVX: # %bb.0: # %entry
396 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
399 %ld = load <16 x i8>, <16 x i8>* %ptr
400 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Loads a full <32 x i8> from %ptr but only uses its low two bytes (16 bits in
; total), repeated across the <32 x i8> result (mask 0,1 sixteen times).
; Expected code per the checks below -- both SSE runs, pshuflw with a memory
; operand + pshufd [0,0,0,0], then a movdqa copy into xmm1; avx run, vpshuflw +
; vpshufd followed by vinsertf128 to fill the upper ymm half; avx2 and avx512f
; runs, a single vpbroadcastw from (%rdi) into ymm0.
404 define <32 x i8> @load_splat_32i8_32i8_01010101010101010101010101010101(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
405 ; SSE-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
406 ; SSE: # %bb.0: # %entry
407 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
408 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
409 ; SSE-NEXT: movdqa %xmm0, %xmm1
412 ; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
413 ; AVX1: # %bb.0: # %entry
414 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
415 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
416 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
419 ; AVX2-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
420 ; AVX2: # %bb.0: # %entry
421 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
424 ; AVX512-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
425 ; AVX512: # %bb.0: # %entry
426 ; AVX512-NEXT: vpbroadcastw (%rdi), %ymm0
429 %ld = load <32 x i8>, <32 x i8>* %ptr
430 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads a full <32 x i8> from %ptr but only uses its low four bytes (32 bits in
; total), repeated across the <32 x i8> result (mask 0,1,2,3 eight times).
; Expected code per the checks below -- both SSE runs, pshufd [0,0,0,0] with a
; memory operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastss from (%rdi) into ymm0.
434 define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
435 ; SSE-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
436 ; SSE: # %bb.0: # %entry
437 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
438 ; SSE-NEXT: movdqa %xmm0, %xmm1
441 ; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
442 ; AVX: # %bb.0: # %entry
443 ; AVX-NEXT: vbroadcastss (%rdi), %ymm0
446 %ld = load <32 x i8>, <32 x i8>* %ptr
447 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Loads a full <32 x i8> from %ptr but only uses its low eight bytes (64 bits in
; total), repeated across the <32 x i8> result (mask 0..7 four times).
; Expected code per the checks below -- both SSE runs, pshufd [0,1,0,1] with a
; memory operand followed by a movdqa copy into xmm1; all AVX runs, a single
; vbroadcastsd from (%rdi) into ymm0.
451 define <32 x i8> @load_splat_32i8_32i8_01234567012345670123456701234567(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
452 ; SSE-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
453 ; SSE: # %bb.0: # %entry
454 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
455 ; SSE-NEXT: movdqa %xmm0, %xmm1
458 ; AVX-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
459 ; AVX: # %bb.0: # %entry
460 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
463 %ld = load <32 x i8>, <32 x i8>* %ptr
464 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Narrowing case -- loads an <8 x float> from %ptr and produces a <4 x float>
; whose lanes all hold element 0 (zeroinitializer shuffle mask).
; Expected code per the checks below -- both SSE runs, a 16-byte movaps load
; followed by shufps [0,0,0,0]; all AVX runs, a single vbroadcastss from (%rdi)
; into xmm0.
468 define <4 x float> @load_splat_4f32_8f32_0000(<8 x float>* %ptr) nounwind uwtable readnone ssp {
469 ; SSE-LABEL: load_splat_4f32_8f32_0000:
470 ; SSE: # %bb.0: # %entry
471 ; SSE-NEXT: movaps (%rdi), %xmm0
472 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
475 ; AVX-LABEL: load_splat_4f32_8f32_0000:
476 ; AVX: # %bb.0: # %entry
477 ; AVX-NEXT: vbroadcastss (%rdi), %xmm0
480 %ld = load <8 x float>, <8 x float>* %ptr
481 %ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
; Non-zero-offset case -- loads a <16 x float> from %ptr and repeats elements 8
; and 9 across an <8 x float> result (mask 8,9 four times). Element 8 of a float
; vector sits at byte offset 32, which is the displacement the sse2 and AVX
; checks below expect on the memory operand.
; Expected code per the checks below -- sse2 run, movaps from 32(%rdi) +
; movlhps, then a copy into xmm1; sse4.2 run, movddup from memory + movapd copy;
; all AVX runs, a single vbroadcastsd from 32(%rdi) into ymm0.
485 define <8 x float> @load_splat_8f32_16f32_89898989(<16 x float>* %ptr) nounwind uwtable readnone ssp {
486 ; SSE2-LABEL: load_splat_8f32_16f32_89898989:
487 ; SSE2: # %bb.0: # %entry
488 ; SSE2-NEXT: movaps 32(%rdi), %xmm0
489 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
490 ; SSE2-NEXT: movaps %xmm0, %xmm1
493 ; SSE42-LABEL: load_splat_8f32_16f32_89898989:
494 ; SSE42: # %bb.0: # %entry
495 ; SSE42-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
496 ; SSE42-NEXT: movapd %xmm0, %xmm1
499 ; AVX-LABEL: load_splat_8f32_16f32_89898989:
500 ; AVX: # %bb.0: # %entry
501 ; AVX-NEXT: vbroadcastsd 32(%rdi), %ymm0
504 %ld = load <16 x float>, <16 x float>* %ptr
505 %ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
; Loads a 64-bit <2 x i32> from %vp and widens it to <4 x i32> by repeating both
; elements (mask <0,1,0,1>).
; Expected code per the checks below -- both SSE runs, a movq load (upper half
; zeroed) followed by pshufd [0,1,0,1]; all AVX runs, a single vmovddup from
; memory.
510 define <4 x i32> @load_splat_4i32_2i32_0101(<2 x i32>* %vp) {
511 ; SSE-LABEL: load_splat_4i32_2i32_0101:
513 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
514 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
517 ; AVX-LABEL: load_splat_4i32_2i32_0101:
519 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
521 %vec = load <2 x i32>, <2 x i32>* %vp
522 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Loads a 64-bit <2 x i32> from %vp and widens it to <8 x i32> by repeating both
; elements (mask 0,1 four times).
; Expected code per the checks below -- both SSE runs, a movq load (upper half
; zeroed) + pshufd [0,1,0,1], then a movdqa copy into xmm1; all AVX runs, a
; single vbroadcastsd from (%rdi) into ymm0.
526 define <8 x i32> @load_splat_8i32_2i32_0101(<2 x i32>* %vp) {
527 ; SSE-LABEL: load_splat_8i32_2i32_0101:
529 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
530 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
531 ; SSE-NEXT: movdqa %xmm0, %xmm1
534 ; AVX-LABEL: load_splat_8i32_2i32_0101:
536 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
538 %vec = load <2 x i32>, <2 x i32>* %vp
539 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Loads a 64-bit <2 x i32> from %vp and widens it to <16 x i32> by repeating both
; elements (mask 0,1 eight times).
; Expected code per the checks below -- both SSE runs, a movq load (upper half
; zeroed) + pshufd [0,1,0,1], then movdqa copies into xmm1, xmm2 and xmm3; avx
; and avx2 runs, vbroadcastsd from (%rdi) into ymm0 plus a vmovaps copy into
; ymm1; avx512f run, a single vbroadcastsd from (%rdi) into zmm0.
543 define <16 x i32> @load_splat_16i32_2i32_0101(<2 x i32>* %vp) {
544 ; SSE-LABEL: load_splat_16i32_2i32_0101:
546 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
547 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
548 ; SSE-NEXT: movdqa %xmm0, %xmm1
549 ; SSE-NEXT: movdqa %xmm0, %xmm2
550 ; SSE-NEXT: movdqa %xmm0, %xmm3
553 ; AVX1-LABEL: load_splat_16i32_2i32_0101:
555 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
556 ; AVX1-NEXT: vmovaps %ymm0, %ymm1
559 ; AVX2-LABEL: load_splat_16i32_2i32_0101:
561 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
562 ; AVX2-NEXT: vmovaps %ymm0, %ymm1
565 ; AVX512-LABEL: load_splat_16i32_2i32_0101:
567 ; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
569 %vec = load <2 x i32>, <2 x i32>* %vp
570 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>