; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
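
; The X32/X64 prefixes are shared by the AVX2 and AVX512VL RUN lines; the
; X32-AVX2/X64-AVX2 and X32-AVX512VL/X64-AVX512VL prefixes are only used
; where the two feature sets generate different code (e.g. @V111, @V113 and
; the isel_crash tests below).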

define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB16:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: BB16:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
  ret <16 x i8> %qf
}
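
; A chain of insertelements that writes the same loaded scalar into every
; lane is recognized as a splat and selected as a single vpbroadcastb from
; memory. The equivalent splat is more often written with the shufflevector
; idiom, which lowers the same way (a sketch, not one of the checked cases):
;   %s = load i8, i8* %ptr
;   %v = insertelement <16 x i8> undef, i8 %s, i32 0
;   %splat = shufflevector <16 x i8> %v, <16 x i8> undef, <16 x i32> zeroinitializer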

define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB32:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: BB32:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15

  %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16
  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
  ret <32 x i8> %q2f
}

define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: W16:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: W16:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
  ret <8 x i16> %q7
}

define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: WW16:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: WW16:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
  ret <16 x i16> %qf
}

define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D32:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: D32:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
  ret <4 x i32> %q3
}

define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: DD32:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: DD32:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
  ret <8 x i32> %q7
}

define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: Q64:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: Q64:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
  ret <2 x i64> %q1
}
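
; On the 32-bit target there is no 64-bit GPR, so the i64 splat cannot be
; selected as a single vpbroadcastq here; the two 32-bit halves are loaded
; separately and reassembled with vmovd/vpinsrd, as the X32 checks show.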

define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: QQ64:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: QQ64:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
  ret <4 x i64> %q3
}

define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
; X32-LABEL: broadcast_mem_v4i16_v8i16:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_mem_v4i16_v8i16:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
; X64-NEXT:    retq
  %load = load <4 x i16>, <4 x i16>* %ptr
  %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %shuf
}
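
; Repeating all four i16 lanes of a <4 x i16> is equivalent to splatting one
; 64-bit element, so the whole loaded vector is broadcast as a quadword:
; vpbroadcastq on X64, and a vmovddup of the same memory on X32.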

define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
; X32-LABEL: broadcast_mem_v4i16_v16i16:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_mem_v4i16_v16i16:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %load = load <4 x i16>, <4 x i16>* %ptr
  %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %shuf
}

; FIXME: Pointer adjusted broadcasts

define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i8_16i8_1111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i8_16i8_1111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %ret
}

define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i16_8i16_11111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i16_8i16_11111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %ret
}

define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_8i16_1111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i16_8i16_1111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_16i16_1111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i16_16i16_1111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f32_4f32_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f32_4f32_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_4f32_33333333:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_4f32_33333333:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_8f32_55555555:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_8f32_55555555:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastq 8(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2f64_2f64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2f64_2f64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_2f64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_2f64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_4f64_2222:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_4f64_2222:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; Make sure that we still don't support broadcasting a double into a 128-bit
; vector.

define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: I:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: I:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}
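
; There is no 128-bit form of vbroadcastsd, so the two-lane double splat is
; emitted as vmovddup on both targets rather than as a broadcast instruction.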

define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V111:
; X32-AVX2:       ## BB#0: ## %entry
; X32-AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X32-AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: V111:
; X64-AVX2:       ## BB#0: ## %entry
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X64-AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X32-AVX512VL-LABEL: V111:
; X32-AVX512VL:       ## BB#0: ## %entry
; X32-AVX512VL-NEXT:    vpaddd LCPI29_0{1to8}, %ymm0, %ymm0
; X32-AVX512VL-NEXT:    retl
;
; X64-AVX512VL-LABEL: V111:
; X64-AVX512VL:       ## BB#0: ## %entry
; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
entry:
  %g = add <8 x i32> %in, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %g
}

define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V113:
; X32-AVX2:       ## BB#0: ## %entry
; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: V113:
; X64-AVX2:       ## BB#0: ## %entry
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X32-AVX512VL-LABEL: V113:
; X32-AVX512VL:       ## BB#0: ## %entry
; X32-AVX512VL-NEXT:    vaddps LCPI30_0{1to8}, %ymm0, %ymm0
; X32-AVX512VL-NEXT:    retl
;
; X64-AVX512VL-LABEL: V113:
; X64-AVX512VL:       ## BB#0: ## %entry
; X64-AVX512VL-NEXT:    vaddps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
entry:
  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
  ret <8 x float> %g
}

define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-NEXT:    retl
;
; X64-LABEL: _e2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-NEXT:    retq
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}

define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e4:
; X32:       ## BB#0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X32-NEXT:    retl
;
; X64-LABEL: _e4:
; X64:       ## BB#0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X64-NEXT:    retq
  %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
  ret <8 x i8> %vecinit7.i
}

define void @crash() nounwind alwaysinline {
; X32-LABEL: crash:
; X32:       ## BB#0: ## %WGLoopsEntry
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    testb %al, %al
; X32-NEXT:    je LBB33_1
; X32-NEXT:  ## BB#2: ## %ret
; X32-NEXT:    retl
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  LBB33_1: ## %footer349VF
; X32-NEXT:    ## =>This Inner Loop Header: Depth=1
; X32-NEXT:    jmp LBB33_1
;
; X64-LABEL: crash:
; X64:       ## BB#0: ## %WGLoopsEntry
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    je LBB33_1
; X64-NEXT:  ## BB#2: ## %ret
; X64-NEXT:    retq
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  LBB33_1: ## %footer349VF
; X64-NEXT:    ## =>This Inner Loop Header: Depth=1
; X64-NEXT:    jmp LBB33_1
WGLoopsEntry:
  br i1 undef, label %ret, label %footer329VF

footer329VF:
  %A.0.inVF = fmul float undef, 6.553600e+04
  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
  %A.0VF = fptosi float %A.0.inVF to i32
  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %1 = and i32 %A.0VF, 65535
  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
  br i1 undef, label %preload1201VF, label %footer349VF

preload1201VF:
  br label %footer349VF

footer349VF:
  %2 = mul nsw <8 x i32> undef, %0
  %3 = mul nsw <8 x i32> undef, %vector1099VF
  br label %footer329VF

ret:
  ret void
}

define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg0:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-AVX2-LABEL: _inreg0:
; X64-AVX2:       ## BB#0:
; X64-AVX2-NEXT:    vmovd %edi, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: _inreg0:
; X64-AVX512VL:       ## BB#0:
; X64-AVX512VL-NEXT:    vpbroadcastd %edi, %ymm0
; X64-AVX512VL-NEXT:    retq
  %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %wide
}
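
; To broadcast a scalar that is already live in a GPR, AVX2 must first move
; it into a vector register (vmovd) and broadcast from there, while AVX512VL
; has GPR-source forms (vpbroadcastd %edi, %ymm0) that save the extra move.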

define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg1:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg1:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %in = insertelement <8 x float> undef, float %scalar, i32 0
  %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %wide
}

define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg2:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %in = insertelement <4 x float> undef, float %scalar, i32 0
  %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %wide
}

define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg3:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg3:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %in = insertelement <4 x double> undef, double %scalar, i32 0
  %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %wide
}

define <8 x float> @_inreg8xfloat(<8 x float> %a) {
; X32-LABEL: _inreg8xfloat:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg8xfloat:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %b
}

define <4 x float> @_inreg4xfloat(<4 x float> %a) {
; X32-LABEL: _inreg4xfloat:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xfloat:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %b
}

define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
; X32-LABEL: _inreg16xi16:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastw %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg16xi16:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %b
}

define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
; X32-LABEL: _inreg8xi16:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg8xi16:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %b
}

define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
; X32-LABEL: _inreg4xi64:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xi64:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %b
}

define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
; X32-LABEL: _inreg2xi64:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg2xi64:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %b
}

define <4 x double> @_inreg4xdouble(<4 x double> %a) {
; X32-LABEL: _inreg4xdouble:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xdouble:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %b
}

define <2 x double> @_inreg2xdouble(<2 x double> %a) {
; X32-LABEL: _inreg2xdouble:
; X32:       ## BB#0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: _inreg2xdouble:
; X64:       ## BB#0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    retq
  %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %b
}

define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
; X32-LABEL: _inreg8xi32:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg8xi32:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %b
}

define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
; X32-LABEL: _inreg4xi32:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xi32:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %b
}

define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
; X32-LABEL: _inreg32xi8:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg32xi8:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %b
}

define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
; X32-LABEL: _inreg16xi8:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg16xi8:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %b
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).

define <8 x float> @splat_concat1(float %f) {
; X32-LABEL: splat_concat1:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat1:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <4 x float> undef, float %f, i32 0
  %2 = insertelement <4 x float> %1, float %f, i32 1
  %3 = insertelement <4 x float> %2, float %f, i32 2
  %4 = insertelement <4 x float> %3, float %f, i32 3
  %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %5
}

define <8 x float> @splat_concat2(float %f) {
; X32-LABEL: splat_concat2:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <4 x float> undef, float %f, i32 0
  %2 = insertelement <4 x float> %1, float %f, i32 1
  %3 = insertelement <4 x float> %2, float %f, i32 2
  %4 = insertelement <4 x float> %3, float %f, i32 3
  %5 = insertelement <4 x float> undef, float %f, i32 0
  %6 = insertelement <4 x float> %5, float %f, i32 1
  %7 = insertelement <4 x float> %6, float %f, i32 2
  %8 = insertelement <4 x float> %7, float %f, i32 3
  %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %9
}

define <4 x double> @splat_concat3(double %d) {
; X32-LABEL: splat_concat3:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat3:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <2 x double> undef, double %d, i32 0
  %2 = insertelement <2 x double> %1, double %d, i32 1
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %3
}

define <4 x double> @splat_concat4(double %d) {
; X32-LABEL: splat_concat4:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat4:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <2 x double> undef, double %d, i32 0
  %2 = insertelement <2 x double> %1, double %d, i32 1
  %3 = insertelement <2 x double> undef, double %d, i32 0
  %4 = insertelement <2 x double> %3, double %d, i32 1
  %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %5
}

; Test cases for <rdar://problem/16074331>.
; Instruction selection for a broadcast instruction fails if
; the load cannot be folded into the broadcast.
; This happens if the load initially has one use, but other uses are
; created later, or if the selection DAG cannot prove that folding the
; load will not create a cycle in the DAG.
; These test cases exercise the latter.
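
; Note that in the X64 checks below the scalar load is indeed not folded:
; the value is first loaded into a GPR (or into an xmm via vmovd/vmovq) and
; the broadcast is then performed from a register rather than from memory.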

define void @isel_crash_16b(i8* %cV_R.addr) {
; X32-LABEL: isel_crash_16b:
; X32:       ## BB#0: ## %eintry
; X32-NEXT:    subl $60, %esp
; X32-NEXT:    .cfi_def_cfa_offset 64
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    vmovaps %xmm0, (%esp)
; X32-NEXT:    vpbroadcastb (%eax), %xmm1
; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    addl $60, %esp
; X32-NEXT:    retl
;
; X64-LABEL: isel_crash_16b:
; X64:       ## BB#0: ## %eintry
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movb (%rdi), %al
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastb %xmm1, %xmm1
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
eintry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i8, i8* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
  %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

define void @isel_crash_32b(i8* %cV_R.addr) {
; X32-LABEL: isel_crash_32b:
; X32:       ## BB#0: ## %eintry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-32, %esp
; X32-NEXT:    subl $128, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    vmovaps %ymm0, (%esp)
; X32-NEXT:    vpbroadcastb (%eax), %ymm1
; X32-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: isel_crash_32b:
; X64:       ## BB#0: ## %eintry
; X64-NEXT:    pushq %rbp
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbp, -16
; X64-NEXT:    movq %rsp, %rbp
; X64-NEXT:    .cfi_def_cfa_register %rbp
; X64-NEXT:    andq $-32, %rsp
; X64-NEXT:    subq $128, %rsp
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovaps %ymm0, (%rsp)
; X64-NEXT:    movb (%rdi), %al
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastb %xmm1, %ymm1
; X64-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rbp, %rsp
; X64-NEXT:    popq %rbp
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
eintry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i8, i8* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
  %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
  %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

define void @isel_crash_8w(i16* %cV_R.addr) {
; X32-LABEL: isel_crash_8w:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    subl $60, %esp
; X32-NEXT:    .cfi_def_cfa_offset 64
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    vmovaps %xmm0, (%esp)
; X32-NEXT:    vpbroadcastw (%eax), %xmm1
; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    addl $60, %esp
; X32-NEXT:    retl
;
; X64-LABEL: isel_crash_8w:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastw %xmm1, %xmm1
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i16, i16* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

define void @isel_crash_16w(i16* %cV_R.addr) {
; X32-LABEL: isel_crash_16w:
; X32:       ## BB#0: ## %eintry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-32, %esp
; X32-NEXT:    subl $128, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    vmovaps %ymm0, (%esp)
; X32-NEXT:    vpbroadcastw (%eax), %ymm1
; X32-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: isel_crash_16w:
; X64:       ## BB#0: ## %eintry
; X64-NEXT:    pushq %rbp
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbp, -16
; X64-NEXT:    movq %rsp, %rbp
; X64-NEXT:    .cfi_def_cfa_register %rbp
; X64-NEXT:    andq $-32, %rsp
; X64-NEXT:    subq $128, %rsp
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovaps %ymm0, (%rsp)
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastw %xmm1, %ymm1
; X64-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rbp, %rsp
; X64-NEXT:    popq %rbp
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
eintry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i16, i16* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
  %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
  %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

define void @isel_crash_4d(i32* %cV_R.addr) {
; X32-LABEL: isel_crash_4d:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    subl $60, %esp
; X32-NEXT:    .cfi_def_cfa_offset 64
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    vmovaps %xmm0, (%esp)
; X32-NEXT:    vbroadcastss (%eax), %xmm1
; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovaps %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    addl $60, %esp
; X32-NEXT:    retl
;
; X64-AVX2-LABEL: isel_crash_4d:
; X64-AVX2:       ## BB#0: ## %entry
; X64-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    movl (%rdi), %eax
; X64-AVX2-NEXT:    vmovd %eax, %xmm1
; X64-AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: isel_crash_4d:
; X64-AVX512VL:       ## BB#0: ## %entry
; X64-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    movl (%rdi), %eax
; X64-AVX512VL-NEXT:    vpbroadcastd %eax, %xmm1
; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    retq
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i32, i32* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

define void @isel_crash_8d(i32* %cV_R.addr) {
; X32-LABEL: isel_crash_8d:
; X32:       ## BB#0: ## %eintry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-32, %esp
; X32-NEXT:    subl $128, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    vmovaps %ymm0, (%esp)
; X32-NEXT:    vbroadcastss (%eax), %ymm1
; X32-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-AVX2-LABEL: isel_crash_8d:
; X64-AVX2:       ## BB#0: ## %eintry
; X64-AVX2-NEXT:    pushq %rbp
; X64-AVX2-NEXT:    .cfi_def_cfa_offset 16
; X64-AVX2-NEXT:    .cfi_offset %rbp, -16
; X64-AVX2-NEXT:    movq %rsp, %rbp
; X64-AVX2-NEXT:    .cfi_def_cfa_register %rbp
; X64-AVX2-NEXT:    andq $-32, %rsp
; X64-AVX2-NEXT:    subq $128, %rsp
; X64-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; X64-AVX2-NEXT:    movl (%rdi), %eax
; X64-AVX2-NEXT:    vmovd %eax, %xmm1
; X64-AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    movq %rbp, %rsp
; X64-AVX2-NEXT:    popq %rbp
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: isel_crash_8d:
; X64-AVX512VL:       ## BB#0: ## %eintry
; X64-AVX512VL-NEXT:    pushq %rbp
; X64-AVX512VL-NEXT:    .cfi_def_cfa_offset 16
; X64-AVX512VL-NEXT:    .cfi_offset %rbp, -16
; X64-AVX512VL-NEXT:    movq %rsp, %rbp
; X64-AVX512VL-NEXT:    .cfi_def_cfa_register %rbp
; X64-AVX512VL-NEXT:    andq $-32, %rsp
; X64-AVX512VL-NEXT:    subq $128, %rsp
; X64-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    vmovaps %ymm0, (%rsp)
; X64-AVX512VL-NEXT:    movl (%rdi), %eax
; X64-AVX512VL-NEXT:    vpbroadcastd %eax, %ymm1
; X64-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    movq %rbp, %rsp
; X64-AVX512VL-NEXT:    popq %rbp
; X64-AVX512VL-NEXT:    vzeroupper
; X64-AVX512VL-NEXT:    retq
eintry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i32, i32* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
  %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
  %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

define void @isel_crash_2q(i64* %cV_R.addr) {
; X32-LABEL: isel_crash_2q:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    subl $60, %esp
; X32-NEXT:    .cfi_def_cfa_offset 64
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    vmovaps %xmm0, (%esp)
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm1
; X32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; X32-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
; X32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    addl $60, %esp
; X32-NEXT:    retl
;
; X64-AVX2-LABEL: isel_crash_2q:
; X64-AVX2:       ## BB#0: ## %entry
; X64-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    movq (%rdi), %rax
; X64-AVX2-NEXT:    vmovq %rax, %xmm1
; X64-AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: isel_crash_2q:
; X64-AVX512VL:       ## BB#0: ## %entry
; X64-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    movq (%rdi), %rax
; X64-AVX512VL-NEXT:    vpbroadcastq %rax, %xmm1
; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    retq
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i64, i64* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
  ret void
}

define void @isel_crash_4q(i64* %cV_R.addr) {
; X32-LABEL: isel_crash_4q:
; X32:       ## BB#0: ## %eintry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-32, %esp
; X32-NEXT:    subl $128, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    vmovaps %ymm0, (%esp)
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm1
; X32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; X32-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
; X32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; X32-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-AVX2-LABEL: isel_crash_4q:
; X64-AVX2:       ## BB#0: ## %eintry
; X64-AVX2-NEXT:    pushq %rbp
; X64-AVX2-NEXT:    .cfi_def_cfa_offset 16
; X64-AVX2-NEXT:    .cfi_offset %rbp, -16
; X64-AVX2-NEXT:    movq %rsp, %rbp
; X64-AVX2-NEXT:    .cfi_def_cfa_register %rbp
; X64-AVX2-NEXT:    andq $-32, %rsp
; X64-AVX2-NEXT:    subq $128, %rsp
; X64-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; X64-AVX2-NEXT:    movq (%rdi), %rax
; X64-AVX2-NEXT:    vmovq %rax, %xmm1
; X64-AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX2-NEXT:    movq %rbp, %rsp
; X64-AVX2-NEXT:    popq %rbp
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: isel_crash_4q:
; X64-AVX512VL:       ## BB#0: ## %eintry
; X64-AVX512VL-NEXT:    pushq %rbp
; X64-AVX512VL-NEXT:    .cfi_def_cfa_offset 16
; X64-AVX512VL-NEXT:    .cfi_offset %rbp, -16
; X64-AVX512VL-NEXT:    movq %rsp, %rbp
; X64-AVX512VL-NEXT:    .cfi_def_cfa_register %rbp
; X64-AVX512VL-NEXT:    andq $-32, %rsp
; X64-AVX512VL-NEXT:    subq $128, %rsp
; X64-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    vmovaps %ymm0, (%rsp)
; X64-AVX512VL-NEXT:    movq (%rdi), %rax
; X64-AVX512VL-NEXT:    vpbroadcastq %rax, %ymm1
; X64-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT:    movq %rbp, %rsp
; X64-AVX512VL-NEXT:    popq %rbp
; X64-AVX512VL-NEXT:    vzeroupper
; X64-AVX512VL-NEXT:    retq
eintry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i64, i64* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
  %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
  ret void
}