1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512BW
8 ; RUN: llc < %s -mtriple=i686-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,X86AVX2
10 define <16 x i8> @undef_index(i8 %x) nounwind {
11 ; ALL-LABEL: undef_index:
13 ; ALL-NEXT: ret{{[l|q]}}
14 %ins = insertelement <16 x i8> undef, i8 %x, i64 undef
18 define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind {
19 ; ALL-LABEL: undef_scalar:
21 ; ALL-NEXT: ret{{[l|q]}}
22 %ins = insertelement <16 x i8> %x, i8 undef, i32 %index
27 ; Insertion into undef vectors
30 define <16 x i8> @arg_i8_v16i8_undef(i8 %x, i32 %y) nounwind {
31 ; SSE2-LABEL: arg_i8_v16i8_undef:
33 ; SSE2-NEXT: movd %edi, %xmm0
34 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
35 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
36 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
39 ; SSE41-LABEL: arg_i8_v16i8_undef:
41 ; SSE41-NEXT: movd %edi, %xmm0
42 ; SSE41-NEXT: pxor %xmm1, %xmm1
43 ; SSE41-NEXT: pshufb %xmm1, %xmm0
46 ; AVX1-LABEL: arg_i8_v16i8_undef:
48 ; AVX1-NEXT: vmovd %edi, %xmm0
49 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
50 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
53 ; AVX2-LABEL: arg_i8_v16i8_undef:
55 ; AVX2-NEXT: vmovd %edi, %xmm0
56 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
59 ; AVX512F-LABEL: arg_i8_v16i8_undef:
61 ; AVX512F-NEXT: vmovd %edi, %xmm0
62 ; AVX512F-NEXT: vpbroadcastb %xmm0, %xmm0
65 ; AVX512BW-LABEL: arg_i8_v16i8_undef:
67 ; AVX512BW-NEXT: vpbroadcastb %edi, %xmm0
70 ; X86AVX2-LABEL: arg_i8_v16i8_undef:
72 ; X86AVX2-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0
74 %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
78 define <8 x i16> @arg_i16_v8i16_undef(i16 %x, i32 %y) nounwind {
79 ; SSE-LABEL: arg_i16_v8i16_undef:
81 ; SSE-NEXT: movd %edi, %xmm0
82 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
83 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
86 ; AVX1-LABEL: arg_i16_v8i16_undef:
88 ; AVX1-NEXT: vmovd %edi, %xmm0
89 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
90 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
93 ; AVX2-LABEL: arg_i16_v8i16_undef:
95 ; AVX2-NEXT: vmovd %edi, %xmm0
96 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
99 ; AVX512F-LABEL: arg_i16_v8i16_undef:
101 ; AVX512F-NEXT: vmovd %edi, %xmm0
102 ; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0
105 ; AVX512BW-LABEL: arg_i16_v8i16_undef:
107 ; AVX512BW-NEXT: vpbroadcastw %edi, %xmm0
108 ; AVX512BW-NEXT: retq
110 ; X86AVX2-LABEL: arg_i16_v8i16_undef:
112 ; X86AVX2-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm0
114 %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
118 define <4 x i32> @arg_i32_v4i32_undef(i32 %x, i32 %y) nounwind {
119 ; SSE-LABEL: arg_i32_v4i32_undef:
121 ; SSE-NEXT: movd %edi, %xmm0
122 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
125 ; AVX1-LABEL: arg_i32_v4i32_undef:
127 ; AVX1-NEXT: vmovd %edi, %xmm0
128 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
131 ; AVX2-LABEL: arg_i32_v4i32_undef:
133 ; AVX2-NEXT: vmovd %edi, %xmm0
134 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
137 ; AVX512-LABEL: arg_i32_v4i32_undef:
139 ; AVX512-NEXT: vpbroadcastd %edi, %xmm0
142 ; X86AVX2-LABEL: arg_i32_v4i32_undef:
144 ; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
146 %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
150 define <2 x i64> @arg_i64_v2i64_undef(i64 %x, i32 %y) nounwind {
151 ; SSE-LABEL: arg_i64_v2i64_undef:
153 ; SSE-NEXT: movq %rdi, %xmm0
154 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
157 ; AVX1-LABEL: arg_i64_v2i64_undef:
159 ; AVX1-NEXT: vmovq %rdi, %xmm0
160 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
163 ; AVX2-LABEL: arg_i64_v2i64_undef:
165 ; AVX2-NEXT: vmovq %rdi, %xmm0
166 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
169 ; AVX512-LABEL: arg_i64_v2i64_undef:
171 ; AVX512-NEXT: vpbroadcastq %rdi, %xmm0
174 ; X86AVX2-LABEL: arg_i64_v2i64_undef:
176 ; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
178 %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
182 define <4 x float> @arg_f32_v4f32_undef(float %x, i32 %y) nounwind {
183 ; SSE-LABEL: arg_f32_v4f32_undef:
185 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
188 ; AVX1-LABEL: arg_f32_v4f32_undef:
190 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
193 ; AVX2-LABEL: arg_f32_v4f32_undef:
195 ; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
198 ; AVX512-LABEL: arg_f32_v4f32_undef:
200 ; AVX512-NEXT: vbroadcastss %xmm0, %xmm0
203 ; X86AVX2-LABEL: arg_f32_v4f32_undef:
205 ; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
207 %ins = insertelement <4 x float> undef, float %x, i32 %y
211 define <2 x double> @arg_f64_v2f64_undef(double %x, i32 %y) nounwind {
212 ; SSE2-LABEL: arg_f64_v2f64_undef:
214 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
217 ; SSE41-LABEL: arg_f64_v2f64_undef:
219 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
222 ; AVX-LABEL: arg_f64_v2f64_undef:
224 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
227 ; X86AVX2-LABEL: arg_f64_v2f64_undef:
229 ; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
231 %ins = insertelement <2 x double> undef, double %x, i32 %y
232 ret <2 x double> %ins
235 define <16 x i8> @load_i8_v16i8_undef(ptr %p, i32 %y) nounwind {
236 ; SSE2-LABEL: load_i8_v16i8_undef:
238 ; SSE2-NEXT: movzbl (%rdi), %eax
239 ; SSE2-NEXT: movd %eax, %xmm0
240 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
241 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
242 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
245 ; SSE41-LABEL: load_i8_v16i8_undef:
247 ; SSE41-NEXT: movzbl (%rdi), %eax
248 ; SSE41-NEXT: movd %eax, %xmm0
249 ; SSE41-NEXT: pxor %xmm1, %xmm1
250 ; SSE41-NEXT: pshufb %xmm1, %xmm0
253 ; AVX1-LABEL: load_i8_v16i8_undef:
255 ; AVX1-NEXT: movzbl (%rdi), %eax
256 ; AVX1-NEXT: vmovd %eax, %xmm0
257 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
258 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
261 ; AVX2-LABEL: load_i8_v16i8_undef:
263 ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
266 ; AVX512-LABEL: load_i8_v16i8_undef:
268 ; AVX512-NEXT: vpbroadcastb (%rdi), %xmm0
271 ; X86AVX2-LABEL: load_i8_v16i8_undef:
273 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
274 ; X86AVX2-NEXT: vpbroadcastb (%eax), %xmm0
277 %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
281 define <8 x i16> @load_i16_v8i16_undef(ptr %p, i32 %y) nounwind {
282 ; SSE-LABEL: load_i16_v8i16_undef:
284 ; SSE-NEXT: movzwl (%rdi), %eax
285 ; SSE-NEXT: movd %eax, %xmm0
286 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
287 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
290 ; AVX1-LABEL: load_i16_v8i16_undef:
292 ; AVX1-NEXT: movzwl (%rdi), %eax
293 ; AVX1-NEXT: vmovd %eax, %xmm0
294 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
295 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
298 ; AVX2-LABEL: load_i16_v8i16_undef:
300 ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
303 ; AVX512-LABEL: load_i16_v8i16_undef:
305 ; AVX512-NEXT: vpbroadcastw (%rdi), %xmm0
308 ; X86AVX2-LABEL: load_i16_v8i16_undef:
310 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
311 ; X86AVX2-NEXT: vpbroadcastw (%eax), %xmm0
313 %x = load i16, ptr %p
314 %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
318 define <4 x i32> @load_i32_v4i32_undef(ptr %p, i32 %y) nounwind {
319 ; SSE-LABEL: load_i32_v4i32_undef:
321 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
322 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
325 ; AVX-LABEL: load_i32_v4i32_undef:
327 ; AVX-NEXT: vbroadcastss (%rdi), %xmm0
330 ; X86AVX2-LABEL: load_i32_v4i32_undef:
332 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
333 ; X86AVX2-NEXT: vbroadcastss (%eax), %xmm0
335 %x = load i32, ptr %p
336 %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
340 define <2 x i64> @load_i64_v2i64_undef(ptr %p, i32 %y) nounwind {
341 ; SSE-LABEL: load_i64_v2i64_undef:
343 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
344 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
347 ; AVX-LABEL: load_i64_v2i64_undef:
349 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
352 ; X86AVX2-LABEL: load_i64_v2i64_undef:
354 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
355 ; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
357 %x = load i64, ptr %p
358 %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
362 define <4 x float> @load_f32_v4f32_undef(ptr %p, i32 %y) nounwind {
363 ; SSE-LABEL: load_f32_v4f32_undef:
365 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
366 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
369 ; AVX-LABEL: load_f32_v4f32_undef:
371 ; AVX-NEXT: vbroadcastss (%rdi), %xmm0
374 ; X86AVX2-LABEL: load_f32_v4f32_undef:
376 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
377 ; X86AVX2-NEXT: vbroadcastss (%eax), %xmm0
379 %x = load float, ptr %p
380 %ins = insertelement <4 x float> undef, float %x, i32 %y
384 define <2 x double> @load_f64_v2f64_undef(ptr %p, i32 %y) nounwind {
385 ; SSE2-LABEL: load_f64_v2f64_undef:
387 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
388 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
391 ; SSE41-LABEL: load_f64_v2f64_undef:
393 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
396 ; AVX-LABEL: load_f64_v2f64_undef:
398 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
401 ; X86AVX2-LABEL: load_f64_v2f64_undef:
403 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
404 ; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
406 %x = load double, ptr %p
407 %ins = insertelement <2 x double> undef, double %x, i32 %y
408 ret <2 x double> %ins
411 define <32 x i8> @arg_i8_v32i8_undef(i8 %x, i32 %y) nounwind {
412 ; SSE-LABEL: arg_i8_v32i8_undef:
414 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
415 ; SSE-NEXT: andl $31, %esi
416 ; SSE-NEXT: movb %dil, -40(%rsp,%rsi)
417 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
418 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
421 ; AVX1-LABEL: arg_i8_v32i8_undef:
423 ; AVX1-NEXT: vmovd %edi, %xmm0
424 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
425 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
426 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
429 ; AVX2-LABEL: arg_i8_v32i8_undef:
431 ; AVX2-NEXT: vmovd %edi, %xmm0
432 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
435 ; AVX512F-LABEL: arg_i8_v32i8_undef:
437 ; AVX512F-NEXT: vmovd %edi, %xmm0
438 ; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
441 ; AVX512BW-LABEL: arg_i8_v32i8_undef:
443 ; AVX512BW-NEXT: vpbroadcastb %edi, %ymm0
444 ; AVX512BW-NEXT: retq
446 ; X86AVX2-LABEL: arg_i8_v32i8_undef:
448 ; X86AVX2-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm0
450 %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
454 define <16 x i16> @arg_i16_v16i16_undef(i16 %x, i32 %y) nounwind {
455 ; SSE-LABEL: arg_i16_v16i16_undef:
457 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
458 ; SSE-NEXT: andl $15, %esi
459 ; SSE-NEXT: movw %di, -40(%rsp,%rsi,2)
460 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
461 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
464 ; AVX1-LABEL: arg_i16_v16i16_undef:
466 ; AVX1-NEXT: vmovd %edi, %xmm0
467 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
468 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
469 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
472 ; AVX2-LABEL: arg_i16_v16i16_undef:
474 ; AVX2-NEXT: vmovd %edi, %xmm0
475 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
478 ; AVX512F-LABEL: arg_i16_v16i16_undef:
480 ; AVX512F-NEXT: vmovd %edi, %xmm0
481 ; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
484 ; AVX512BW-LABEL: arg_i16_v16i16_undef:
486 ; AVX512BW-NEXT: vpbroadcastw %edi, %ymm0
487 ; AVX512BW-NEXT: retq
489 ; X86AVX2-LABEL: arg_i16_v16i16_undef:
491 ; X86AVX2-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm0
493 %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
497 define <8 x i32> @arg_i32_v8i32_undef(i32 %x, i32 %y) nounwind {
498 ; SSE-LABEL: arg_i32_v8i32_undef:
500 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
501 ; SSE-NEXT: andl $7, %esi
502 ; SSE-NEXT: movl %edi, -40(%rsp,%rsi,4)
503 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
504 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
507 ; AVX1-LABEL: arg_i32_v8i32_undef:
509 ; AVX1-NEXT: vmovd %edi, %xmm0
510 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
511 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
514 ; AVX2-LABEL: arg_i32_v8i32_undef:
516 ; AVX2-NEXT: vmovd %edi, %xmm0
517 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
520 ; AVX512-LABEL: arg_i32_v8i32_undef:
522 ; AVX512-NEXT: vpbroadcastd %edi, %ymm0
525 ; X86AVX2-LABEL: arg_i32_v8i32_undef:
527 ; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
529 %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
533 define <4 x i64> @arg_i64_v4i64_undef(i64 %x, i32 %y) nounwind {
534 ; SSE-LABEL: arg_i64_v4i64_undef:
536 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
537 ; SSE-NEXT: andl $3, %esi
538 ; SSE-NEXT: movq %rdi, -40(%rsp,%rsi,8)
539 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
540 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
543 ; AVX1-LABEL: arg_i64_v4i64_undef:
545 ; AVX1-NEXT: vmovq %rdi, %xmm0
546 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
547 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
550 ; AVX2-LABEL: arg_i64_v4i64_undef:
552 ; AVX2-NEXT: vmovq %rdi, %xmm0
553 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
556 ; AVX512-LABEL: arg_i64_v4i64_undef:
558 ; AVX512-NEXT: vpbroadcastq %rdi, %ymm0
561 ; X86AVX2-LABEL: arg_i64_v4i64_undef:
563 ; X86AVX2-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
565 %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
569 define <8 x float> @arg_f32_v8f32_undef(float %x, i32 %y) nounwind {
570 ; SSE-LABEL: arg_f32_v8f32_undef:
572 ; SSE-NEXT: # kill: def $edi killed $edi def $rdi
573 ; SSE-NEXT: andl $7, %edi
574 ; SSE-NEXT: movss %xmm0, -40(%rsp,%rdi,4)
575 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
576 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
579 ; AVX1-LABEL: arg_f32_v8f32_undef:
581 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
582 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
585 ; AVX2-LABEL: arg_f32_v8f32_undef:
587 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
590 ; AVX512-LABEL: arg_f32_v8f32_undef:
592 ; AVX512-NEXT: vbroadcastss %xmm0, %ymm0
595 ; X86AVX2-LABEL: arg_f32_v8f32_undef:
597 ; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
599 %ins = insertelement <8 x float> undef, float %x, i32 %y
603 define <4 x double> @arg_f64_v4f64_undef(double %x, i32 %y) nounwind {
604 ; SSE-LABEL: arg_f64_v4f64_undef:
606 ; SSE-NEXT: # kill: def $edi killed $edi def $rdi
607 ; SSE-NEXT: andl $3, %edi
608 ; SSE-NEXT: movsd %xmm0, -40(%rsp,%rdi,8)
609 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
610 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
613 ; AVX1-LABEL: arg_f64_v4f64_undef:
615 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
616 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
619 ; AVX2-LABEL: arg_f64_v4f64_undef:
621 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
624 ; AVX512-LABEL: arg_f64_v4f64_undef:
626 ; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
629 ; X86AVX2-LABEL: arg_f64_v4f64_undef:
631 ; X86AVX2-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
633 %ins = insertelement <4 x double> undef, double %x, i32 %y
634 ret <4 x double> %ins
637 define <32 x i8> @load_i8_v32i8_undef(ptr %p, i32 %y) nounwind {
638 ; SSE-LABEL: load_i8_v32i8_undef:
640 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
641 ; SSE-NEXT: movzbl (%rdi), %eax
642 ; SSE-NEXT: andl $31, %esi
643 ; SSE-NEXT: movb %al, -40(%rsp,%rsi)
644 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
645 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
648 ; AVX1-LABEL: load_i8_v32i8_undef:
650 ; AVX1-NEXT: movzbl (%rdi), %eax
651 ; AVX1-NEXT: vmovd %eax, %xmm0
652 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
653 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
654 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
657 ; AVX2-LABEL: load_i8_v32i8_undef:
659 ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0
662 ; AVX512-LABEL: load_i8_v32i8_undef:
664 ; AVX512-NEXT: vpbroadcastb (%rdi), %ymm0
667 ; X86AVX2-LABEL: load_i8_v32i8_undef:
669 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
670 ; X86AVX2-NEXT: vpbroadcastb (%eax), %ymm0
673 %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
677 define <16 x i16> @load_i16_v16i16_undef(ptr %p, i32 %y) nounwind {
678 ; SSE-LABEL: load_i16_v16i16_undef:
680 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
681 ; SSE-NEXT: movzwl (%rdi), %eax
682 ; SSE-NEXT: andl $15, %esi
683 ; SSE-NEXT: movw %ax, -40(%rsp,%rsi,2)
684 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
685 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
688 ; AVX1-LABEL: load_i16_v16i16_undef:
690 ; AVX1-NEXT: movzwl (%rdi), %eax
691 ; AVX1-NEXT: vmovd %eax, %xmm0
692 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
693 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
694 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
697 ; AVX2-LABEL: load_i16_v16i16_undef:
699 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
702 ; AVX512-LABEL: load_i16_v16i16_undef:
704 ; AVX512-NEXT: vpbroadcastw (%rdi), %ymm0
707 ; X86AVX2-LABEL: load_i16_v16i16_undef:
709 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
710 ; X86AVX2-NEXT: vpbroadcastw (%eax), %ymm0
712 %x = load i16, ptr %p
713 %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
717 define <8 x i32> @load_i32_v8i32_undef(ptr %p, i32 %y) nounwind {
718 ; SSE-LABEL: load_i32_v8i32_undef:
720 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
721 ; SSE-NEXT: movl (%rdi), %eax
722 ; SSE-NEXT: andl $7, %esi
723 ; SSE-NEXT: movl %eax, -40(%rsp,%rsi,4)
724 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
725 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
728 ; AVX-LABEL: load_i32_v8i32_undef:
730 ; AVX-NEXT: vbroadcastss (%rdi), %ymm0
733 ; X86AVX2-LABEL: load_i32_v8i32_undef:
735 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
736 ; X86AVX2-NEXT: vbroadcastss (%eax), %ymm0
738 %x = load i32, ptr %p
739 %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
743 define <4 x i64> @load_i64_v4i64_undef(ptr %p, i32 %y) nounwind {
744 ; SSE-LABEL: load_i64_v4i64_undef:
746 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
747 ; SSE-NEXT: movq (%rdi), %rax
748 ; SSE-NEXT: andl $3, %esi
749 ; SSE-NEXT: movq %rax, -40(%rsp,%rsi,8)
750 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
751 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
754 ; AVX-LABEL: load_i64_v4i64_undef:
756 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
759 ; X86AVX2-LABEL: load_i64_v4i64_undef:
761 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
762 ; X86AVX2-NEXT: vbroadcastsd (%eax), %ymm0
764 %x = load i64, ptr %p
765 %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
769 define <8 x float> @load_f32_v8f32_undef(ptr %p, i32 %y) nounwind {
770 ; SSE-LABEL: load_f32_v8f32_undef:
772 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
773 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
774 ; SSE-NEXT: andl $7, %esi
775 ; SSE-NEXT: movss %xmm0, -40(%rsp,%rsi,4)
776 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
777 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
780 ; AVX-LABEL: load_f32_v8f32_undef:
782 ; AVX-NEXT: vbroadcastss (%rdi), %ymm0
785 ; X86AVX2-LABEL: load_f32_v8f32_undef:
787 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
788 ; X86AVX2-NEXT: vbroadcastss (%eax), %ymm0
790 %x = load float, ptr %p
791 %ins = insertelement <8 x float> undef, float %x, i32 %y
795 define <4 x double> @load_f64_v4f64_undef(ptr %p, i32 %y) nounwind {
796 ; SSE-LABEL: load_f64_v4f64_undef:
798 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
799 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
800 ; SSE-NEXT: andl $3, %esi
801 ; SSE-NEXT: movsd %xmm0, -40(%rsp,%rsi,8)
802 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
803 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
806 ; AVX-LABEL: load_f64_v4f64_undef:
808 ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
811 ; X86AVX2-LABEL: load_f64_v4f64_undef:
813 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
814 ; X86AVX2-NEXT: vbroadcastsd (%eax), %ymm0
816 %x = load double, ptr %p
817 %ins = insertelement <4 x double> undef, double %x, i32 %y
818 ret <4 x double> %ins
822 ; Insertion into arg vectors
825 define <16 x i8> @arg_i8_v16i8(<16 x i8> %v, i8 %x, i32 %y) nounwind {
826 ; SSE-LABEL: arg_i8_v16i8:
828 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
829 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
830 ; SSE-NEXT: andl $15, %esi
831 ; SSE-NEXT: movb %dil, -24(%rsp,%rsi)
832 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
835 ; AVX1OR2-LABEL: arg_i8_v16i8:
837 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
838 ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
839 ; AVX1OR2-NEXT: andl $15, %esi
840 ; AVX1OR2-NEXT: movb %dil, -24(%rsp,%rsi)
841 ; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
844 ; AVX512F-LABEL: arg_i8_v16i8:
846 ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
847 ; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
848 ; AVX512F-NEXT: andl $15, %esi
849 ; AVX512F-NEXT: movb %dil, -24(%rsp,%rsi)
850 ; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
853 ; AVX512BW-LABEL: arg_i8_v16i8:
855 ; AVX512BW-NEXT: vpbroadcastb %esi, %xmm1
856 ; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
857 ; AVX512BW-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
858 ; AVX512BW-NEXT: retq
860 ; X86AVX2-LABEL: arg_i8_v16i8:
862 ; X86AVX2-NEXT: pushl %ebp
863 ; X86AVX2-NEXT: movl %esp, %ebp
864 ; X86AVX2-NEXT: andl $-16, %esp
865 ; X86AVX2-NEXT: subl $32, %esp
866 ; X86AVX2-NEXT: movl 12(%ebp), %eax
867 ; X86AVX2-NEXT: andl $15, %eax
868 ; X86AVX2-NEXT: movzbl 8(%ebp), %ecx
869 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
870 ; X86AVX2-NEXT: movb %cl, (%esp,%eax)
871 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
872 ; X86AVX2-NEXT: movl %ebp, %esp
873 ; X86AVX2-NEXT: popl %ebp
875 %ins = insertelement <16 x i8> %v, i8 %x, i32 %y
879 define <8 x i16> @arg_i16_v8i16(<8 x i16> %v, i16 %x, i32 %y) nounwind {
880 ; SSE-LABEL: arg_i16_v8i16:
882 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
883 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
884 ; SSE-NEXT: andl $7, %esi
885 ; SSE-NEXT: movw %di, -24(%rsp,%rsi,2)
886 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
889 ; AVX1OR2-LABEL: arg_i16_v8i16:
891 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
892 ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
893 ; AVX1OR2-NEXT: andl $7, %esi
894 ; AVX1OR2-NEXT: movw %di, -24(%rsp,%rsi,2)
895 ; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
898 ; AVX512F-LABEL: arg_i16_v8i16:
900 ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
901 ; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
902 ; AVX512F-NEXT: andl $7, %esi
903 ; AVX512F-NEXT: movw %di, -24(%rsp,%rsi,2)
904 ; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
907 ; AVX512BW-LABEL: arg_i16_v8i16:
909 ; AVX512BW-NEXT: vpbroadcastw %esi, %xmm1
910 ; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
911 ; AVX512BW-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
912 ; AVX512BW-NEXT: retq
914 ; X86AVX2-LABEL: arg_i16_v8i16:
916 ; X86AVX2-NEXT: pushl %ebp
917 ; X86AVX2-NEXT: movl %esp, %ebp
918 ; X86AVX2-NEXT: andl $-16, %esp
919 ; X86AVX2-NEXT: subl $32, %esp
920 ; X86AVX2-NEXT: movl 12(%ebp), %eax
921 ; X86AVX2-NEXT: andl $7, %eax
922 ; X86AVX2-NEXT: movzwl 8(%ebp), %ecx
923 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
924 ; X86AVX2-NEXT: movw %cx, (%esp,%eax,2)
925 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
926 ; X86AVX2-NEXT: movl %ebp, %esp
927 ; X86AVX2-NEXT: popl %ebp
929 %ins = insertelement <8 x i16> %v, i16 %x, i32 %y
933 define <4 x i32> @arg_i32_v4i32(<4 x i32> %v, i32 %x, i32 %y) nounwind {
934 ; SSE-LABEL: arg_i32_v4i32:
936 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
937 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
938 ; SSE-NEXT: andl $3, %esi
939 ; SSE-NEXT: movl %edi, -24(%rsp,%rsi,4)
940 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
943 ; AVX1OR2-LABEL: arg_i32_v4i32:
945 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
946 ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
947 ; AVX1OR2-NEXT: andl $3, %esi
948 ; AVX1OR2-NEXT: movl %edi, -24(%rsp,%rsi,4)
949 ; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
952 ; AVX512-LABEL: arg_i32_v4i32:
954 ; AVX512-NEXT: vpbroadcastd %esi, %xmm1
955 ; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
956 ; AVX512-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
959 ; X86AVX2-LABEL: arg_i32_v4i32:
961 ; X86AVX2-NEXT: pushl %ebp
962 ; X86AVX2-NEXT: movl %esp, %ebp
963 ; X86AVX2-NEXT: andl $-16, %esp
964 ; X86AVX2-NEXT: subl $32, %esp
965 ; X86AVX2-NEXT: movl 12(%ebp), %eax
966 ; X86AVX2-NEXT: andl $3, %eax
967 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
968 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
969 ; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4)
970 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
971 ; X86AVX2-NEXT: movl %ebp, %esp
972 ; X86AVX2-NEXT: popl %ebp
974 %ins = insertelement <4 x i32> %v, i32 %x, i32 %y
978 define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
979 ; SSE-LABEL: arg_i64_v2i64:
981 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
982 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
983 ; SSE-NEXT: andl $1, %esi
984 ; SSE-NEXT: movq %rdi, -24(%rsp,%rsi,8)
985 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
988 ; AVX1OR2-LABEL: arg_i64_v2i64:
990 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
991 ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
992 ; AVX1OR2-NEXT: andl $1, %esi
993 ; AVX1OR2-NEXT: movq %rdi, -24(%rsp,%rsi,8)
994 ; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
997 ; AVX512-LABEL: arg_i64_v2i64:
999 ; AVX512-NEXT: movl %esi, %eax
1000 ; AVX512-NEXT: vpbroadcastq %rax, %xmm1
1001 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
1002 ; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
1005 ; X86AVX2-LABEL: arg_i64_v2i64:
1007 ; X86AVX2-NEXT: pushl %ebp
1008 ; X86AVX2-NEXT: movl %esp, %ebp
1009 ; X86AVX2-NEXT: pushl %esi
1010 ; X86AVX2-NEXT: andl $-16, %esp
1011 ; X86AVX2-NEXT: subl $48, %esp
1012 ; X86AVX2-NEXT: movl 8(%ebp), %edx
1013 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1014 ; X86AVX2-NEXT: movl 16(%ebp), %ecx
1015 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1016 ; X86AVX2-NEXT: addl %ecx, %ecx
1017 ; X86AVX2-NEXT: movl %ecx, %esi
1018 ; X86AVX2-NEXT: andl $3, %esi
1019 ; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
1020 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
1021 ; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
1022 ; X86AVX2-NEXT: incl %ecx
1023 ; X86AVX2-NEXT: andl $3, %ecx
1024 ; X86AVX2-NEXT: movl %eax, 16(%esp,%ecx,4)
1025 ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
1026 ; X86AVX2-NEXT: leal -4(%ebp), %esp
1027 ; X86AVX2-NEXT: popl %esi
1028 ; X86AVX2-NEXT: popl %ebp
1029 ; X86AVX2-NEXT: retl
1030 %ins = insertelement <2 x i64> %v, i64 %x, i32 %y
1034 define <4 x float> @arg_f32_v4f32(<4 x float> %v, float %x, i32 %y) nounwind {
1035 ; SSE2-LABEL: arg_f32_v4f32:
1037 ; SSE2-NEXT: # kill: def $edi killed $edi def $rdi
1038 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1039 ; SSE2-NEXT: andl $3, %edi
1040 ; SSE2-NEXT: movss %xmm1, -24(%rsp,%rdi,4)
1041 ; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1044 ; SSE41-LABEL: arg_f32_v4f32:
1046 ; SSE41-NEXT: movaps %xmm0, %xmm2
1047 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
1048 ; SSE41-NEXT: movd %edi, %xmm0
1049 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1050 ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1051 ; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm2
1052 ; SSE41-NEXT: movaps %xmm2, %xmm0
1055 ; AVX1-LABEL: arg_f32_v4f32:
1057 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
1058 ; AVX1-NEXT: vmovd %edi, %xmm2
1059 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1060 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1061 ; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1064 ; AVX2-LABEL: arg_f32_v4f32:
1066 ; AVX2-NEXT: vbroadcastss %xmm1, %xmm1
1067 ; AVX2-NEXT: vmovd %edi, %xmm2
1068 ; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2
1069 ; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1070 ; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1073 ; AVX512-LABEL: arg_f32_v4f32:
1075 ; AVX512-NEXT: vpbroadcastd %edi, %xmm2
1076 ; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
1077 ; AVX512-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
1080 ; X86AVX2-LABEL: arg_f32_v4f32:
1082 ; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
1083 ; X86AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
1084 ; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm2
1085 ; X86AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
1086 ; X86AVX2-NEXT: retl
1087 %ins = insertelement <4 x float> %v, float %x, i32 %y
1088 ret <4 x float> %ins
1091 define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind {
1092 ; SSE2-LABEL: arg_f64_v2f64:
1094 ; SSE2-NEXT: # kill: def $edi killed $edi def $rdi
1095 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1096 ; SSE2-NEXT: andl $1, %edi
1097 ; SSE2-NEXT: movsd %xmm1, -24(%rsp,%rdi,8)
1098 ; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1101 ; SSE41-LABEL: arg_f64_v2f64:
1103 ; SSE41-NEXT: movapd %xmm0, %xmm2
1104 ; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
1105 ; SSE41-NEXT: movd %edi, %xmm0
1106 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1107 ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1108 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
1109 ; SSE41-NEXT: movapd %xmm2, %xmm0
1112 ; AVX1-LABEL: arg_f64_v2f64:
1114 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
1115 ; AVX1-NEXT: vmovd %edi, %xmm2
1116 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
1117 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1118 ; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1121 ; AVX2-LABEL: arg_f64_v2f64:
1123 ; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
1124 ; AVX2-NEXT: movl %edi, %eax
1125 ; AVX2-NEXT: vmovq %rax, %xmm2
1126 ; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
1127 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1128 ; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1131 ; AVX512-LABEL: arg_f64_v2f64:
1133 ; AVX512-NEXT: movl %edi, %eax
1134 ; AVX512-NEXT: vpbroadcastq %rax, %xmm2
1135 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
1136 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
1139 ; X86AVX2-LABEL: arg_f64_v2f64:
1141 ; X86AVX2-NEXT: pushl %ebp
1142 ; X86AVX2-NEXT: movl %esp, %ebp
1143 ; X86AVX2-NEXT: andl $-16, %esp
1144 ; X86AVX2-NEXT: subl $32, %esp
1145 ; X86AVX2-NEXT: movl 16(%ebp), %eax
1146 ; X86AVX2-NEXT: andl $1, %eax
1147 ; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1148 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1149 ; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8)
1150 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
1151 ; X86AVX2-NEXT: movl %ebp, %esp
1152 ; X86AVX2-NEXT: popl %ebp
1153 ; X86AVX2-NEXT: retl
1154 %ins = insertelement <2 x double> %v, double %x, i32 %y
1155 ret <2 x double> %ins
1158 define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind {
1159 ; SSE-LABEL: load_i8_v16i8:
1161 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1162 ; SSE-NEXT: movzbl (%rdi), %eax
1163 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1164 ; SSE-NEXT: andl $15, %esi
1165 ; SSE-NEXT: movb %al, -24(%rsp,%rsi)
1166 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1169 ; AVX1OR2-LABEL: load_i8_v16i8:
1171 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1172 ; AVX1OR2-NEXT: movzbl (%rdi), %eax
1173 ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1174 ; AVX1OR2-NEXT: andl $15, %esi
1175 ; AVX1OR2-NEXT: movb %al, -24(%rsp,%rsi)
1176 ; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1177 ; AVX1OR2-NEXT: retq
1179 ; AVX512F-LABEL: load_i8_v16i8:
1181 ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
1182 ; AVX512F-NEXT: movzbl (%rdi), %eax
1183 ; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1184 ; AVX512F-NEXT: andl $15, %esi
1185 ; AVX512F-NEXT: movb %al, -24(%rsp,%rsi)
1186 ; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1187 ; AVX512F-NEXT: retq
1189 ; AVX512BW-LABEL: load_i8_v16i8:
1190 ; AVX512BW: # %bb.0:
1191 ; AVX512BW-NEXT: vpbroadcastb %esi, %xmm1
1192 ; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
1193 ; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
1194 ; AVX512BW-NEXT: retq
1196 ; X86AVX2-LABEL: load_i8_v16i8:
1198 ; X86AVX2-NEXT: pushl %ebp
1199 ; X86AVX2-NEXT: movl %esp, %ebp
1200 ; X86AVX2-NEXT: andl $-16, %esp
1201 ; X86AVX2-NEXT: subl $32, %esp
1202 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1203 ; X86AVX2-NEXT: andl $15, %eax
1204 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
1205 ; X86AVX2-NEXT: movzbl (%ecx), %ecx
1206 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1207 ; X86AVX2-NEXT: movb %cl, (%esp,%eax)
1208 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
1209 ; X86AVX2-NEXT: movl %ebp, %esp
1210 ; X86AVX2-NEXT: popl %ebp
1211 ; X86AVX2-NEXT: retl
1212 %x = load i8, ptr %p
1213 %ins = insertelement <16 x i8> %v, i8 %x, i32 %y
1217 define <8 x i16> @load_i16_v8i16(<8 x i16> %v, ptr %p, i32 %y) nounwind {
1218 ; SSE-LABEL: load_i16_v8i16:
1220 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1221 ; SSE-NEXT: movzwl (%rdi), %eax
1222 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1223 ; SSE-NEXT: andl $7, %esi
1224 ; SSE-NEXT: movw %ax, -24(%rsp,%rsi,2)
1225 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1228 ; AVX1OR2-LABEL: load_i16_v8i16:
1230 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1231 ; AVX1OR2-NEXT: movzwl (%rdi), %eax
1232 ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1233 ; AVX1OR2-NEXT: andl $7, %esi
1234 ; AVX1OR2-NEXT: movw %ax, -24(%rsp,%rsi,2)
1235 ; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1236 ; AVX1OR2-NEXT: retq
1238 ; AVX512F-LABEL: load_i16_v8i16:
1240 ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
1241 ; AVX512F-NEXT: movzwl (%rdi), %eax
1242 ; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1243 ; AVX512F-NEXT: andl $7, %esi
1244 ; AVX512F-NEXT: movw %ax, -24(%rsp,%rsi,2)
1245 ; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1246 ; AVX512F-NEXT: retq
1248 ; AVX512BW-LABEL: load_i16_v8i16:
1249 ; AVX512BW: # %bb.0:
1250 ; AVX512BW-NEXT: vpbroadcastw %esi, %xmm1
1251 ; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
1252 ; AVX512BW-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
1253 ; AVX512BW-NEXT: retq
1255 ; X86AVX2-LABEL: load_i16_v8i16:
1257 ; X86AVX2-NEXT: pushl %ebp
1258 ; X86AVX2-NEXT: movl %esp, %ebp
1259 ; X86AVX2-NEXT: andl $-16, %esp
1260 ; X86AVX2-NEXT: subl $32, %esp
1261 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1262 ; X86AVX2-NEXT: andl $7, %eax
1263 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
1264 ; X86AVX2-NEXT: movzwl (%ecx), %ecx
1265 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1266 ; X86AVX2-NEXT: movw %cx, (%esp,%eax,2)
1267 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
1268 ; X86AVX2-NEXT: movl %ebp, %esp
1269 ; X86AVX2-NEXT: popl %ebp
1270 ; X86AVX2-NEXT: retl
1271 %x = load i16, ptr %p
1272 %ins = insertelement <8 x i16> %v, i16 %x, i32 %y
1276 define <4 x i32> @load_i32_v4i32(<4 x i32> %v, ptr %p, i32 %y) nounwind {
1277 ; SSE-LABEL: load_i32_v4i32:
1279 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1280 ; SSE-NEXT: movl (%rdi), %eax
1281 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1282 ; SSE-NEXT: andl $3, %esi
1283 ; SSE-NEXT: movl %eax, -24(%rsp,%rsi,4)
1284 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1287 ; AVX1OR2-LABEL: load_i32_v4i32:
1289 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1290 ; AVX1OR2-NEXT: movl (%rdi), %eax
1291 ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1292 ; AVX1OR2-NEXT: andl $3, %esi
1293 ; AVX1OR2-NEXT: movl %eax, -24(%rsp,%rsi,4)
1294 ; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1295 ; AVX1OR2-NEXT: retq
1297 ; AVX512-LABEL: load_i32_v4i32:
1299 ; AVX512-NEXT: vpbroadcastd %esi, %xmm1
1300 ; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
1301 ; AVX512-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
1304 ; X86AVX2-LABEL: load_i32_v4i32:
1306 ; X86AVX2-NEXT: pushl %ebp
1307 ; X86AVX2-NEXT: movl %esp, %ebp
1308 ; X86AVX2-NEXT: andl $-16, %esp
1309 ; X86AVX2-NEXT: subl $32, %esp
1310 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1311 ; X86AVX2-NEXT: andl $3, %eax
1312 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
1313 ; X86AVX2-NEXT: movl (%ecx), %ecx
1314 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1315 ; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4)
1316 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
1317 ; X86AVX2-NEXT: movl %ebp, %esp
1318 ; X86AVX2-NEXT: popl %ebp
1319 ; X86AVX2-NEXT: retl
1320 %x = load i32, ptr %p
1321 %ins = insertelement <4 x i32> %v, i32 %x, i32 %y
1325 define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind {
1326 ; SSE-LABEL: load_i64_v2i64:
1328 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1329 ; SSE-NEXT: movq (%rdi), %rax
1330 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1331 ; SSE-NEXT: andl $1, %esi
1332 ; SSE-NEXT: movq %rax, -24(%rsp,%rsi,8)
1333 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1336 ; AVX1OR2-LABEL: load_i64_v2i64:
1338 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1339 ; AVX1OR2-NEXT: movq (%rdi), %rax
1340 ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1341 ; AVX1OR2-NEXT: andl $1, %esi
1342 ; AVX1OR2-NEXT: movq %rax, -24(%rsp,%rsi,8)
1343 ; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1344 ; AVX1OR2-NEXT: retq
1346 ; AVX512-LABEL: load_i64_v2i64:
1348 ; AVX512-NEXT: movl %esi, %eax
1349 ; AVX512-NEXT: vpbroadcastq %rax, %xmm1
1350 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
1351 ; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1}
1354 ; X86AVX2-LABEL: load_i64_v2i64:
1356 ; X86AVX2-NEXT: pushl %ebp
1357 ; X86AVX2-NEXT: movl %esp, %ebp
1358 ; X86AVX2-NEXT: pushl %esi
1359 ; X86AVX2-NEXT: andl $-16, %esp
1360 ; X86AVX2-NEXT: subl $48, %esp
1361 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1362 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
1363 ; X86AVX2-NEXT: movl (%ecx), %edx
1364 ; X86AVX2-NEXT: movl 4(%ecx), %ecx
1365 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1366 ; X86AVX2-NEXT: addl %eax, %eax
1367 ; X86AVX2-NEXT: movl %eax, %esi
1368 ; X86AVX2-NEXT: andl $3, %esi
1369 ; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
1370 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
1371 ; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
1372 ; X86AVX2-NEXT: incl %eax
1373 ; X86AVX2-NEXT: andl $3, %eax
1374 ; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
1375 ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
1376 ; X86AVX2-NEXT: leal -4(%ebp), %esp
1377 ; X86AVX2-NEXT: popl %esi
1378 ; X86AVX2-NEXT: popl %ebp
1379 ; X86AVX2-NEXT: retl
1380 %x = load i64, ptr %p
1381 %ins = insertelement <2 x i64> %v, i64 %x, i32 %y
1385 define <4 x float> @load_f32_v4f32(<4 x float> %v, ptr %p, i32 %y) nounwind {
1386 ; SSE2-LABEL: load_f32_v4f32:
1388 ; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
1389 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1390 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1391 ; SSE2-NEXT: andl $3, %esi
1392 ; SSE2-NEXT: movss %xmm1, -24(%rsp,%rsi,4)
1393 ; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1396 ; SSE41-LABEL: load_f32_v4f32:
1398 ; SSE41-NEXT: movaps %xmm0, %xmm1
1399 ; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1400 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
1401 ; SSE41-NEXT: movd %esi, %xmm0
1402 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1403 ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1404 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
1405 ; SSE41-NEXT: movaps %xmm1, %xmm0
1408 ; AVX1-LABEL: load_f32_v4f32:
1410 ; AVX1-NEXT: vbroadcastss (%rdi), %xmm1
1411 ; AVX1-NEXT: vmovd %esi, %xmm2
1412 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1413 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1414 ; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1417 ; AVX2-LABEL: load_f32_v4f32:
1419 ; AVX2-NEXT: vbroadcastss (%rdi), %xmm1
1420 ; AVX2-NEXT: vmovd %esi, %xmm2
1421 ; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2
1422 ; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1423 ; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1426 ; AVX512-LABEL: load_f32_v4f32:
1428 ; AVX512-NEXT: vpbroadcastd %esi, %xmm1
1429 ; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
1430 ; AVX512-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
1433 ; X86AVX2-LABEL: load_f32_v4f32:
1435 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1436 ; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
1437 ; X86AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
1438 ; X86AVX2-NEXT: vbroadcastss (%eax), %xmm2
1439 ; X86AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
1440 ; X86AVX2-NEXT: retl
1441 %x = load float, ptr %p
1442 %ins = insertelement <4 x float> %v, float %x, i32 %y
1443 ret <4 x float> %ins
1446 define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
1447 ; SSE2-LABEL: load_f64_v2f64:
1449 ; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
1450 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1451 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1452 ; SSE2-NEXT: andl $1, %esi
1453 ; SSE2-NEXT: movsd %xmm1, -24(%rsp,%rsi,8)
1454 ; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1457 ; SSE41-LABEL: load_f64_v2f64:
1459 ; SSE41-NEXT: movapd %xmm0, %xmm1
1460 ; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0]
1461 ; SSE41-NEXT: movd %esi, %xmm0
1462 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1463 ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1464 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1465 ; SSE41-NEXT: movapd %xmm1, %xmm0
1468 ; AVX1-LABEL: load_f64_v2f64:
1470 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
1471 ; AVX1-NEXT: vmovd %esi, %xmm2
1472 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
1473 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1474 ; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1477 ; AVX2-LABEL: load_f64_v2f64:
1479 ; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
1480 ; AVX2-NEXT: movl %esi, %eax
1481 ; AVX2-NEXT: vmovq %rax, %xmm2
1482 ; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
1483 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1484 ; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1487 ; AVX512-LABEL: load_f64_v2f64:
1489 ; AVX512-NEXT: movl %esi, %eax
1490 ; AVX512-NEXT: vpbroadcastq %rax, %xmm1
1491 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
1492 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
1495 ; X86AVX2-LABEL: load_f64_v2f64:
1497 ; X86AVX2-NEXT: pushl %ebp
1498 ; X86AVX2-NEXT: movl %esp, %ebp
1499 ; X86AVX2-NEXT: andl $-16, %esp
1500 ; X86AVX2-NEXT: subl $32, %esp
1501 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1502 ; X86AVX2-NEXT: andl $1, %eax
1503 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
1504 ; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1505 ; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1506 ; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8)
1507 ; X86AVX2-NEXT: vmovaps (%esp), %xmm0
1508 ; X86AVX2-NEXT: movl %ebp, %esp
1509 ; X86AVX2-NEXT: popl %ebp
1510 ; X86AVX2-NEXT: retl
1511 %x = load double, ptr %p
1512 %ins = insertelement <2 x double> %v, double %x, i32 %y
1513 ret <2 x double> %ins
1516 define <32 x i8> @arg_i8_v32i8(<32 x i8> %v, i8 %x, i32 %y) nounwind {
1517 ; SSE-LABEL: arg_i8_v32i8:
1519 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1520 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1521 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1522 ; SSE-NEXT: andl $31, %esi
1523 ; SSE-NEXT: movb %dil, -40(%rsp,%rsi)
1524 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1525 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
1528 ; AVX1OR2-LABEL: arg_i8_v32i8:
1530 ; AVX1OR2-NEXT: pushq %rbp
1531 ; AVX1OR2-NEXT: movq %rsp, %rbp
1532 ; AVX1OR2-NEXT: andq $-32, %rsp
1533 ; AVX1OR2-NEXT: subq $64, %rsp
1534 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1535 ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
1536 ; AVX1OR2-NEXT: andl $31, %esi
1537 ; AVX1OR2-NEXT: movb %dil, (%rsp,%rsi)
1538 ; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0
1539 ; AVX1OR2-NEXT: movq %rbp, %rsp
1540 ; AVX1OR2-NEXT: popq %rbp
1541 ; AVX1OR2-NEXT: retq
1543 ; AVX512F-LABEL: arg_i8_v32i8:
1545 ; AVX512F-NEXT: pushq %rbp
1546 ; AVX512F-NEXT: movq %rsp, %rbp
1547 ; AVX512F-NEXT: andq $-32, %rsp
1548 ; AVX512F-NEXT: subq $64, %rsp
1549 ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
1550 ; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
1551 ; AVX512F-NEXT: andl $31, %esi
1552 ; AVX512F-NEXT: movb %dil, (%rsp,%rsi)
1553 ; AVX512F-NEXT: vmovaps (%rsp), %ymm0
1554 ; AVX512F-NEXT: movq %rbp, %rsp
1555 ; AVX512F-NEXT: popq %rbp
1556 ; AVX512F-NEXT: retq
1558 ; AVX512BW-LABEL: arg_i8_v32i8:
1559 ; AVX512BW: # %bb.0:
1560 ; AVX512BW-NEXT: vpbroadcastb %esi, %ymm1
1561 ; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
1562 ; AVX512BW-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
1563 ; AVX512BW-NEXT: retq
1565 ; X86AVX2-LABEL: arg_i8_v32i8:
1567 ; X86AVX2-NEXT: pushl %ebp
1568 ; X86AVX2-NEXT: movl %esp, %ebp
1569 ; X86AVX2-NEXT: andl $-32, %esp
1570 ; X86AVX2-NEXT: subl $64, %esp
1571 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1572 ; X86AVX2-NEXT: andl $31, %eax
1573 ; X86AVX2-NEXT: movzbl 8(%ebp), %ecx
1574 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
1575 ; X86AVX2-NEXT: movb %cl, (%esp,%eax)
1576 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
1577 ; X86AVX2-NEXT: movl %ebp, %esp
1578 ; X86AVX2-NEXT: popl %ebp
1579 ; X86AVX2-NEXT: retl
1580 %ins = insertelement <32 x i8> %v, i8 %x, i32 %y
1584 define <16 x i16> @arg_i16_v16i16(<16 x i16> %v, i16 %x, i32 %y) nounwind {
1585 ; SSE-LABEL: arg_i16_v16i16:
1587 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1588 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1589 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1590 ; SSE-NEXT: andl $15, %esi
1591 ; SSE-NEXT: movw %di, -40(%rsp,%rsi,2)
1592 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1593 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
1596 ; AVX1OR2-LABEL: arg_i16_v16i16:
1598 ; AVX1OR2-NEXT: pushq %rbp
1599 ; AVX1OR2-NEXT: movq %rsp, %rbp
1600 ; AVX1OR2-NEXT: andq $-32, %rsp
1601 ; AVX1OR2-NEXT: subq $64, %rsp
1602 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1603 ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
1604 ; AVX1OR2-NEXT: andl $15, %esi
1605 ; AVX1OR2-NEXT: movw %di, (%rsp,%rsi,2)
1606 ; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0
1607 ; AVX1OR2-NEXT: movq %rbp, %rsp
1608 ; AVX1OR2-NEXT: popq %rbp
1609 ; AVX1OR2-NEXT: retq
1611 ; AVX512F-LABEL: arg_i16_v16i16:
1613 ; AVX512F-NEXT: pushq %rbp
1614 ; AVX512F-NEXT: movq %rsp, %rbp
1615 ; AVX512F-NEXT: andq $-32, %rsp
1616 ; AVX512F-NEXT: subq $64, %rsp
1617 ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
1618 ; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
1619 ; AVX512F-NEXT: andl $15, %esi
1620 ; AVX512F-NEXT: movw %di, (%rsp,%rsi,2)
1621 ; AVX512F-NEXT: vmovaps (%rsp), %ymm0
1622 ; AVX512F-NEXT: movq %rbp, %rsp
1623 ; AVX512F-NEXT: popq %rbp
1624 ; AVX512F-NEXT: retq
1626 ; AVX512BW-LABEL: arg_i16_v16i16:
1627 ; AVX512BW: # %bb.0:
1628 ; AVX512BW-NEXT: vpbroadcastw %esi, %ymm1
1629 ; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
1630 ; AVX512BW-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
1631 ; AVX512BW-NEXT: retq
1633 ; X86AVX2-LABEL: arg_i16_v16i16:
1635 ; X86AVX2-NEXT: pushl %ebp
1636 ; X86AVX2-NEXT: movl %esp, %ebp
1637 ; X86AVX2-NEXT: andl $-32, %esp
1638 ; X86AVX2-NEXT: subl $64, %esp
1639 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1640 ; X86AVX2-NEXT: andl $15, %eax
1641 ; X86AVX2-NEXT: movzwl 8(%ebp), %ecx
1642 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
1643 ; X86AVX2-NEXT: movw %cx, (%esp,%eax,2)
1644 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
1645 ; X86AVX2-NEXT: movl %ebp, %esp
1646 ; X86AVX2-NEXT: popl %ebp
1647 ; X86AVX2-NEXT: retl
1648 %ins = insertelement <16 x i16> %v, i16 %x, i32 %y
1652 define <8 x i32> @arg_i32_v8i32(<8 x i32> %v, i32 %x, i32 %y) nounwind {
1653 ; SSE-LABEL: arg_i32_v8i32:
1655 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1656 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1657 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1658 ; SSE-NEXT: andl $7, %esi
1659 ; SSE-NEXT: movl %edi, -40(%rsp,%rsi,4)
1660 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1661 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
1664 ; AVX1OR2-LABEL: arg_i32_v8i32:
1666 ; AVX1OR2-NEXT: pushq %rbp
1667 ; AVX1OR2-NEXT: movq %rsp, %rbp
1668 ; AVX1OR2-NEXT: andq $-32, %rsp
1669 ; AVX1OR2-NEXT: subq $64, %rsp
1670 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1671 ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
1672 ; AVX1OR2-NEXT: andl $7, %esi
1673 ; AVX1OR2-NEXT: movl %edi, (%rsp,%rsi,4)
1674 ; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0
1675 ; AVX1OR2-NEXT: movq %rbp, %rsp
1676 ; AVX1OR2-NEXT: popq %rbp
1677 ; AVX1OR2-NEXT: retq
1679 ; AVX512-LABEL: arg_i32_v8i32:
1681 ; AVX512-NEXT: vpbroadcastd %esi, %ymm1
1682 ; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
1683 ; AVX512-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
1686 ; X86AVX2-LABEL: arg_i32_v8i32:
1688 ; X86AVX2-NEXT: pushl %ebp
1689 ; X86AVX2-NEXT: movl %esp, %ebp
1690 ; X86AVX2-NEXT: andl $-32, %esp
1691 ; X86AVX2-NEXT: subl $64, %esp
1692 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1693 ; X86AVX2-NEXT: andl $7, %eax
1694 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
1695 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
1696 ; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4)
1697 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
1698 ; X86AVX2-NEXT: movl %ebp, %esp
1699 ; X86AVX2-NEXT: popl %ebp
1700 ; X86AVX2-NEXT: retl
1701 %ins = insertelement <8 x i32> %v, i32 %x, i32 %y
1705 define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind {
1706 ; SSE-LABEL: arg_i64_v4i64:
1708 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1709 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1710 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1711 ; SSE-NEXT: andl $3, %esi
1712 ; SSE-NEXT: movq %rdi, -40(%rsp,%rsi,8)
1713 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1714 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
1717 ; AVX1OR2-LABEL: arg_i64_v4i64:
1719 ; AVX1OR2-NEXT: pushq %rbp
1720 ; AVX1OR2-NEXT: movq %rsp, %rbp
1721 ; AVX1OR2-NEXT: andq $-32, %rsp
1722 ; AVX1OR2-NEXT: subq $64, %rsp
1723 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1724 ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
1725 ; AVX1OR2-NEXT: andl $3, %esi
1726 ; AVX1OR2-NEXT: movq %rdi, (%rsp,%rsi,8)
1727 ; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0
1728 ; AVX1OR2-NEXT: movq %rbp, %rsp
1729 ; AVX1OR2-NEXT: popq %rbp
1730 ; AVX1OR2-NEXT: retq
1732 ; AVX512-LABEL: arg_i64_v4i64:
1734 ; AVX512-NEXT: movl %esi, %eax
1735 ; AVX512-NEXT: vpbroadcastq %rax, %ymm1
1736 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
1737 ; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
1740 ; X86AVX2-LABEL: arg_i64_v4i64:
1742 ; X86AVX2-NEXT: pushl %ebp
1743 ; X86AVX2-NEXT: movl %esp, %ebp
1744 ; X86AVX2-NEXT: pushl %esi
1745 ; X86AVX2-NEXT: andl $-32, %esp
1746 ; X86AVX2-NEXT: subl $96, %esp
1747 ; X86AVX2-NEXT: movl 8(%ebp), %edx
1748 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1749 ; X86AVX2-NEXT: movl 16(%ebp), %ecx
1750 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
1751 ; X86AVX2-NEXT: addl %ecx, %ecx
1752 ; X86AVX2-NEXT: movl %ecx, %esi
1753 ; X86AVX2-NEXT: andl $7, %esi
1754 ; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
1755 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
1756 ; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1757 ; X86AVX2-NEXT: incl %ecx
1758 ; X86AVX2-NEXT: andl $7, %ecx
1759 ; X86AVX2-NEXT: movl %eax, 32(%esp,%ecx,4)
1760 ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
1761 ; X86AVX2-NEXT: leal -4(%ebp), %esp
1762 ; X86AVX2-NEXT: popl %esi
1763 ; X86AVX2-NEXT: popl %ebp
1764 ; X86AVX2-NEXT: retl
1765 %ins = insertelement <4 x i64> %v, i64 %x, i32 %y
1769 define <8 x float> @arg_f32_v8f32(<8 x float> %v, float %x, i32 %y) nounwind {
1770 ; SSE-LABEL: arg_f32_v8f32:
1772 ; SSE-NEXT: # kill: def $edi killed $edi def $rdi
1773 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1774 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1775 ; SSE-NEXT: andl $7, %edi
1776 ; SSE-NEXT: movss %xmm2, -40(%rsp,%rdi,4)
1777 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1778 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
1781 ; AVX1-LABEL: arg_f32_v8f32:
1783 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
1784 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
1785 ; AVX1-NEXT: vmovd %edi, %xmm2
1786 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1787 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1788 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
1789 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1790 ; AVX1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
1793 ; AVX2-LABEL: arg_f32_v8f32:
1795 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1
1796 ; AVX2-NEXT: vmovd %edi, %xmm2
1797 ; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2
1798 ; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
1799 ; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
1802 ; AVX512-LABEL: arg_f32_v8f32:
1804 ; AVX512-NEXT: vpbroadcastd %edi, %ymm2
1805 ; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
1806 ; AVX512-NEXT: vbroadcastss %xmm1, %ymm0 {%k1}
1809 ; X86AVX2-LABEL: arg_f32_v8f32:
1811 ; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm1
1812 ; X86AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
1813 ; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm2
1814 ; X86AVX2-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
1815 ; X86AVX2-NEXT: retl
1816 %ins = insertelement <8 x float> %v, float %x, i32 %y
1817 ret <8 x float> %ins
1820 define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind {
1821 ; SSE-LABEL: arg_f64_v4f64:
1823 ; SSE-NEXT: # kill: def $edi killed $edi def $rdi
1824 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1825 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1826 ; SSE-NEXT: andl $3, %edi
1827 ; SSE-NEXT: movsd %xmm2, -40(%rsp,%rdi,8)
1828 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1829 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
1832 ; AVX1-LABEL: arg_f64_v4f64:
1834 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
1835 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
1836 ; AVX1-NEXT: movl %edi, %eax
1837 ; AVX1-NEXT: vmovq %rax, %xmm2
1838 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
1839 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1840 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
1841 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1842 ; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
1845 ; AVX2-LABEL: arg_f64_v4f64:
1847 ; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
1848 ; AVX2-NEXT: movl %edi, %eax
1849 ; AVX2-NEXT: vmovq %rax, %xmm2
1850 ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
1851 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
1852 ; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
1855 ; AVX512-LABEL: arg_f64_v4f64:
1857 ; AVX512-NEXT: movl %edi, %eax
1858 ; AVX512-NEXT: vpbroadcastq %rax, %ymm2
1859 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
1860 ; AVX512-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
1863 ; X86AVX2-LABEL: arg_f64_v4f64:
1865 ; X86AVX2-NEXT: pushl %ebp
1866 ; X86AVX2-NEXT: movl %esp, %ebp
1867 ; X86AVX2-NEXT: andl $-32, %esp
1868 ; X86AVX2-NEXT: subl $64, %esp
1869 ; X86AVX2-NEXT: movl 16(%ebp), %eax
1870 ; X86AVX2-NEXT: andl $3, %eax
1871 ; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1872 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
1873 ; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8)
1874 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
1875 ; X86AVX2-NEXT: movl %ebp, %esp
1876 ; X86AVX2-NEXT: popl %ebp
1877 ; X86AVX2-NEXT: retl
1878 %ins = insertelement <4 x double> %v, double %x, i32 %y
1879 ret <4 x double> %ins
1882 define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind {
1883 ; SSE-LABEL: load_i8_v32i8:
1885 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1886 ; SSE-NEXT: movzbl (%rdi), %eax
1887 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1888 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1889 ; SSE-NEXT: andl $31, %esi
1890 ; SSE-NEXT: movb %al, -40(%rsp,%rsi)
1891 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1892 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
1895 ; AVX1OR2-LABEL: load_i8_v32i8:
1897 ; AVX1OR2-NEXT: pushq %rbp
1898 ; AVX1OR2-NEXT: movq %rsp, %rbp
1899 ; AVX1OR2-NEXT: andq $-32, %rsp
1900 ; AVX1OR2-NEXT: subq $64, %rsp
1901 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1902 ; AVX1OR2-NEXT: movzbl (%rdi), %eax
1903 ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
1904 ; AVX1OR2-NEXT: andl $31, %esi
1905 ; AVX1OR2-NEXT: movb %al, (%rsp,%rsi)
1906 ; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0
1907 ; AVX1OR2-NEXT: movq %rbp, %rsp
1908 ; AVX1OR2-NEXT: popq %rbp
1909 ; AVX1OR2-NEXT: retq
1911 ; AVX512F-LABEL: load_i8_v32i8:
1913 ; AVX512F-NEXT: pushq %rbp
1914 ; AVX512F-NEXT: movq %rsp, %rbp
1915 ; AVX512F-NEXT: andq $-32, %rsp
1916 ; AVX512F-NEXT: subq $64, %rsp
1917 ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
1918 ; AVX512F-NEXT: movzbl (%rdi), %eax
1919 ; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
1920 ; AVX512F-NEXT: andl $31, %esi
1921 ; AVX512F-NEXT: movb %al, (%rsp,%rsi)
1922 ; AVX512F-NEXT: vmovaps (%rsp), %ymm0
1923 ; AVX512F-NEXT: movq %rbp, %rsp
1924 ; AVX512F-NEXT: popq %rbp
1925 ; AVX512F-NEXT: retq
1927 ; AVX512BW-LABEL: load_i8_v32i8:
1928 ; AVX512BW: # %bb.0:
1929 ; AVX512BW-NEXT: vpbroadcastb %esi, %ymm1
1930 ; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
1931 ; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
1932 ; AVX512BW-NEXT: retq
1934 ; X86AVX2-LABEL: load_i8_v32i8:
1936 ; X86AVX2-NEXT: pushl %ebp
1937 ; X86AVX2-NEXT: movl %esp, %ebp
1938 ; X86AVX2-NEXT: andl $-32, %esp
1939 ; X86AVX2-NEXT: subl $64, %esp
1940 ; X86AVX2-NEXT: movl 12(%ebp), %eax
1941 ; X86AVX2-NEXT: andl $31, %eax
1942 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
1943 ; X86AVX2-NEXT: movzbl (%ecx), %ecx
1944 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
1945 ; X86AVX2-NEXT: movb %cl, (%esp,%eax)
1946 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
1947 ; X86AVX2-NEXT: movl %ebp, %esp
1948 ; X86AVX2-NEXT: popl %ebp
1949 ; X86AVX2-NEXT: retl
1950 %x = load i8, ptr %p
1951 %ins = insertelement <32 x i8> %v, i8 %x, i32 %y
1955 define <16 x i16> @load_i16_v16i16(<16 x i16> %v, ptr %p, i32 %y) nounwind {
1956 ; SSE-LABEL: load_i16_v16i16:
1958 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
1959 ; SSE-NEXT: movzwl (%rdi), %eax
1960 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1961 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1962 ; SSE-NEXT: andl $15, %esi
1963 ; SSE-NEXT: movw %ax, -40(%rsp,%rsi,2)
1964 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
1965 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
1968 ; AVX1OR2-LABEL: load_i16_v16i16:
1970 ; AVX1OR2-NEXT: pushq %rbp
1971 ; AVX1OR2-NEXT: movq %rsp, %rbp
1972 ; AVX1OR2-NEXT: andq $-32, %rsp
1973 ; AVX1OR2-NEXT: subq $64, %rsp
1974 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
1975 ; AVX1OR2-NEXT: movzwl (%rdi), %eax
1976 ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
1977 ; AVX1OR2-NEXT: andl $15, %esi
1978 ; AVX1OR2-NEXT: movw %ax, (%rsp,%rsi,2)
1979 ; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0
1980 ; AVX1OR2-NEXT: movq %rbp, %rsp
1981 ; AVX1OR2-NEXT: popq %rbp
1982 ; AVX1OR2-NEXT: retq
1984 ; AVX512F-LABEL: load_i16_v16i16:
1986 ; AVX512F-NEXT: pushq %rbp
1987 ; AVX512F-NEXT: movq %rsp, %rbp
1988 ; AVX512F-NEXT: andq $-32, %rsp
1989 ; AVX512F-NEXT: subq $64, %rsp
1990 ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
1991 ; AVX512F-NEXT: movzwl (%rdi), %eax
1992 ; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
1993 ; AVX512F-NEXT: andl $15, %esi
1994 ; AVX512F-NEXT: movw %ax, (%rsp,%rsi,2)
1995 ; AVX512F-NEXT: vmovaps (%rsp), %ymm0
1996 ; AVX512F-NEXT: movq %rbp, %rsp
1997 ; AVX512F-NEXT: popq %rbp
1998 ; AVX512F-NEXT: retq
2000 ; AVX512BW-LABEL: load_i16_v16i16:
2001 ; AVX512BW: # %bb.0:
2002 ; AVX512BW-NEXT: vpbroadcastw %esi, %ymm1
2003 ; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
2004 ; AVX512BW-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
2005 ; AVX512BW-NEXT: retq
2007 ; X86AVX2-LABEL: load_i16_v16i16:
2009 ; X86AVX2-NEXT: pushl %ebp
2010 ; X86AVX2-NEXT: movl %esp, %ebp
2011 ; X86AVX2-NEXT: andl $-32, %esp
2012 ; X86AVX2-NEXT: subl $64, %esp
2013 ; X86AVX2-NEXT: movl 12(%ebp), %eax
2014 ; X86AVX2-NEXT: andl $15, %eax
2015 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
2016 ; X86AVX2-NEXT: movzwl (%ecx), %ecx
2017 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
2018 ; X86AVX2-NEXT: movw %cx, (%esp,%eax,2)
2019 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
2020 ; X86AVX2-NEXT: movl %ebp, %esp
2021 ; X86AVX2-NEXT: popl %ebp
2022 ; X86AVX2-NEXT: retl
2023 %x = load i16, ptr %p
2024 %ins = insertelement <16 x i16> %v, i16 %x, i32 %y
2028 define <8 x i32> @load_i32_v8i32(<8 x i32> %v, ptr %p, i32 %y) nounwind {
2029 ; SSE-LABEL: load_i32_v8i32:
2031 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
2032 ; SSE-NEXT: movl (%rdi), %eax
2033 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
2034 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
2035 ; SSE-NEXT: andl $7, %esi
2036 ; SSE-NEXT: movl %eax, -40(%rsp,%rsi,4)
2037 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
2038 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
2041 ; AVX1OR2-LABEL: load_i32_v8i32:
2043 ; AVX1OR2-NEXT: pushq %rbp
2044 ; AVX1OR2-NEXT: movq %rsp, %rbp
2045 ; AVX1OR2-NEXT: andq $-32, %rsp
2046 ; AVX1OR2-NEXT: subq $64, %rsp
2047 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
2048 ; AVX1OR2-NEXT: movl (%rdi), %eax
2049 ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
2050 ; AVX1OR2-NEXT: andl $7, %esi
2051 ; AVX1OR2-NEXT: movl %eax, (%rsp,%rsi,4)
2052 ; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0
2053 ; AVX1OR2-NEXT: movq %rbp, %rsp
2054 ; AVX1OR2-NEXT: popq %rbp
2055 ; AVX1OR2-NEXT: retq
2057 ; AVX512-LABEL: load_i32_v8i32:
2059 ; AVX512-NEXT: vpbroadcastd %esi, %ymm1
2060 ; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
2061 ; AVX512-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
2064 ; X86AVX2-LABEL: load_i32_v8i32:
2066 ; X86AVX2-NEXT: pushl %ebp
2067 ; X86AVX2-NEXT: movl %esp, %ebp
2068 ; X86AVX2-NEXT: andl $-32, %esp
2069 ; X86AVX2-NEXT: subl $64, %esp
2070 ; X86AVX2-NEXT: movl 12(%ebp), %eax
2071 ; X86AVX2-NEXT: andl $7, %eax
2072 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
2073 ; X86AVX2-NEXT: movl (%ecx), %ecx
2074 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
2075 ; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4)
2076 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
2077 ; X86AVX2-NEXT: movl %ebp, %esp
2078 ; X86AVX2-NEXT: popl %ebp
2079 ; X86AVX2-NEXT: retl
2080 %x = load i32, ptr %p
2081 %ins = insertelement <8 x i32> %v, i32 %x, i32 %y
2085 define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
2086 ; SSE-LABEL: load_i64_v4i64:
2088 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
2089 ; SSE-NEXT: movq (%rdi), %rax
2090 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
2091 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
2092 ; SSE-NEXT: andl $3, %esi
2093 ; SSE-NEXT: movq %rax, -40(%rsp,%rsi,8)
2094 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
2095 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
2098 ; AVX1OR2-LABEL: load_i64_v4i64:
2100 ; AVX1OR2-NEXT: pushq %rbp
2101 ; AVX1OR2-NEXT: movq %rsp, %rbp
2102 ; AVX1OR2-NEXT: andq $-32, %rsp
2103 ; AVX1OR2-NEXT: subq $64, %rsp
2104 ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
2105 ; AVX1OR2-NEXT: movq (%rdi), %rax
2106 ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
2107 ; AVX1OR2-NEXT: andl $3, %esi
2108 ; AVX1OR2-NEXT: movq %rax, (%rsp,%rsi,8)
2109 ; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0
2110 ; AVX1OR2-NEXT: movq %rbp, %rsp
2111 ; AVX1OR2-NEXT: popq %rbp
2112 ; AVX1OR2-NEXT: retq
2114 ; AVX512-LABEL: load_i64_v4i64:
2116 ; AVX512-NEXT: movl %esi, %eax
2117 ; AVX512-NEXT: vpbroadcastq %rax, %ymm1
2118 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
2119 ; AVX512-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
2122 ; X86AVX2-LABEL: load_i64_v4i64:
2124 ; X86AVX2-NEXT: pushl %ebp
2125 ; X86AVX2-NEXT: movl %esp, %ebp
2126 ; X86AVX2-NEXT: pushl %esi
2127 ; X86AVX2-NEXT: andl $-32, %esp
2128 ; X86AVX2-NEXT: subl $96, %esp
2129 ; X86AVX2-NEXT: movl 12(%ebp), %eax
2130 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
2131 ; X86AVX2-NEXT: movl (%ecx), %edx
2132 ; X86AVX2-NEXT: movl 4(%ecx), %ecx
2133 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
2134 ; X86AVX2-NEXT: addl %eax, %eax
2135 ; X86AVX2-NEXT: movl %eax, %esi
2136 ; X86AVX2-NEXT: andl $7, %esi
2137 ; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
2138 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
2139 ; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
2140 ; X86AVX2-NEXT: incl %eax
2141 ; X86AVX2-NEXT: andl $7, %eax
2142 ; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
2143 ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
2144 ; X86AVX2-NEXT: leal -4(%ebp), %esp
2145 ; X86AVX2-NEXT: popl %esi
2146 ; X86AVX2-NEXT: popl %ebp
2147 ; X86AVX2-NEXT: retl
2148 %x = load i64, ptr %p
2149 %ins = insertelement <4 x i64> %v, i64 %x, i32 %y
; Insert a float loaded from %p into <8 x float> %v at variable index %y.
; SSE uses the stack spill/store/reload pattern (index & 7, scaled by 4).
; The AVX targets all avoid the spill: they broadcast the loaded scalar
; and the index, compare the index splat against constant-pool lane ids,
; then select — AVX1 with two 128-bit vpcmpeqd halves joined by
; vinsertf128 plus vblendvps, AVX2/X86AVX2 with a single 256-bit
; vpcmpeqd plus vblendvps, and AVX512 with a %k1 mask feeding a masked
; vbroadcastss.
2153 define <8 x float> @load_f32_v8f32(<8 x float> %v, ptr %p, i32 %y) nounwind {
2154 ; SSE-LABEL: load_f32_v8f32:
2156 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
2157 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2158 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
2159 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
2160 ; SSE-NEXT: andl $7, %esi
2161 ; SSE-NEXT: movss %xmm2, -40(%rsp,%rsi,4)
2162 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
2163 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
2166 ; AVX1-LABEL: load_f32_v8f32:
2168 ; AVX1-NEXT: vmovd %esi, %xmm1
2169 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
2170 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
2171 ; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
2172 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2173 ; AVX1-NEXT: vbroadcastss (%rdi), %ymm2
2174 ; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
2177 ; AVX2-LABEL: load_f32_v8f32:
2179 ; AVX2-NEXT: vbroadcastss (%rdi), %ymm1
2180 ; AVX2-NEXT: vmovd %esi, %xmm2
2181 ; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2
2182 ; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
2183 ; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
2186 ; AVX512-LABEL: load_f32_v8f32:
2188 ; AVX512-NEXT: vpbroadcastd %esi, %ymm1
2189 ; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
2190 ; AVX512-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
2193 ; X86AVX2-LABEL: load_f32_v8f32:
2195 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
2196 ; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm1
2197 ; X86AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
2198 ; X86AVX2-NEXT: vbroadcastss (%eax), %ymm2
2199 ; X86AVX2-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
2200 ; X86AVX2-NEXT: retl
2201 %x = load float, ptr %p
2202 %ins = insertelement <8 x float> %v, float %x, i32 %y
2203 ret <8 x float> %ins
; Insert a double loaded from %p into <4 x double> %v at variable index %y.
; SSE and X86AVX2 both use the stack spill/store/reload pattern
; (index & 3, scaled by 8); unlike the i64 case, X86AVX2 can store the
; 64-bit element in one vmovsd, so no two-halves dance is needed.
; AVX1 compares a splatted 64-bit index against two 128-bit constant-pool
; halves and selects with vblendvpd; AVX2 does the same with one 256-bit
; vpcmpeqq; AVX512 folds the select into a masked vbroadcastsd via %k1.
2206 define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind {
2207 ; SSE-LABEL: load_f64_v4f64:
2209 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
2210 ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
2211 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
2212 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
2213 ; SSE-NEXT: andl $3, %esi
2214 ; SSE-NEXT: movsd %xmm2, -40(%rsp,%rsi,8)
2215 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
2216 ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
2219 ; AVX1-LABEL: load_f64_v4f64:
2221 ; AVX1-NEXT: movl %esi, %eax
2222 ; AVX1-NEXT: vmovq %rax, %xmm1
2223 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
2224 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
2225 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
2226 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2227 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm2
2228 ; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
2231 ; AVX2-LABEL: load_f64_v4f64:
2233 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
2234 ; AVX2-NEXT: movl %esi, %eax
2235 ; AVX2-NEXT: vmovq %rax, %xmm2
2236 ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
2237 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
2238 ; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
2241 ; AVX512-LABEL: load_f64_v4f64:
2243 ; AVX512-NEXT: movl %esi, %eax
2244 ; AVX512-NEXT: vpbroadcastq %rax, %ymm1
2245 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
2246 ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
2249 ; X86AVX2-LABEL: load_f64_v4f64:
2251 ; X86AVX2-NEXT: pushl %ebp
2252 ; X86AVX2-NEXT: movl %esp, %ebp
2253 ; X86AVX2-NEXT: andl $-32, %esp
2254 ; X86AVX2-NEXT: subl $64, %esp
2255 ; X86AVX2-NEXT: movl 12(%ebp), %eax
2256 ; X86AVX2-NEXT: andl $3, %eax
2257 ; X86AVX2-NEXT: movl 8(%ebp), %ecx
2258 ; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
2259 ; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
2260 ; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8)
2261 ; X86AVX2-NEXT: vmovaps (%esp), %ymm0
2262 ; X86AVX2-NEXT: movl %ebp, %esp
2263 ; X86AVX2-NEXT: popl %ebp
2264 ; X86AVX2-NEXT: retl
2265 %x = load double, ptr %p
2266 %ins = insertelement <4 x double> %v, double %x, i32 %y
2267 ret <4 x double> %ins
2270 ; Don't die trying to insert to an invalid index.
2272 define i32 @PR44139(ptr %p) {
2273 ; SSE-LABEL: PR44139:
2275 ; SSE-NEXT: movl (%rdi), %eax
2276 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
2277 ; SSE-NEXT: movdqa %xmm0, 96(%rdi)
2278 ; SSE-NEXT: movdqa %xmm0, 112(%rdi)
2279 ; SSE-NEXT: movdqa %xmm0, 64(%rdi)
2280 ; SSE-NEXT: movdqa %xmm0, 80(%rdi)
2281 ; SSE-NEXT: movdqa %xmm0, 32(%rdi)
2282 ; SSE-NEXT: movdqa %xmm0, 48(%rdi)
2283 ; SSE-NEXT: movdqa %xmm0, (%rdi)
2284 ; SSE-NEXT: movdqa %xmm0, 16(%rdi)
2285 ; SSE-NEXT: leal 2147483647(%rax), %ecx
2286 ; SSE-NEXT: testl %eax, %eax
2287 ; SSE-NEXT: cmovnsl %eax, %ecx
2288 ; SSE-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
2289 ; SSE-NEXT: addl %eax, %ecx
2290 ; SSE-NEXT: # kill: def $eax killed $eax killed $rax
2291 ; SSE-NEXT: xorl %edx, %edx
2292 ; SSE-NEXT: divl %ecx
2295 ; AVX1-LABEL: PR44139:
2297 ; AVX1-NEXT: movq (%rdi), %rax
2298 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
2299 ; AVX1-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
2300 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2301 ; AVX1-NEXT: vmovaps %ymm0, 64(%rdi)
2302 ; AVX1-NEXT: vmovaps %ymm0, 96(%rdi)
2303 ; AVX1-NEXT: vmovaps %ymm0, 32(%rdi)
2304 ; AVX1-NEXT: vmovaps %ymm1, (%rdi)
2305 ; AVX1-NEXT: leal 2147483647(%rax), %ecx
2306 ; AVX1-NEXT: testl %eax, %eax
2307 ; AVX1-NEXT: cmovnsl %eax, %ecx
2308 ; AVX1-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
2309 ; AVX1-NEXT: addl %eax, %ecx
2310 ; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
2311 ; AVX1-NEXT: xorl %edx, %edx
2312 ; AVX1-NEXT: divl %ecx
2313 ; AVX1-NEXT: vzeroupper
2316 ; AVX2-LABEL: PR44139:
2318 ; AVX2-NEXT: movq (%rdi), %rax
2319 ; AVX2-NEXT: vpbroadcastq (%rdi), %ymm0
2320 ; AVX2-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
2321 ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2322 ; AVX2-NEXT: vmovdqa %ymm0, 64(%rdi)
2323 ; AVX2-NEXT: vmovdqa %ymm0, 96(%rdi)
2324 ; AVX2-NEXT: vmovdqa %ymm0, 32(%rdi)
2325 ; AVX2-NEXT: vmovdqa %ymm1, (%rdi)
2326 ; AVX2-NEXT: leal 2147483647(%rax), %ecx
2327 ; AVX2-NEXT: testl %eax, %eax
2328 ; AVX2-NEXT: cmovnsl %eax, %ecx
2329 ; AVX2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
2330 ; AVX2-NEXT: addl %eax, %ecx
2331 ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
2332 ; AVX2-NEXT: xorl %edx, %edx
2333 ; AVX2-NEXT: divl %ecx
2334 ; AVX2-NEXT: vzeroupper
2337 ; AVX512-LABEL: PR44139:
2339 ; AVX512-NEXT: movq (%rdi), %rax
2340 ; AVX512-NEXT: vpbroadcastq (%rdi), %zmm0
2341 ; AVX512-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
2342 ; AVX512-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1
2343 ; AVX512-NEXT: vmovdqa64 %zmm0, 64(%rdi)
2344 ; AVX512-NEXT: vmovdqa64 %zmm1, (%rdi)
2345 ; AVX512-NEXT: leal 2147483647(%rax), %ecx
2346 ; AVX512-NEXT: testl %eax, %eax
2347 ; AVX512-NEXT: cmovnsl %eax, %ecx
2348 ; AVX512-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
2349 ; AVX512-NEXT: addl %eax, %ecx
2350 ; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
2351 ; AVX512-NEXT: xorl %edx, %edx
2352 ; AVX512-NEXT: divl %ecx
2353 ; AVX512-NEXT: vzeroupper
2356 ; X86AVX2-LABEL: PR44139:
2358 ; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
2359 ; X86AVX2-NEXT: vbroadcastsd (%ecx), %ymm0
2360 ; X86AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0]
2361 ; X86AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2362 ; X86AVX2-NEXT: vmovaps %ymm0, 64(%ecx)
2363 ; X86AVX2-NEXT: vmovaps %ymm0, 96(%ecx)
2364 ; X86AVX2-NEXT: vmovaps %ymm0, 32(%ecx)
2365 ; X86AVX2-NEXT: movl (%ecx), %eax
2366 ; X86AVX2-NEXT: vmovaps %ymm1, (%ecx)
2367 ; X86AVX2-NEXT: leal 2147483647(%eax), %ecx
2368 ; X86AVX2-NEXT: testl %eax, %eax
2369 ; X86AVX2-NEXT: cmovnsl %eax, %ecx
2370 ; X86AVX2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
2371 ; X86AVX2-NEXT: addl %eax, %ecx
2372 ; X86AVX2-NEXT: xorl %edx, %edx
2373 ; X86AVX2-NEXT: divl %ecx
2374 ; X86AVX2-NEXT: vzeroupper
2375 ; X86AVX2-NEXT: retl
2376 %L = load <16 x i64>, ptr %p
2377 %E1 = extractelement <16 x i64> %L, i64 0
2378 %tempvector = insertelement <16 x i64> undef, i64 %E1, i32 0
2379 %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
2380 %C3 = icmp sgt i64 9223372036854775807, -9223372036854775808
2381 %t0 = trunc <16 x i64> %vector to <16 x i32>
2382 %I4 = insertelement <16 x i64> %vector, i64 %E1, i1 %C3
2383 store <16 x i64> %I4, ptr %p
2384 %elt = extractelement <16 x i32> %t0, i32 0
2385 %B = srem i32 %elt, -2147483648
2386 %B9 = udiv i32 %elt, %B