1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2
3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64-SSE,X64-SSE2
4 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE4
5 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X64-SSE,X64-SSE4
6 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
7 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
8 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
9 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
10 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=X86-AVX,X86-AVX512F
11 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512F
; Inserts the scalar i8 argument %x into lane 0 of a constant <16 x i8> vector
; whose lanes 1 through 15 hold the values 1..15 (see the insertelement below).
; The assertions are autogenerated (per the file header). As recorded there, the
; SSE2 runs zero-extend the byte, movd it into xmm0 and por it with a
; constant-pool operand, while the SSE4.1 and AVX-family runs load the constant
; and then apply (v)pinsrb $0.
13 define <16 x i8> @elt0_v16i8(i8 %x) {
14 ; X86-SSE2-LABEL: elt0_v16i8:
16 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
17 ; X86-SSE2-NEXT: movd %eax, %xmm0
18 ; X86-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
21 ; X64-SSE2-LABEL: elt0_v16i8:
23 ; X64-SSE2-NEXT: movzbl %dil, %eax
24 ; X64-SSE2-NEXT: movd %eax, %xmm0
25 ; X64-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28 ; X86-SSE4-LABEL: elt0_v16i8:
30 ; X86-SSE4-NEXT: movdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
31 ; X86-SSE4-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
34 ; X64-SSE4-LABEL: elt0_v16i8:
36 ; X64-SSE4-NEXT: movdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
37 ; X64-SSE4-NEXT: pinsrb $0, %edi, %xmm0
40 ; X86-AVX-LABEL: elt0_v16i8:
42 ; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
43 ; X86-AVX-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
46 ; X64-AVX-LABEL: elt0_v16i8:
48 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
49 ; X64-AVX-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0
51 %ins = insertelement <16 x i8> <i8 42, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i8 %x, i32 0
; Inserts the scalar i16 argument %x into lane 5 of the constant <8 x i16>
; vector <42,1,2,3,4,5,6,7> (see the insertelement below).
; Every run in the autogenerated assertions expects a constant load followed by
; (v)pinsrw $5. The SSE2 runs load the constant with movdqa, whereas the SSE4.1
; and AVX-family runs materialize it with (v)pmovsxbw.
55 define <8 x i16> @elt5_v8i16(i16 %x) {
56 ; X86-SSE2-LABEL: elt5_v8i16:
58 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [42,1,2,3,4,u,6,7]
59 ; X86-SSE2-NEXT: pinsrw $5, {{[0-9]+}}(%esp), %xmm0
62 ; X64-SSE2-LABEL: elt5_v8i16:
64 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [42,1,2,3,4,u,6,7]
65 ; X64-SSE2-NEXT: pinsrw $5, %edi, %xmm0
68 ; X86-SSE4-LABEL: elt5_v8i16:
70 ; X86-SSE4-NEXT: pmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
71 ; X86-SSE4-NEXT: pinsrw $5, {{[0-9]+}}(%esp), %xmm0
74 ; X64-SSE4-LABEL: elt5_v8i16:
76 ; X64-SSE4-NEXT: pmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
77 ; X64-SSE4-NEXT: pinsrw $5, %edi, %xmm0
80 ; X86-AVX-LABEL: elt5_v8i16:
82 ; X86-AVX-NEXT: vpmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
83 ; X86-AVX-NEXT: vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
86 ; X64-AVX-LABEL: elt5_v8i16:
88 ; X64-AVX-NEXT: vpmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
89 ; X64-AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
91 %ins = insertelement <8 x i16> <i16 42, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 %x, i32 5
; Inserts the scalar i32 argument %x into lane 3 of the constant <4 x i32>
; vector <42,1,2,3> (see the insertelement below).
; In the autogenerated assertions the SSE2 runs move %x into xmm1 and merge it
; with the constant through a pair of shufps, while the SSE4.1 and AVX-family
; runs materialize the constant with (v)pmovsxbd and then use (v)pinsrd $3.
95 define <4 x i32> @elt3_v4i32(i32 %x) {
96 ; X86-SSE2-LABEL: elt3_v4i32:
98 ; X86-SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
99 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,u]
100 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
101 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
102 ; X86-SSE2-NEXT: retl
104 ; X64-SSE2-LABEL: elt3_v4i32:
106 ; X64-SSE2-NEXT: movd %edi, %xmm1
107 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,u]
108 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
109 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
110 ; X64-SSE2-NEXT: retq
112 ; X86-SSE4-LABEL: elt3_v4i32:
114 ; X86-SSE4-NEXT: pmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
115 ; X86-SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0
116 ; X86-SSE4-NEXT: retl
118 ; X64-SSE4-LABEL: elt3_v4i32:
120 ; X64-SSE4-NEXT: pmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
121 ; X64-SSE4-NEXT: pinsrd $3, %edi, %xmm0
122 ; X64-SSE4-NEXT: retq
124 ; X86-AVX-LABEL: elt3_v4i32:
126 ; X86-AVX-NEXT: vpmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
127 ; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
130 ; X64-AVX-LABEL: elt3_v4i32:
132 ; X64-AVX-NEXT: vpmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
133 ; X64-AVX-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
135 %ins = insertelement <4 x i32> <i32 42, i32 1, i32 2, i32 3>, i32 %x, i32 3
; Inserts the scalar i64 argument %x into lane 0 of the constant <2 x i64>
; vector <42,1> (see the insertelement below).
; In the autogenerated assertions the i686 runs (SSE and AVX alike) load %x
; with (v)movsd and combine it with a memory operand via (v)unpcklpd. The
; x86_64 SSE2 run uses movq plus a movsd merge, and the x86_64 SSE4.1 and
; AVX-family runs materialize the constant with (v)pmovsxbq and then use
; (v)pinsrq $0.
139 define <2 x i64> @elt0_v2i64(i64 %x) {
140 ; X86-SSE-LABEL: elt0_v2i64:
142 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
143 ; X86-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
146 ; X64-SSE2-LABEL: elt0_v2i64:
148 ; X64-SSE2-NEXT: movq %rdi, %xmm1
149 ; X64-SSE2-NEXT: movapd {{.*#+}} xmm0 = [u,1]
150 ; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
151 ; X64-SSE2-NEXT: retq
153 ; X64-SSE4-LABEL: elt0_v2i64:
155 ; X64-SSE4-NEXT: pmovsxbq {{.*#+}} xmm0 = [1,1]
156 ; X64-SSE4-NEXT: pinsrq $0, %rdi, %xmm0
157 ; X64-SSE4-NEXT: retq
159 ; X86-AVX-LABEL: elt0_v2i64:
161 ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
162 ; X86-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
165 ; X64-AVX-LABEL: elt0_v2i64:
167 ; X64-AVX-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,1]
168 ; X64-AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
170 %ins = insertelement <2 x i64> <i64 42, i64 1>, i64 %x, i32 0
; Inserts the scalar float argument %x into lane 1 of the constant <4 x float>
; vector <42.0,1.0,2.0,3.0> (see the insertelement below).
; In the autogenerated assertions the SSE2 runs merge %x with the constant via
; movlhps followed by shufps, while the SSE4.1 and AVX-family runs use a single
; (v)insertps. On i686 the scalar is read from memory, on x86_64 from xmm0.
174 define <4 x float> @elt1_v4f32(float %x) {
175 ; X86-SSE2-LABEL: elt1_v4f32:
177 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
178 ; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
179 ; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
180 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
181 ; X86-SSE2-NEXT: retl
183 ; X64-SSE2-LABEL: elt1_v4f32:
185 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
186 ; X64-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
187 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
188 ; X64-SSE2-NEXT: retq
190 ; X86-SSE4-LABEL: elt1_v4f32:
192 ; X86-SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,u,2.0E+0,3.0E+0]
193 ; X86-SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
194 ; X86-SSE4-NEXT: retl
196 ; X64-SSE4-LABEL: elt1_v4f32:
198 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
199 ; X64-SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
200 ; X64-SSE4-NEXT: movaps %xmm1, %xmm0
201 ; X64-SSE4-NEXT: retq
203 ; X86-AVX-LABEL: elt1_v4f32:
205 ; X86-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,u,2.0E+0,3.0E+0]
206 ; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
209 ; X64-AVX-LABEL: elt1_v4f32:
211 ; X64-AVX-NEXT: vmovaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
212 ; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
214 %ins = insertelement <4 x float> <float 42.0, float 1.0, float 2.0, float 3.0>, float %x, i32 1
; Inserts the scalar double argument %x into lane 1 of the constant
; <2 x double> vector <42.0,1.0> and returns the result.
; In the autogenerated assertions the i686 runs fill the high half from memory
; with (v)movhps, and the x86_64 runs combine the constant with xmm0 through
; (v)movlhps. The AVX-family runs load the constant with vmovddup, so the
; printed constant shows 42.0 in both lanes before the insert.
218 define <2 x double> @elt1_v2f64(double %x) {
219 ; X86-SSE-LABEL: elt1_v2f64:
221 ; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,u]
222 ; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
225 ; X64-SSE-LABEL: elt1_v2f64:
227 ; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = [4.2E+1,u]
228 ; X64-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
229 ; X64-SSE-NEXT: movaps %xmm1, %xmm0
232 ; X86-AVX-LABEL: elt1_v2f64:
234 ; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = [4.2E+1,4.2E+1]
235 ; X86-AVX-NEXT: # xmm0 = mem[0,0]
236 ; X86-AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
239 ; X64-AVX-LABEL: elt1_v2f64:
241 ; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
242 ; X64-AVX-NEXT: # xmm1 = mem[0,0]
243 ; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
245 %ins = insertelement <2 x double> <double 42.0, double 1.0>, double %x, i32 1
246 ret <2 x double> %ins
; Inserts the scalar i32 argument %x into lane 7 of the constant <8 x i32>
; vector <42,1,2,3,4,5,6,7> (see the insertelement below).
; In the autogenerated assertions the SSE runs return the value in xmm0 and
; xmm1. Only xmm1 (constant [4,5,6,u]) receives %x, via two shufps on SSE2 or
; pinsrd $3 on SSE4.1, and xmm0 is a plain load of [42,1,2,3].
; The AVX-family runs place %x in a ymm register (vbroadcastss from the stack
; on i686, vmovd plus vpshufd and vinsertf128 on x86_64 AVX1, vmovd plus
; vpbroadcastd on x86_64 AVX2 and AVX512F) and then blend lane 7 of it with a
; constant read from memory.
249 define <8 x i32> @elt7_v8i32(i32 %x) {
250 ; X86-SSE2-LABEL: elt7_v8i32:
252 ; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
253 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = [4,5,6,u]
254 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
255 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
256 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
257 ; X86-SSE2-NEXT: retl
259 ; X64-SSE2-LABEL: elt7_v8i32:
261 ; X64-SSE2-NEXT: movd %edi, %xmm0
262 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = [4,5,6,u]
263 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
264 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
265 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
266 ; X64-SSE2-NEXT: retq
268 ; X86-SSE4-LABEL: elt7_v8i32:
270 ; X86-SSE4-NEXT: pmovsxbd {{.*#+}} xmm1 = [4,5,6,0]
271 ; X86-SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm1
272 ; X86-SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
273 ; X86-SSE4-NEXT: retl
275 ; X64-SSE4-LABEL: elt7_v8i32:
277 ; X64-SSE4-NEXT: pmovsxbd {{.*#+}} xmm1 = [4,5,6,0]
278 ; X64-SSE4-NEXT: pinsrd $3, %edi, %xmm1
279 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
280 ; X64-SSE4-NEXT: retq
282 ; X86-AVX-LABEL: elt7_v8i32:
284 ; X86-AVX-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
285 ; X86-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
288 ; X64-AVX1-LABEL: elt7_v8i32:
290 ; X64-AVX1-NEXT: vmovd %edi, %xmm0
291 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
292 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
293 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
294 ; X64-AVX1-NEXT: retq
296 ; X64-AVX2-LABEL: elt7_v8i32:
298 ; X64-AVX2-NEXT: vmovd %edi, %xmm0
299 ; X64-AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
300 ; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
301 ; X64-AVX2-NEXT: retq
303 ; X64-AVX512F-LABEL: elt7_v8i32:
304 ; X64-AVX512F: # %bb.0:
305 ; X64-AVX512F-NEXT: vmovd %edi, %xmm0
306 ; X64-AVX512F-NEXT: vpbroadcastd %xmm0, %ymm0
307 ; X64-AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
308 ; X64-AVX512F-NEXT: retq
309 %ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7
; Inserts the scalar float argument %x into lane 6 of the constant
; <8 x float> vector <42.0,1.0,...,7.0> (see the insertelement below).
; In the autogenerated assertions the SSE runs return the value in xmm0 and
; xmm1. Only xmm1 (constant [4.0,5.0,u,7.0]) receives %x, via two shufps on
; SSE2 or insertps on SSE4.1, and xmm0 is a plain load of the first four lanes.
; The AVX-family runs place %x in a ymm register (vbroadcastss, or vmovddup
; plus vinsertf128 on x86_64 AVX1) and then vblendps lane 6 of it with a
; constant read from memory.
313 define <8 x float> @elt6_v8f32(float %x) {
314 ; X86-SSE2-LABEL: elt6_v8f32:
316 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
317 ; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
318 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
319 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
320 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
321 ; X86-SSE2-NEXT: retl
323 ; X64-SSE2-LABEL: elt6_v8f32:
325 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
326 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
327 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
328 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
329 ; X64-SSE2-NEXT: retq
331 ; X86-SSE4-LABEL: elt6_v8f32:
333 ; X86-SSE4-NEXT: movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
334 ; X86-SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
335 ; X86-SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
336 ; X86-SSE4-NEXT: retl
338 ; X64-SSE4-LABEL: elt6_v8f32:
340 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
341 ; X64-SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
342 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
343 ; X64-SSE4-NEXT: retq
345 ; X86-AVX-LABEL: elt6_v8f32:
347 ; X86-AVX-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
348 ; X86-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
351 ; X64-AVX1-LABEL: elt6_v8f32:
353 ; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
354 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
355 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
356 ; X64-AVX1-NEXT: retq
358 ; X64-AVX2-LABEL: elt6_v8f32:
360 ; X64-AVX2-NEXT: vbroadcastss %xmm0, %ymm0
361 ; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
362 ; X64-AVX2-NEXT: retq
364 ; X64-AVX512F-LABEL: elt6_v8f32:
365 ; X64-AVX512F: # %bb.0:
366 ; X64-AVX512F-NEXT: vbroadcastss %xmm0, %ymm0
367 ; X64-AVX512F-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
368 ; X64-AVX512F-NEXT: retq
369 %ins = insertelement <8 x float> <float 42.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, float %x, i32 6
; Inserts the scalar i64 argument %x into lane 5 of the constant <8 x i64>
; vector <42,1,2,3,4,5,6,7> (see the insertelement below).
; In the autogenerated assertions the SSE runs return the value in xmm0 through
; xmm3. Only xmm2 (lanes 4 and 5) receives %x and the other three registers are
; plain constant loads. The AVX1 and AVX2 runs return ymm0 and ymm1, building
; ymm1 around the inserted element while ymm0 is a constant load.
; With AVX512F the i686 run assembles the upper ymm and joins it to the
; constant lower half with vinsertf64x4, and the x86_64 run uses vpermi2q with
; a constant index vector and a constant-pool memory operand.
; The i686 runs print 64-bit constants as pairs of 32-bit values, for example
; [42,0,1,0].
373 define <8 x i64> @elt5_v8i64(i64 %x) {
374 ; X86-SSE-LABEL: elt5_v8i64:
376 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
377 ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = [4,0,0,0]
378 ; X86-SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
379 ; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = [42,0,1,0]
380 ; X86-SSE-NEXT: movaps {{.*#+}} xmm1 = [2,0,3,0]
381 ; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [6,0,7,0]
384 ; X64-SSE2-LABEL: elt5_v8i64:
386 ; X64-SSE2-NEXT: movq %rdi, %xmm0
387 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,u]
388 ; X64-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
389 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1]
390 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3]
391 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm3 = [6,7]
392 ; X64-SSE2-NEXT: retq
394 ; X64-SSE4-LABEL: elt5_v8i64:
396 ; X64-SSE4-NEXT: pmovsxbq {{.*#+}} xmm2 = [4,4]
397 ; X64-SSE4-NEXT: pinsrq $1, %rdi, %xmm2
398 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1]
399 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm1 = [2,3]
400 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm3 = [6,7]
401 ; X64-SSE4-NEXT: retq
403 ; X86-AVX1-LABEL: elt5_v8i64:
405 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
406 ; X86-AVX1-NEXT: vmovss {{.*#+}} xmm1 = [4,0,0,0]
407 ; X86-AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
408 ; X86-AVX1-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
409 ; X86-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
410 ; X86-AVX1-NEXT: retl
412 ; X64-AVX1-LABEL: elt5_v8i64:
414 ; X64-AVX1-NEXT: vmovdqa {{.*#+}} ymm0 = [4,u,6,7]
415 ; X64-AVX1-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
416 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
417 ; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,1,2,3]
418 ; X64-AVX1-NEXT: retq
420 ; X86-AVX2-LABEL: elt5_v8i64:
422 ; X86-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
423 ; X86-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [4,0,0,0]
424 ; X86-AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
425 ; X86-AVX2-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
426 ; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
427 ; X86-AVX2-NEXT: retl
429 ; X64-AVX2-LABEL: elt5_v8i64:
431 ; X64-AVX2-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,0,6,7]
432 ; X64-AVX2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
433 ; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
434 ; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,1,2,3]
435 ; X64-AVX2-NEXT: retq
437 ; X86-AVX512F-LABEL: elt5_v8i64:
438 ; X86-AVX512F: # %bb.0:
439 ; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
440 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
441 ; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm2 = [4,0,0,0]
442 ; X86-AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
443 ; X86-AVX512F-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
444 ; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
445 ; X86-AVX512F-NEXT: retl
447 ; X64-AVX512F-LABEL: elt5_v8i64:
448 ; X64-AVX512F: # %bb.0:
449 ; X64-AVX512F-NEXT: vmovq %rdi, %xmm1
450 ; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,10,11,12,0,14,15]
451 ; X64-AVX512F-NEXT: vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
452 ; X64-AVX512F-NEXT: retq
453 %ins = insertelement <8 x i64> <i64 42, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, i64 %x, i32 5
; Inserts the scalar double argument %x into lane 1 of the constant
; <8 x double> vector <42.0,1.0,...,7.0> and returns the result.
; In the autogenerated assertions the SSE runs return the value in xmm0 through
; xmm3. Only the first register (lanes 0 and 1) receives %x, via movhps from
; memory on i686 or movlhps from xmm0 on x86_64, and the rest are constant
; loads. The AVX1 and AVX2 runs update the low 128 bits with vmovhps or
; vmovlhps, vblendps them back into the first ymm, and load the second ymm as a
; constant. The AVX512F runs load the whole constant into a zmm register,
; update the low 128 bits the same way, and write them back with
; vinsertf32x4 $0.
457 define <8 x double> @elt1_v8f64(double %x) {
458 ; X86-SSE-LABEL: elt1_v8f64:
460 ; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,u]
461 ; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
462 ; X86-SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
463 ; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
464 ; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
467 ; X64-SSE-LABEL: elt1_v8f64:
469 ; X64-SSE-NEXT: movaps {{.*#+}} xmm4 = [4.2E+1,u]
470 ; X64-SSE-NEXT: movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
471 ; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
472 ; X64-SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
473 ; X64-SSE-NEXT: movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
474 ; X64-SSE-NEXT: movaps %xmm4, %xmm0
477 ; X86-AVX1-LABEL: elt1_v8f64:
479 ; X86-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [4.2E+1,u,2.0E+0,3.0E+0]
480 ; X86-AVX1-NEXT: vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
481 ; X86-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
482 ; X86-AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
483 ; X86-AVX1-NEXT: retl
485 ; X64-AVX1-LABEL: elt1_v8f64:
487 ; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.2E+1,u,2.0E+0,3.0E+0]
488 ; X64-AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
489 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
490 ; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
491 ; X64-AVX1-NEXT: retq
493 ; X86-AVX2-LABEL: elt1_v8f64:
495 ; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [4.2E+1,u,2.0E+0,3.0E+0]
496 ; X86-AVX2-NEXT: vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
497 ; X86-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
498 ; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
499 ; X86-AVX2-NEXT: retl
501 ; X64-AVX2-LABEL: elt1_v8f64:
503 ; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.2E+1,u,2.0E+0,3.0E+0]
504 ; X64-AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
505 ; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
506 ; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
507 ; X64-AVX2-NEXT: retq
509 ; X86-AVX512F-LABEL: elt1_v8f64:
510 ; X86-AVX512F: # %bb.0:
511 ; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
512 ; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
513 ; X86-AVX512F-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
514 ; X86-AVX512F-NEXT: retl
516 ; X64-AVX512F-LABEL: elt1_v8f64:
517 ; X64-AVX512F: # %bb.0:
518 ; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
519 ; X64-AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
520 ; X64-AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
521 ; X64-AVX512F-NEXT: retq
522 %ins = insertelement <8 x double> <double 42.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, double %x, i32 1
523 ret <8 x double> %ins