1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2
3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64-SSE,X64-SSE2
4 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE4
5 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X64-SSE,X64-SSE4
6 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
7 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
8 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
9 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
10 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=X86-AVX,X86-AVX512F
11 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512F
; elt0_v16i8: insert the scalar %x into element 0 of the constant vector
; <42, 1, 2, ..., 15>.
; In the checks below, the SSE2 runs zero-extend %x, move it into xmm0 and OR
; it with a constant-pool operand; the SSE4.1 and AVX runs load the constant
; (element 0 is printed as `u`) and overwrite lane 0 with pinsrb/vpinsrb.
13 define <16 x i8> @elt0_v16i8(i8 %x) {
14 ; X86-SSE2-LABEL: elt0_v16i8:
16 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
17 ; X86-SSE2-NEXT: movd %eax, %xmm0
18 ; X86-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
21 ; X64-SSE2-LABEL: elt0_v16i8:
23 ; X64-SSE2-NEXT: movzbl %dil, %eax
24 ; X64-SSE2-NEXT: movd %eax, %xmm0
25 ; X64-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28 ; X86-SSE4-LABEL: elt0_v16i8:
30 ; X86-SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
31 ; X86-SSE4-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
34 ; X64-SSE4-LABEL: elt0_v16i8:
36 ; X64-SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
37 ; X64-SSE4-NEXT: pinsrb $0, %edi, %xmm0
40 ; X86-AVX-LABEL: elt0_v16i8:
42 ; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
43 ; X86-AVX-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
46 ; X64-AVX-LABEL: elt0_v16i8:
48 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
49 ; X64-AVX-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0
51 %ins = insertelement <16 x i8> <i8 42, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i8 %x, i32 0
; elt5_v8i16: insert the scalar %x into element 5 of the constant vector
; <42, 1, 2, 3, 4, 5, 6, 7>.
; Every run shown below expects the same shape: load the constant with
; element 5 printed as `u`, then overwrite lane 5 with pinsrw/vpinsrw.
55 define <8 x i16> @elt5_v8i16(i16 %x) {
56 ; X86-SSE-LABEL: elt5_v8i16:
58 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
59 ; X86-SSE-NEXT: pinsrw $5, {{[0-9]+}}(%esp), %xmm0
62 ; X64-SSE-LABEL: elt5_v8i16:
64 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
65 ; X64-SSE-NEXT: pinsrw $5, %edi, %xmm0
68 ; X86-AVX-LABEL: elt5_v8i16:
70 ; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
71 ; X86-AVX-NEXT: vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
74 ; X64-AVX-LABEL: elt5_v8i16:
76 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
77 ; X64-AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
79 %ins = insertelement <8 x i16> <i16 42, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 %x, i32 5
; elt3_v4i32: insert the scalar %x into element 3 of the constant vector
; <42, 1, 2, 3>.
; The SSE2 runs move %x into xmm1 and merge it with the constant using two
; shufps; the SSE4.1 and AVX runs load the constant (element 3 printed as `u`)
; and overwrite lane 3 with pinsrd/vpinsrd.
83 define <4 x i32> @elt3_v4i32(i32 %x) {
84 ; X86-SSE2-LABEL: elt3_v4i32:
86 ; X86-SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
87 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = <42,1,2,u>
88 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
89 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
92 ; X64-SSE2-LABEL: elt3_v4i32:
94 ; X64-SSE2-NEXT: movd %edi, %xmm1
95 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = <42,1,2,u>
96 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
97 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
100 ; X86-SSE4-LABEL: elt3_v4i32:
102 ; X86-SSE4-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,u>
103 ; X86-SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0
104 ; X86-SSE4-NEXT: retl
106 ; X64-SSE4-LABEL: elt3_v4i32:
108 ; X64-SSE4-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,u>
109 ; X64-SSE4-NEXT: pinsrd $3, %edi, %xmm0
110 ; X64-SSE4-NEXT: retq
112 ; X86-AVX-LABEL: elt3_v4i32:
114 ; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,u>
115 ; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
118 ; X64-AVX-LABEL: elt3_v4i32:
120 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,u>
121 ; X64-AVX-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
123 %ins = insertelement <4 x i32> <i32 42, i32 1, i32 2, i32 3>, i32 %x, i32 3
; elt0_v2i64: insert the scalar %x into element 0 of the constant vector
; <42, 1>.
; The i686 runs (SSE and AVX) load 64 bits from memory and combine them with a
; second memory operand via unpcklpd/vunpcklpd. On x86-64 the SSE2 run blends
; with movsd, the SSE4.1 run uses pinsrq on a constant whose element 0 is
; printed as `u`, and the AVX1/AVX2/AVX512F runs first materialize a [1,1]
; splat (vmovddup or vpbroadcastq) and then overwrite lane 0 with vpinsrq.
127 define <2 x i64> @elt0_v2i64(i64 %x) {
128 ; X86-SSE-LABEL: elt0_v2i64:
130 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
131 ; X86-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
134 ; X64-SSE2-LABEL: elt0_v2i64:
136 ; X64-SSE2-NEXT: movq %rdi, %xmm1
137 ; X64-SSE2-NEXT: movapd {{.*#+}} xmm0 = <u,1>
138 ; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
139 ; X64-SSE2-NEXT: retq
141 ; X64-SSE4-LABEL: elt0_v2i64:
143 ; X64-SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1>
144 ; X64-SSE4-NEXT: pinsrq $0, %rdi, %xmm0
145 ; X64-SSE4-NEXT: retq
147 ; X86-AVX-LABEL: elt0_v2i64:
149 ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
150 ; X86-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
153 ; X64-AVX1-LABEL: elt0_v2i64:
155 ; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
156 ; X64-AVX1-NEXT: # xmm0 = mem[0,0]
157 ; X64-AVX1-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
158 ; X64-AVX1-NEXT: retq
160 ; X64-AVX2-LABEL: elt0_v2i64:
162 ; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1]
163 ; X64-AVX2-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
164 ; X64-AVX2-NEXT: retq
166 ; X64-AVX512F-LABEL: elt0_v2i64:
167 ; X64-AVX512F: # %bb.0:
168 ; X64-AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1]
169 ; X64-AVX512F-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
170 ; X64-AVX512F-NEXT: retq
171 %ins = insertelement <2 x i64> <i64 42, i64 1>, i64 %x, i32 0
; elt1_v4f32: insert the scalar %x into element 1 of the constant vector
; <42.0, 1.0, 2.0, 3.0>.
; The SSE2 runs combine %x with the constant using movlhps followed by shufps;
; the SSE4.1 and AVX runs load the constant (element 1 printed as `u`) and
; place %x in lane 1 with insertps/vinsertps.
175 define <4 x float> @elt1_v4f32(float %x) {
176 ; X86-SSE2-LABEL: elt1_v4f32:
178 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
179 ; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
180 ; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
181 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
182 ; X86-SSE2-NEXT: retl
184 ; X64-SSE2-LABEL: elt1_v4f32:
186 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
187 ; X64-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
188 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
189 ; X64-SSE2-NEXT: retq
191 ; X86-SSE4-LABEL: elt1_v4f32:
193 ; X86-SSE4-NEXT: movaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
194 ; X86-SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
195 ; X86-SSE4-NEXT: retl
197 ; X64-SSE4-LABEL: elt1_v4f32:
199 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
200 ; X64-SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
201 ; X64-SSE4-NEXT: movaps %xmm1, %xmm0
202 ; X64-SSE4-NEXT: retq
204 ; X86-AVX-LABEL: elt1_v4f32:
206 ; X86-AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
207 ; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
210 ; X64-AVX-LABEL: elt1_v4f32:
212 ; X64-AVX-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
213 ; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
215 %ins = insertelement <4 x float> <float 42.0, float 1.0, float 2.0, float 3.0>, float %x, i32 1
; elt1_v2f64: insert the scalar %x into element 1 of the constant vector
; <42.0, 1.0>.
; The SSE runs load the constant (element 1 printed as `u`) and fill the high
; half with movhps from memory (i686) or movlhps from xmm0 (x86-64). The AVX
; runs materialize a [42.0, 42.0] splat with vmovddup and then fill the high
; half with vmovhps/vmovlhps.
219 define <2 x double> @elt1_v2f64(double %x) {
220 ; X86-SSE-LABEL: elt1_v2f64:
222 ; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = <4.2E+1,u>
223 ; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
226 ; X64-SSE-LABEL: elt1_v2f64:
228 ; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u>
229 ; X64-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
230 ; X64-SSE-NEXT: movaps %xmm1, %xmm0
233 ; X86-AVX-LABEL: elt1_v2f64:
235 ; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = [4.2E+1,4.2E+1]
236 ; X86-AVX-NEXT: # xmm0 = mem[0,0]
237 ; X86-AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
240 ; X64-AVX-LABEL: elt1_v2f64:
242 ; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
243 ; X64-AVX-NEXT: # xmm1 = mem[0,0]
244 ; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
246 %ins = insertelement <2 x double> <double 42.0, double 1.0>, double %x, i32 1
247 ret <2 x double> %ins
; elt7_v8i32: insert the scalar %x into element 7 of the 256-bit constant
; vector <42, 1, 2, 3, 4, 5, 6, 7>.
; In the SSE runs the result occupies xmm0 and xmm1: xmm0 is loaded straight
; from the constant [42,1,2,3], and only xmm1 (constant <4,5,6,u>) receives
; %x, via two shufps on SSE2 or pinsrd $3 on SSE4.1.
; In the AVX runs %x is broadcast to a ymm register and then blended into
; lane 7 of a memory constant (vblendps or vpblendd).
250 define <8 x i32> @elt7_v8i32(i32 %x) {
251 ; X86-SSE2-LABEL: elt7_v8i32:
253 ; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
254 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,6,u>
255 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
256 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
257 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
258 ; X86-SSE2-NEXT: retl
260 ; X64-SSE2-LABEL: elt7_v8i32:
262 ; X64-SSE2-NEXT: movd %edi, %xmm0
263 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,6,u>
264 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
265 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
266 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
267 ; X64-SSE2-NEXT: retq
269 ; X86-SSE4-LABEL: elt7_v8i32:
271 ; X86-SSE4-NEXT: movdqa {{.*#+}} xmm1 = <4,5,6,u>
272 ; X86-SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm1
273 ; X86-SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
274 ; X86-SSE4-NEXT: retl
276 ; X64-SSE4-LABEL: elt7_v8i32:
278 ; X64-SSE4-NEXT: movdqa {{.*#+}} xmm1 = <4,5,6,u>
279 ; X64-SSE4-NEXT: pinsrd $3, %edi, %xmm1
280 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
281 ; X64-SSE4-NEXT: retq
283 ; X86-AVX-LABEL: elt7_v8i32:
285 ; X86-AVX-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
286 ; X86-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
289 ; X64-AVX1-LABEL: elt7_v8i32:
291 ; X64-AVX1-NEXT: vmovd %edi, %xmm0
292 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
293 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
294 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
295 ; X64-AVX1-NEXT: retq
297 ; X64-AVX2-LABEL: elt7_v8i32:
299 ; X64-AVX2-NEXT: vmovd %edi, %xmm0
300 ; X64-AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
301 ; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
302 ; X64-AVX2-NEXT: retq
304 ; X64-AVX512F-LABEL: elt7_v8i32:
305 ; X64-AVX512F: # %bb.0:
306 ; X64-AVX512F-NEXT: vmovd %edi, %xmm0
307 ; X64-AVX512F-NEXT: vpbroadcastd %xmm0, %ymm0
308 ; X64-AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
309 ; X64-AVX512F-NEXT: retq
310 %ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7
; elt6_v8f32: insert the scalar %x into element 6 of the 256-bit constant
; vector <42.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0>.
; In the SSE runs the result occupies xmm0 and xmm1: xmm0 is loaded straight
; from the constant [42.0,1.0,2.0,3.0], and only xmm1 (constant
; <4.0,5.0,u,7.0>) receives %x, via two shufps on SSE2 or insertps on SSE4.1.
; In the AVX runs %x is broadcast across a ymm register and then blended into
; lane 6 of a memory constant with vblendps.
314 define <8 x float> @elt6_v8f32(float %x) {
315 ; X86-SSE2-LABEL: elt6_v8f32:
317 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
318 ; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
319 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
320 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
321 ; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
322 ; X86-SSE2-NEXT: retl
324 ; X64-SSE2-LABEL: elt6_v8f32:
326 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
327 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
328 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
329 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
330 ; X64-SSE2-NEXT: retq
332 ; X86-SSE4-LABEL: elt6_v8f32:
334 ; X86-SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
335 ; X86-SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
336 ; X86-SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
337 ; X86-SSE4-NEXT: retl
339 ; X64-SSE4-LABEL: elt6_v8f32:
341 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
342 ; X64-SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
343 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
344 ; X64-SSE4-NEXT: retq
346 ; X86-AVX-LABEL: elt6_v8f32:
348 ; X86-AVX-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
349 ; X86-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
352 ; X64-AVX1-LABEL: elt6_v8f32:
354 ; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
355 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
356 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
357 ; X64-AVX1-NEXT: retq
359 ; X64-AVX2-LABEL: elt6_v8f32:
361 ; X64-AVX2-NEXT: vbroadcastss %xmm0, %ymm0
362 ; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
363 ; X64-AVX2-NEXT: retq
365 ; X64-AVX512F-LABEL: elt6_v8f32:
366 ; X64-AVX512F: # %bb.0:
367 ; X64-AVX512F-NEXT: vbroadcastss %xmm0, %ymm0
368 ; X64-AVX512F-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
369 ; X64-AVX512F-NEXT: retq
370 %ins = insertelement <8 x float> <float 42.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, float %x, i32 6
; elt5_v8i64: insert the scalar %x into element 5 of the 512-bit constant
; vector <42, 1, 2, 3, 4, 5, 6, 7>.
; In the SSE runs the result occupies xmm0..xmm3; xmm0, xmm1 and xmm3 are
; plain constant loads and only xmm2 (constant element 4 plus %x) is built.
; The i686 runs print the constants as 32-bit pairs, e.g. [42,0,1,0].
; In the AVX1/AVX2 runs ymm0 is a constant load and ymm1 is assembled from the
; 128-bit piece that holds %x plus constant data.
; The AVX512F runs produce a single zmm0: the i686 run inserts the built ymm1
; into the upper half with vinsertf64x4, and the x86-64 run uses vpermt2q with
; the index vector [0,1,2,3,4,8,6,7].
374 define <8 x i64> @elt5_v8i64(i64 %x) {
375 ; X86-SSE-LABEL: elt5_v8i64:
377 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
378 ; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [4,0,0,0]
379 ; X86-SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
380 ; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = [42,0,1,0]
381 ; X86-SSE-NEXT: movaps {{.*#+}} xmm1 = [2,0,3,0]
382 ; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [6,0,7,0]
385 ; X64-SSE2-LABEL: elt5_v8i64:
387 ; X64-SSE2-NEXT: movq %rdi, %xmm0
388 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = <4,u>
389 ; X64-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
390 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1]
391 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3]
392 ; X64-SSE2-NEXT: movaps {{.*#+}} xmm3 = [6,7]
393 ; X64-SSE2-NEXT: retq
395 ; X64-SSE4-LABEL: elt5_v8i64:
397 ; X64-SSE4-NEXT: movdqa {{.*#+}} xmm2 = <4,u>
398 ; X64-SSE4-NEXT: pinsrq $1, %rdi, %xmm2
399 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1]
400 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm1 = [2,3]
401 ; X64-SSE4-NEXT: movaps {{.*#+}} xmm3 = [6,7]
402 ; X64-SSE4-NEXT: retq
404 ; X86-AVX1-LABEL: elt5_v8i64:
406 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
407 ; X86-AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [4,0,0,0]
408 ; X86-AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
409 ; X86-AVX1-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
410 ; X86-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
411 ; X86-AVX1-NEXT: retl
413 ; X64-AVX1-LABEL: elt5_v8i64:
415 ; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = <4,u,6,7>
416 ; X64-AVX1-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
417 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],mem[4,5,6,7]
418 ; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,1,2,3]
419 ; X64-AVX1-NEXT: retq
421 ; X86-AVX2-LABEL: elt5_v8i64:
423 ; X86-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
424 ; X86-AVX2-NEXT: vmovaps {{.*#+}} xmm1 = [4,0,0,0]
425 ; X86-AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
426 ; X86-AVX2-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
427 ; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
428 ; X86-AVX2-NEXT: retl
430 ; X64-AVX2-LABEL: elt5_v8i64:
432 ; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = <4,u,6,7>
433 ; X64-AVX2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
434 ; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],mem[4,5,6,7]
435 ; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,1,2,3]
436 ; X64-AVX2-NEXT: retq
438 ; X86-AVX512F-LABEL: elt5_v8i64:
439 ; X86-AVX512F: # %bb.0:
440 ; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
441 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
442 ; X86-AVX512F-NEXT: vmovaps {{.*#+}} xmm2 = [4,0,0,0]
443 ; X86-AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
444 ; X86-AVX512F-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
445 ; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
446 ; X86-AVX512F-NEXT: retl
448 ; X64-AVX512F-LABEL: elt5_v8i64:
449 ; X64-AVX512F: # %bb.0:
450 ; X64-AVX512F-NEXT: vmovq %rdi, %xmm1
451 ; X64-AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,8,6,7]
452 ; X64-AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = <42,1,2,3,4,u,6,7>
453 ; X64-AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
454 ; X64-AVX512F-NEXT: retq
455 %ins = insertelement <8 x i64> <i64 42, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, i64 %x, i32 5
; elt1_v8f64: insert the scalar %x into element 1 of the 512-bit constant
; vector <42.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0>.
; In the SSE runs the result occupies xmm0..xmm3; xmm1, xmm2 and xmm3 are
; plain constant loads and only the low 128 bits (<42.0,u> plus %x) are built
; with movhps (i686) or movlhps (x86-64).
; In the AVX1/AVX2 runs ymm1 is a constant load and ymm0 is the low 128 bits
; holding %x blended with a memory constant via vblendps.
; In the AVX512F runs the low 128 bits are built the same way and then
; inserted into a full zmm constant with vinsertf32x4 $0.
459 define <8 x double> @elt1_v8f64(double %x) {
460 ; X86-SSE-LABEL: elt1_v8f64:
462 ; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = <4.2E+1,u>
463 ; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
464 ; X86-SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
465 ; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
466 ; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
469 ; X64-SSE-LABEL: elt1_v8f64:
471 ; X64-SSE-NEXT: movaps {{.*#+}} xmm4 = <4.2E+1,u>
472 ; X64-SSE-NEXT: movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
473 ; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
474 ; X64-SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
475 ; X64-SSE-NEXT: movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
476 ; X64-SSE-NEXT: movaps %xmm4, %xmm0
479 ; X86-AVX1-LABEL: elt1_v8f64:
481 ; X86-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
482 ; X86-AVX1-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
483 ; X86-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
484 ; X86-AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
485 ; X86-AVX1-NEXT: retl
487 ; X64-AVX1-LABEL: elt1_v8f64:
489 ; X64-AVX1-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
490 ; X64-AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
491 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
492 ; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
493 ; X64-AVX1-NEXT: retq
495 ; X86-AVX2-LABEL: elt1_v8f64:
497 ; X86-AVX2-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
498 ; X86-AVX2-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
499 ; X86-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
500 ; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
501 ; X86-AVX2-NEXT: retl
503 ; X64-AVX2-LABEL: elt1_v8f64:
505 ; X64-AVX2-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
506 ; X64-AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
507 ; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
508 ; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
509 ; X64-AVX2-NEXT: retq
511 ; X86-AVX512F-LABEL: elt1_v8f64:
512 ; X86-AVX512F: # %bb.0:
513 ; X86-AVX512F-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
514 ; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
515 ; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
516 ; X86-AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
517 ; X86-AVX512F-NEXT: retl
519 ; X64-AVX512F-LABEL: elt1_v8f64:
520 ; X64-AVX512F: # %bb.0:
521 ; X64-AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
522 ; X64-AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
523 ; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
524 ; X64-AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
525 ; X64-AVX512F-NEXT: retq
526 %ins = insertelement <8 x double> <double 42.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, double %x, i32 1
527 ret <8 x double> %ins