; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64-SSE,X64-SSE2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE4
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X64-SSE,X64-SSE4
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=X86-AVX,X86-AVX512F
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512F
; Insert a variable i8 into element 0 of a constant <16 x i8> vector.
define <16 x i8> @elt0_v16i8(i8 %x) {
; X86-SSE2-LABEL: elt0_v16i8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; X86-SSE2-NEXT:    andnps %xmm1, %xmm0
; X86-SSE2-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt0_v16i8:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movd %edi, %xmm1
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; X64-SSE2-NEXT:    pandn %xmm1, %xmm0
; X64-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt0_v16i8:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; X86-SSE4-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt0_v16i8:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; X64-SSE4-NEXT:    pinsrb $0, %edi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt0_v16i8:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; X86-AVX-NEXT:    vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt0_v16i8:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; X64-AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <16 x i8> <i8 42, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i8 %x, i32 0
  ret <16 x i8> %ins
}
; Insert a variable i16 into element 5 of a constant <8 x i16> vector.
define <8 x i16> @elt5_v8i16(i16 %x) {
; X86-SSE-LABEL: elt5_v8i16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
; X86-SSE-NEXT:    pinsrw $5, {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT:    retl
;
; X64-SSE-LABEL: elt5_v8i16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
; X64-SSE-NEXT:    pinsrw $5, %edi, %xmm0
; X64-SSE-NEXT:    retq
;
; X86-AVX-LABEL: elt5_v8i16:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
; X86-AVX-NEXT:    vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt5_v8i16:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
; X64-AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <8 x i16> <i16 42, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 %x, i32 5
  ret <8 x i16> %ins
}
; Insert a variable i32 into element 3 of a constant <4 x i32> vector.
define <4 x i32> @elt3_v4i32(i32 %x) {
; X86-SSE2-LABEL: elt3_v4i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = <42,1,2,u>
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt3_v4i32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movd %edi, %xmm1
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = <42,1,2,u>
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt3_v4i32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = <42,1,2,u>
; X86-SSE4-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm0
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt3_v4i32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = <42,1,2,u>
; X64-SSE4-NEXT:    pinsrd $3, %edi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt3_v4i32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = <42,1,2,u>
; X86-AVX-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt3_v4i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = <42,1,2,u>
; X64-AVX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <4 x i32> <i32 42, i32 1, i32 2, i32 3>, i32 %x, i32 3
  ret <4 x i32> %ins
}
; Insert a variable i64 into element 0 of a constant <2 x i64> vector.
define <2 x i64> @elt0_v2i64(i64 %x) {
; X86-SSE-LABEL: elt0_v2i64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT:    retl
;
; X64-SSE2-LABEL: elt0_v2i64:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movq %rdi, %xmm1
; X64-SSE2-NEXT:    movapd {{.*#+}} xmm0 = <u,1>
; X64-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE2-NEXT:    retq
;
; X64-SSE4-LABEL: elt0_v2i64:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = <u,1>
; X64-SSE4-NEXT:    pinsrq $0, %rdi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt0_v2i64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt0_v2i64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = <u,1>
; X64-AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <2 x i64> <i64 42, i64 1>, i64 %x, i32 0
  ret <2 x i64> %ins
}
; Insert a variable float into element 1 of a constant <4 x float> vector.
define <4 x float> @elt1_v4f32(float %x) {
; X86-SSE2-LABEL: elt1_v4f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt1_v4f32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt1_v4f32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X86-SSE4-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt1_v4f32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
; X64-SSE4-NEXT:    movaps %xmm1, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt1_v4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X86-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt1_v4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; X64-AVX-NEXT:    retq
  %ins = insertelement <4 x float> <float 42.0, float 1.0, float 2.0, float 3.0>, float %x, i32 1
  ret <4 x float> %ins
}
; Insert a variable double into element 1 of a constant <2 x double> vector.
define <2 x double> @elt1_v2f64(double %x) {
; X86-SSE-LABEL: elt1_v2f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = <4.2E+1,u>
; X86-SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    retl
;
; X64-SSE-LABEL: elt1_v2f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = <4.2E+1,u>
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movaps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X86-AVX-LABEL: elt1_v2f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <4.2E+1,u>
; X86-AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt1_v2f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} xmm1 = <4.2E+1,u>
; X64-AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX-NEXT:    retq
  %ins = insertelement <2 x double> <double 42.0, double 1.0>, double %x, i32 1
  ret <2 x double> %ins
}
; Insert a variable i32 into element 7 of a constant 256-bit <8 x i32> vector.
define <8 x i32> @elt7_v8i32(i32 %x) {
; X86-SSE2-LABEL: elt7_v8i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = <4,5,6,u>
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt7_v8i32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movd %edi, %xmm0
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = <4,5,6,u>
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt7_v8i32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movdqa {{.*#+}} xmm1 = <4,5,6,u>
; X86-SSE4-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm1
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt7_v8i32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movdqa {{.*#+}} xmm1 = <4,5,6,u>
; X64-SSE4-NEXT:    pinsrd $3, %edi, %xmm1
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt7_v8i32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X86-AVX-NEXT:    retl
;
; X64-AVX1-LABEL: elt7_v8i32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %edi, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: elt7_v8i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovd %edi, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX2-NEXT:    retq
;
; X64-AVX512F-LABEL: elt7_v8i32:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovd %edi, %xmm0
; X64-AVX512F-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7
  ret <8 x i32> %ins
}
; Insert a variable float into element 6 of a constant 256-bit <8 x float> vector.
define <8 x float> @elt6_v8f32(float %x) {
; X86-SSE2-LABEL: elt6_v8f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt6_v8f32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt6_v8f32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X86-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt6_v8f32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X64-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt6_v8f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X86-AVX-NEXT:    retl
;
; X64-AVX1-LABEL: elt6_v8f32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: elt6_v8f32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX2-NEXT:    retq
;
; X64-AVX512F-LABEL: elt6_v8f32:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-AVX512F-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x float> <float 42.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, float %x, i32 6
  ret <8 x float> %ins
}
; Insert a variable i64 into element 5 of a constant 512-bit <8 x i64> vector.
define <8 x i64> @elt5_v8i64(i64 %x) {
; X86-SSE-LABEL: elt5_v8i64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movaps {{.*#+}} xmm2 = [4,0,0,0]
; X86-SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [42,0,1,0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,0,3,0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6,0,7,0]
; X86-SSE-NEXT:    retl
;
; X64-SSE2-LABEL: elt5_v8i64:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movq %rdi, %xmm0
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = <4,u>
; X64-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm3 = [6,7]
; X64-SSE2-NEXT:    retq
;
; X64-SSE4-LABEL: elt5_v8i64:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movdqa {{.*#+}} xmm2 = <4,u>
; X64-SSE4-NEXT:    pinsrq $1, %rdi, %xmm2
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm3 = [6,7]
; X64-SSE4-NEXT:    retq
;
; X86-AVX1-LABEL: elt5_v8i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [4,0,0,0]
; X86-AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X86-AVX1-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
; X86-AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X86-AVX1-NEXT:    retl
;
; X64-AVX1-LABEL: elt5_v8i64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = <4,u,6,7>
; X64-AVX1-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [42,1,2,3]
; X64-AVX1-NEXT:    retq
;
; X86-AVX2-LABEL: elt5_v8i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX2-NEXT:    vmovaps {{.*#+}} xmm1 = [4,0,0,0]
; X86-AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X86-AVX2-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
; X86-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X86-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: elt5_v8i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm0 = <4,u,6,7>
; X64-AVX2-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [42,1,2,3]
; X64-AVX2-NEXT:    retq
;
; X86-AVX512F-LABEL: elt5_v8i64:
; X86-AVX512F:       # %bb.0:
; X86-AVX512F-NEXT:    vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X86-AVX512F-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX512F-NEXT:    vmovaps {{.*#+}} xmm2 = [4,0,0,0]
; X86-AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X86-AVX512F-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X86-AVX512F-NEXT:    retl
;
; X64-AVX512F-LABEL: elt5_v8i64:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovq %rdi, %xmm1
; X64-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,8,6,7]
; X64-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm0 = <42,1,2,3,4,u,6,7>
; X64-AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x i64> <i64 42, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, i64 %x, i32 5
  ret <8 x i64> %ins
}
; Insert a variable double into element 1 of a constant 512-bit <8 x double> vector.
define <8 x double> @elt1_v8f64(double %x) {
; X86-SSE-LABEL: elt1_v8f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = <4.2E+1,u>
; X86-SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X86-SSE-NEXT:    retl
;
; X64-SSE-LABEL: elt1_v8f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = <4.2E+1,u>
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X64-SSE-NEXT:    movaps %xmm4, %xmm0
; X64-SSE-NEXT:    retq
;
; X86-AVX1-LABEL: elt1_v8f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X86-AVX1-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X86-AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX1-NEXT:    retl
;
; X64-AVX1-LABEL: elt1_v8f64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64-AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX1-NEXT:    retq
;
; X86-AVX2-LABEL: elt1_v8f64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X86-AVX2-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X86-AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: elt1_v8f64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64-AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64-AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX2-NEXT:    retq
;
; X86-AVX512F-LABEL: elt1_v8f64:
; X86-AVX512F:       # %bb.0:
; X86-AVX512F-NEXT:    vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X86-AVX512F-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X86-AVX512F-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X86-AVX512F-NEXT:    retl
;
; X64-AVX512F-LABEL: elt1_v8f64:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X64-AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X64-AVX512F-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x double> <double 42.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, double %x, i32 1
  ret <8 x double> %ins
}