; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): the CHECK blocks below appear truncated (missing "# %bb.0:",
; final retl/retq lines, and ";" separators) — regenerate with
; utils/update_llc_test_checks.py to restore full assertion coverage.
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE4
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE4
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX1
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX512F
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX512F
; Insert a scalar i8 into element 0 of a build-vector constant.
define <16 x i8> @elt0_v16i8(i8 %x) {
; X32SSE2-LABEL: elt0_v16i8:
; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; X32SSE2-NEXT: andnps %xmm1, %xmm0
; X32SSE2-NEXT: orps {{\.LCPI.*}}, %xmm0
;
; X64SSE2-LABEL: elt0_v16i8:
; X64SSE2-NEXT: movd %edi, %xmm1
; X64SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; X64SSE2-NEXT: pandn %xmm1, %xmm0
; X64SSE2-NEXT: por {{.*}}(%rip), %xmm0
;
; X32SSE4-LABEL: elt0_v16i8:
; X32SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; X32SSE4-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
;
; X64SSE4-LABEL: elt0_v16i8:
; X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; X64SSE4-NEXT: pinsrb $0, %edi, %xmm0
;
; X32AVX-LABEL: elt0_v16i8:
; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; X32AVX-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
;
; X64AVX-LABEL: elt0_v16i8:
; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; X64AVX-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0
  %ins = insertelement <16 x i8> <i8 42, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i8 %x, i32 0
  ret <16 x i8> %ins
}
; Insert a scalar i16 into element 5 of a build-vector constant.
define <8 x i16> @elt5_v8i16(i16 %x) {
; X32SSE-LABEL: elt5_v8i16:
; X32SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
; X32SSE-NEXT: pinsrw $5, {{[0-9]+}}(%esp), %xmm0
;
; X64SSE-LABEL: elt5_v8i16:
; X64SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
; X64SSE-NEXT: pinsrw $5, %edi, %xmm0
;
; X32AVX-LABEL: elt5_v8i16:
; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
; X32AVX-NEXT: vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
;
; X64AVX-LABEL: elt5_v8i16:
; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
; X64AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
  %ins = insertelement <8 x i16> <i16 42, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 %x, i32 5
  ret <8 x i16> %ins
}
; Insert a scalar i32 into element 3 of a build-vector constant.
define <4 x i32> @elt3_v4i32(i32 %x) {
; X32SSE2-LABEL: elt3_v4i32:
; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = <42,1,2,u>
; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
;
; X64SSE2-LABEL: elt3_v4i32:
; X64SSE2-NEXT: movd %edi, %xmm1
; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = <42,1,2,u>
; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
;
; X32SSE4-LABEL: elt3_v4i32:
; X32SSE4-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,u>
; X32SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0
;
; X64SSE4-LABEL: elt3_v4i32:
; X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,u>
; X64SSE4-NEXT: pinsrd $3, %edi, %xmm0
;
; X32AVX-LABEL: elt3_v4i32:
; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,u>
; X32AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
;
; X64AVX-LABEL: elt3_v4i32:
; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,u>
; X64AVX-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
  %ins = insertelement <4 x i32> <i32 42, i32 1, i32 2, i32 3>, i32 %x, i32 3
  ret <4 x i32> %ins
}
; Insert a scalar i64 into element 0 of a build-vector constant.
define <2 x i64> @elt0_v2i64(i64 %x) {
; X32SSE-LABEL: elt0_v2i64:
; X32SSE-NEXT: movl $1, %eax
; X32SSE-NEXT: movd %eax, %xmm1
; X32SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
;
; X64SSE2-LABEL: elt0_v2i64:
; X64SSE2-NEXT: movq %rdi, %xmm1
; X64SSE2-NEXT: movapd {{.*#+}} xmm0 = <u,1>
; X64SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
;
; X64SSE4-LABEL: elt0_v2i64:
; X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1>
; X64SSE4-NEXT: pinsrq $0, %rdi, %xmm0
;
; X32AVX-LABEL: elt0_v2i64:
; X32AVX-NEXT: movl $1, %eax
; X32AVX-NEXT: vmovd %eax, %xmm0
; X32AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X32AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
;
; X64AVX-LABEL: elt0_v2i64:
; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,1>
; X64AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
  %ins = insertelement <2 x i64> <i64 42, i64 1>, i64 %x, i32 0
  ret <2 x i64> %ins
}
; Insert a scalar float into element 1 of a build-vector constant.
define <4 x float> @elt1_v4f32(float %x) {
; X32SSE2-LABEL: elt1_v4f32:
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
;
; X64SSE2-LABEL: elt1_v4f32:
; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
;
; X32SSE4-LABEL: elt1_v4f32:
; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
;
; X64SSE4-LABEL: elt1_v4f32:
; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
; X64SSE4-NEXT: movaps %xmm1, %xmm0
;
; X32AVX-LABEL: elt1_v4f32:
; X32AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
;
; X64AVX-LABEL: elt1_v4f32:
; X64AVX-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
  %ins = insertelement <4 x float> <float 42.0, float 1.0, float 2.0, float 3.0>, float %x, i32 1
  ret <4 x float> %ins
}
; Insert a scalar double into element 1 of a build-vector constant.
define <2 x double> @elt1_v2f64(double %x) {
; X32SSE-LABEL: elt1_v2f64:
; X32SSE-NEXT: movaps {{.*#+}} xmm0 = <4.2E+1,u>
; X32SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
;
; X64SSE-LABEL: elt1_v2f64:
; X64SSE-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u>
; X64SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64SSE-NEXT: movaps %xmm1, %xmm0
;
; X32AVX-LABEL: elt1_v2f64:
; X32AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u>
; X32AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
;
; X64AVX-LABEL: elt1_v2f64:
; X64AVX-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u>
; X64AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
  %ins = insertelement <2 x double> <double 42.0, double 1.0>, double %x, i32 1
  ret <2 x double> %ins
}
; Insert a scalar i32 into element 7 of a 256-bit build-vector constant.
define <8 x i32> @elt7_v8i32(i32 %x) {
; X32SSE2-LABEL: elt7_v8i32:
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,6,u>
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
;
; X64SSE2-LABEL: elt7_v8i32:
; X64SSE2-NEXT: movd %edi, %xmm0
; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,6,u>
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
;
; X32SSE4-LABEL: elt7_v8i32:
; X32SSE4-NEXT: movdqa {{.*#+}} xmm1 = <4,5,6,u>
; X32SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm1
; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
;
; X64SSE4-LABEL: elt7_v8i32:
; X64SSE4-NEXT: movdqa {{.*#+}} xmm1 = <4,5,6,u>
; X64SSE4-NEXT: pinsrd $3, %edi, %xmm1
; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
;
; X32AVX-LABEL: elt7_v8i32:
; X32AVX-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
;
; X64AVX1-LABEL: elt7_v8i32:
; X64AVX1-NEXT: vmovd %edi, %xmm0
; X64AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; X64AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
;
; X64AVX2-LABEL: elt7_v8i32:
; X64AVX2-NEXT: vmovd %edi, %xmm0
; X64AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; X64AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
;
; X64AVX512F-LABEL: elt7_v8i32:
; X64AVX512F: # %bb.0:
; X64AVX512F-NEXT: vmovd %edi, %xmm0
; X64AVX512F-NEXT: vpbroadcastd %xmm0, %ymm0
; X64AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64AVX512F-NEXT: retq
  %ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7
  ret <8 x i32> %ins
}
; Insert a scalar float into element 6 of a 256-bit build-vector constant.
define <8 x float> @elt6_v8f32(float %x) {
; X32SSE2-LABEL: elt6_v8f32:
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
;
; X64SSE2-LABEL: elt6_v8f32:
; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
;
; X32SSE4-LABEL: elt6_v8f32:
; X32SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
;
; X64SSE4-LABEL: elt6_v8f32:
; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
;
; X32AVX-LABEL: elt6_v8f32:
; X32AVX-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
;
; X64AVX1-LABEL: elt6_v8f32:
; X64AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
;
; X64AVX2-LABEL: elt6_v8f32:
; X64AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; X64AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
;
; X64AVX512F-LABEL: elt6_v8f32:
; X64AVX512F: # %bb.0:
; X64AVX512F-NEXT: vbroadcastss %xmm0, %ymm0
; X64AVX512F-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64AVX512F-NEXT: retq
  %ins = insertelement <8 x float> <float 42.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, float %x, i32 6
  ret <8 x float> %ins
}
; Insert a scalar i64 into element 5 of a 512-bit build-vector constant.
define <8 x i64> @elt5_v8i64(i64 %x) {
; X32SSE-LABEL: elt5_v8i64:
; X32SSE-NEXT: movl $4, %eax
; X32SSE-NEXT: movd %eax, %xmm2
; X32SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X32SSE-NEXT: movaps {{.*#+}} xmm0 = [42,0,1,0]
; X32SSE-NEXT: movaps {{.*#+}} xmm1 = [2,0,3,0]
; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6,0,7,0]
;
; X64SSE2-LABEL: elt5_v8i64:
; X64SSE2-NEXT: movq %rdi, %xmm0
; X64SSE2-NEXT: movdqa {{.*#+}} xmm2 = <4,u>
; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1]
; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3]
; X64SSE2-NEXT: movaps {{.*#+}} xmm3 = [6,7]
;
; X64SSE4-LABEL: elt5_v8i64:
; X64SSE4-NEXT: movdqa {{.*#+}} xmm2 = <4,u>
; X64SSE4-NEXT: pinsrq $1, %rdi, %xmm2
; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1]
; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = [2,3]
; X64SSE4-NEXT: movaps {{.*#+}} xmm3 = [6,7]
;
; X32AVX1-LABEL: elt5_v8i64:
; X32AVX1-NEXT: movl $4, %eax
; X32AVX1-NEXT: vmovd %eax, %xmm0
; X32AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X32AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32AVX1-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm1
; X32AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
;
; X64AVX1-LABEL: elt5_v8i64:
; X64AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = <4,u,6,7>
; X64AVX1-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; X64AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,1,2,3]
;
; X32AVX2-LABEL: elt5_v8i64:
; X32AVX2-NEXT: movl $4, %eax
; X32AVX2-NEXT: vmovd %eax, %xmm0
; X32AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X32AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32AVX2-NEXT: vinserti128 $1, {{\.LCPI.*}}, %ymm0, %ymm1
; X32AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
;
; X64AVX2-LABEL: elt5_v8i64:
; X64AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = <4,u,6,7>
; X64AVX2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; X64AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,1,2,3]
;
; X32AVX512F-LABEL: elt5_v8i64:
; X32AVX512F: # %bb.0:
; X32AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X32AVX512F-NEXT: movl $4, %eax
; X32AVX512F-NEXT: vmovd %eax, %xmm1
; X32AVX512F-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X32AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X32AVX512F-NEXT: vinserti128 $1, {{\.LCPI.*}}, %ymm1, %ymm1
; X32AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32AVX512F-NEXT: retl
;
; X64AVX512F-LABEL: elt5_v8i64:
; X64AVX512F: # %bb.0:
; X64AVX512F-NEXT: vmovq %rdi, %xmm1
; X64AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,8,6,7]
; X64AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = <42,1,2,3,4,u,6,7>
; X64AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; X64AVX512F-NEXT: retq
  %ins = insertelement <8 x i64> <i64 42, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, i64 %x, i32 5
  ret <8 x i64> %ins
}
; Insert a scalar double into element 1 of a 512-bit build-vector constant.
define <8 x double> @elt1_v8f64(double %x) {
; X32SSE-LABEL: elt1_v8f64:
; X32SSE-NEXT: movaps {{.*#+}} xmm0 = <4.2E+1,u>
; X32SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X32SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X32SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
;
; X64SSE-LABEL: elt1_v8f64:
; X64SSE-NEXT: movaps {{.*#+}} xmm4 = <4.2E+1,u>
; X64SSE-NEXT: movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
; X64SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X64SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
; X64SSE-NEXT: movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X64SSE-NEXT: movaps %xmm4, %xmm0
;
; X32AVX1-LABEL: elt1_v8f64:
; X32AVX1-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32AVX1-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X32AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X32AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
;
; X64AVX1-LABEL: elt1_v8f64:
; X64AVX1-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
;
; X32AVX2-LABEL: elt1_v8f64:
; X32AVX2-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32AVX2-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X32AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X32AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
;
; X64AVX2-LABEL: elt1_v8f64:
; X64AVX2-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
;
; X32AVX512F-LABEL: elt1_v8f64:
; X32AVX512F: # %bb.0:
; X32AVX512F-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X32AVX512F-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X32AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X32AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X32AVX512F-NEXT: retl
;
; X64AVX512F-LABEL: elt1_v8f64:
; X64AVX512F: # %bb.0:
; X64AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X64AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X64AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X64AVX512F-NEXT: retq
  %ins = insertelement <8 x double> <double 42.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, double %x, i32 1
  ret <8 x double> %ins
}