1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; foo: fptoui <3 x float> to <3 x i8>, widened to <4 x i8> with element 3
; forced to -1 (0xFF), then stored to %out as a single 32-bit value.
; NOTE(review): the SSE2/SSE41 LABEL lines and the ret/closing brace are not
; present in this excerpt; regenerate checks with update_llc_test_checks.py.
7 define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
10 ; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
11 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
13 ; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
14 ; SSE2-NEXT: shll $8, %ecx
15 ; SSE2-NEXT: orl %eax, %ecx
16 ; SSE2-NEXT: movd %ecx, %xmm0
17 ; SSE2-NEXT: movl $65280, %eax # imm = 0xFF00
18 ; SSE2-NEXT: orl -{{[0-9]+}}(%rsp), %eax
19 ; SSE2-NEXT: pinsrw $1, %eax, %xmm0
20 ; SSE2-NEXT: movd %xmm0, (%rdi)
25 ; SSE41-NEXT: cvttps2dq %xmm0, %xmm0
26 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
27 ; SSE41-NEXT: movl $255, %eax
28 ; SSE41-NEXT: pinsrb $3, %eax, %xmm0
29 ; SSE41-NEXT: movd %xmm0, (%rdi)
34 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
35 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
36 ; AVX-NEXT: movl $255, %eax
37 ; AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
38 ; AVX-NEXT: vmovd %xmm0, (%rdi)
40 %t0 = fptoui <3 x float> %in to <3 x i8>
41 %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
42 %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
43 store <4 x i8> %t2, <4 x i8>* %out, align 4
47 ; Verify that the DAGCombiner doesn't wrongly fold a build_vector into a
48 ; blend with a zero vector if the build_vector contains negative zero.
; test_negative_zero_1: build-vector whose lane 1 is -0.0 — must not be folded
; into a blend with a zero vector (-0.0 and +0.0 differ bitwise).
50 define <4 x float> @test_negative_zero_1(<4 x float> %A) {
51 ; SSE2-LABEL: test_negative_zero_1:
52 ; SSE2: # %bb.0: # %entry
53 ; SSE2-NEXT: xorps %xmm1, %xmm1
54 ; SSE2-NEXT: movaps %xmm0, %xmm2
55 ; SSE2-NEXT: unpckhps {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
56 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
57 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
58 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
61 ; SSE41-LABEL: test_negative_zero_1:
62 ; SSE41: # %bb.0: # %entry
63 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2],zero
66 ; AVX-LABEL: test_negative_zero_1:
67 ; AVX: # %bb.0: # %entry
68 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2],zero
71 %0 = extractelement <4 x float> %A, i32 0
72 %1 = insertelement <4 x float> undef, float %0, i32 0
73 %2 = insertelement <4 x float> %1, float -0.0, i32 1
74 %3 = extractelement <4 x float> %A, i32 2
75 %4 = insertelement <4 x float> %2, float %3, i32 2
76 %5 = insertelement <4 x float> %4, float 0.0, i32 3
80 ; FIXME: This could be 'movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]'.
; test_negative_zero_2: same negative-zero hazard as above, for <2 x double>
; with -0.0 in lane 1 (see FIXME above about the preferable movhpd form).
82 define <2 x double> @test_negative_zero_2(<2 x double> %A) {
83 ; SSE2-LABEL: test_negative_zero_2:
84 ; SSE2: # %bb.0: # %entry
85 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
88 ; SSE41-LABEL: test_negative_zero_2:
89 ; SSE41: # %bb.0: # %entry
90 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
93 ; AVX-LABEL: test_negative_zero_2:
94 ; AVX: # %bb.0: # %entry
95 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
98 %0 = extractelement <2 x double> %A, i32 0
99 %1 = insertelement <2 x double> undef, double %0, i32 0
100 %2 = insertelement <2 x double> %1, double -0.0, i32 1
; test_buildvector_v4f32_register: build <4 x float> from four scalar FP args.
104 define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) {
105 ; SSE2-LABEL: test_buildvector_v4f32_register:
107 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
108 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
109 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
112 ; SSE41-LABEL: test_buildvector_v4f32_register:
114 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
115 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
116 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
119 ; AVX-LABEL: test_buildvector_v4f32_register:
121 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
122 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
123 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
125 %ins0 = insertelement <4 x float> undef, float %f0, i32 0
126 %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
127 %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
128 %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
129 ret <4 x float> %ins3
; test_buildvector_v4f32_load: build <4 x float> from four individually
; loaded floats.
132 define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %p2, float* %p3) {
133 ; SSE2-LABEL: test_buildvector_v4f32_load:
135 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
136 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
137 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
138 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
139 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
140 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
141 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
144 ; SSE41-LABEL: test_buildvector_v4f32_load:
146 ; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
147 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
148 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
149 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
152 ; AVX-LABEL: test_buildvector_v4f32_load:
154 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
155 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
156 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
157 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
159 %f0 = load float, float* %p0, align 4
160 %f1 = load float, float* %p1, align 4
161 %f2 = load float, float* %p2, align 4
162 %f3 = load float, float* %p3, align 4
163 %ins0 = insertelement <4 x float> undef, float %f0, i32 0
164 %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
165 %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
166 %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
167 ret <4 x float> %ins3
; test_buildvector_v4f32_partial_load: three register floats plus one loaded
; float in the last lane.
170 define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) {
171 ; SSE2-LABEL: test_buildvector_v4f32_partial_load:
173 ; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
174 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
175 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
176 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
179 ; SSE41-LABEL: test_buildvector_v4f32_partial_load:
181 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
182 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
183 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
186 ; AVX-LABEL: test_buildvector_v4f32_partial_load:
188 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
189 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
190 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
192 %f3 = load float, float* %p3, align 4
193 %ins0 = insertelement <4 x float> undef, float %f0, i32 0
194 %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
195 %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
196 %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
197 ret <4 x float> %ins3
; test_buildvector_v4i32_register: build <4 x i32> from four GPR args.
200 define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
201 ; SSE2-LABEL: test_buildvector_v4i32_register:
203 ; SSE2-NEXT: movd %ecx, %xmm0
204 ; SSE2-NEXT: movd %edx, %xmm1
205 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
206 ; SSE2-NEXT: movd %esi, %xmm2
207 ; SSE2-NEXT: movd %edi, %xmm0
208 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
209 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
212 ; SSE41-LABEL: test_buildvector_v4i32_register:
214 ; SSE41-NEXT: movd %edi, %xmm0
215 ; SSE41-NEXT: pinsrd $1, %esi, %xmm0
216 ; SSE41-NEXT: pinsrd $2, %edx, %xmm0
217 ; SSE41-NEXT: pinsrd $3, %ecx, %xmm0
220 ; AVX-LABEL: test_buildvector_v4i32_register:
222 ; AVX-NEXT: vmovd %edi, %xmm0
223 ; AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
224 ; AVX-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
225 ; AVX-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
227 %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
228 %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
229 %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
230 %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
; test_buildvector_v4i32_partial: lanes 1 and 2 are undef; only lanes 0 and 3
; carry register values.
234 define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) {
235 ; SSE2-LABEL: test_buildvector_v4i32_partial:
237 ; SSE2-NEXT: movd %edi, %xmm0
238 ; SSE2-NEXT: movd %esi, %xmm1
239 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
240 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
243 ; SSE41-LABEL: test_buildvector_v4i32_partial:
245 ; SSE41-NEXT: movd %edi, %xmm0
246 ; SSE41-NEXT: pinsrd $3, %esi, %xmm0
249 ; AVX-LABEL: test_buildvector_v4i32_partial:
251 ; AVX-NEXT: vmovd %edi, %xmm0
252 ; AVX-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0
254 %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
255 %ins1 = insertelement <4 x i32> %ins0, i32 undef, i32 1
256 %ins2 = insertelement <4 x i32> %ins1, i32 undef, i32 2
257 %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
; test_buildvector_v4i32_register_zero: lane 1 is constant zero; remaining
; lanes come from registers.
261 define <4 x i32> @test_buildvector_v4i32_register_zero(i32 %a0, i32 %a2, i32 %a3) {
262 ; SSE-LABEL: test_buildvector_v4i32_register_zero:
264 ; SSE-NEXT: movd %edx, %xmm0
265 ; SSE-NEXT: movd %esi, %xmm1
266 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
267 ; SSE-NEXT: movd %edi, %xmm0
268 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
271 ; AVX-LABEL: test_buildvector_v4i32_register_zero:
273 ; AVX-NEXT: vmovd %edx, %xmm0
274 ; AVX-NEXT: vmovd %esi, %xmm1
275 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
276 ; AVX-NEXT: vmovd %edi, %xmm1
277 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
279 %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
280 %ins1 = insertelement <4 x i32> %ins0, i32 0, i32 1
281 %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
282 %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
; test_buildvector_v4i32_register_zero_2: as above but the zero is in lane 0.
286 define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 %a3) {
287 ; SSE-LABEL: test_buildvector_v4i32_register_zero_2:
289 ; SSE-NEXT: movd %edx, %xmm0
290 ; SSE-NEXT: movd %esi, %xmm1
291 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
292 ; SSE-NEXT: movd %edi, %xmm0
293 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,1]
296 ; AVX-LABEL: test_buildvector_v4i32_register_zero_2:
298 ; AVX-NEXT: vmovd %edx, %xmm0
299 ; AVX-NEXT: vmovd %esi, %xmm1
300 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
301 ; AVX-NEXT: vmovd %edi, %xmm1
302 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[1,0],xmm0[0,1]
304 %ins0 = insertelement <4 x i32> undef, i32 0, i32 0
305 %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
306 %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
307 %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
; test_buildvector_v8i16_register: build <8 x i16> from eight args (the last
; two arrive on the stack under the SysV ABI, hence the (%rsp) operands).
311 define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
312 ; SSE2-LABEL: test_buildvector_v8i16_register:
314 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
315 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
316 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
317 ; SSE2-NEXT: movd %r9d, %xmm0
318 ; SSE2-NEXT: movd %r8d, %xmm2
319 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
320 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
321 ; SSE2-NEXT: movd %ecx, %xmm0
322 ; SSE2-NEXT: movd %edx, %xmm1
323 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
324 ; SSE2-NEXT: movd %esi, %xmm3
325 ; SSE2-NEXT: movd %edi, %xmm0
326 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
327 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
328 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
331 ; SSE41-LABEL: test_buildvector_v8i16_register:
333 ; SSE41-NEXT: movd %edi, %xmm0
334 ; SSE41-NEXT: pinsrw $1, %esi, %xmm0
335 ; SSE41-NEXT: pinsrw $2, %edx, %xmm0
336 ; SSE41-NEXT: pinsrw $3, %ecx, %xmm0
337 ; SSE41-NEXT: pinsrw $4, %r8d, %xmm0
338 ; SSE41-NEXT: pinsrw $5, %r9d, %xmm0
339 ; SSE41-NEXT: pinsrw $6, {{[0-9]+}}(%rsp), %xmm0
340 ; SSE41-NEXT: pinsrw $7, {{[0-9]+}}(%rsp), %xmm0
343 ; AVX-LABEL: test_buildvector_v8i16_register:
345 ; AVX-NEXT: vmovd %edi, %xmm0
346 ; AVX-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0
347 ; AVX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
348 ; AVX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
349 ; AVX-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0
350 ; AVX-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0
351 ; AVX-NEXT: vpinsrw $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
352 ; AVX-NEXT: vpinsrw $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
354 %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
355 %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
356 %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2
357 %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
358 %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
359 %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
360 %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6
361 %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7
; test_buildvector_v8i16_partial: only lanes 1, 3, 4, 5 are defined; the
; remaining lanes are undef.
365 define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
366 ; SSE-LABEL: test_buildvector_v8i16_partial:
368 ; SSE-NEXT: pxor %xmm0, %xmm0
369 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
370 ; SSE-NEXT: pinsrw $3, %esi, %xmm0
371 ; SSE-NEXT: pinsrw $4, %edx, %xmm0
372 ; SSE-NEXT: pinsrw $5, %ecx, %xmm0
375 ; AVX-LABEL: test_buildvector_v8i16_partial:
377 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
378 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
379 ; AVX-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
380 ; AVX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
381 ; AVX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
383 %ins0 = insertelement <8 x i16> undef, i16 undef, i32 0
384 %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
385 %ins2 = insertelement <8 x i16> %ins1, i16 undef, i32 2
386 %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
387 %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
388 %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
389 %ins6 = insertelement <8 x i16> %ins5, i16 undef, i32 6
390 %ins7 = insertelement <8 x i16> %ins6, i16 undef, i32 7
; test_buildvector_v8i16_register_zero: lanes 0, 3, 4, 5 are register values;
; the other lanes are constant zero.
394 define <8 x i16> @test_buildvector_v8i16_register_zero(i16 %a0, i16 %a3, i16 %a4, i16 %a5) {
395 ; SSE-LABEL: test_buildvector_v8i16_register_zero:
397 ; SSE-NEXT: movzwl %di, %eax
398 ; SSE-NEXT: movd %eax, %xmm0
399 ; SSE-NEXT: pinsrw $3, %esi, %xmm0
400 ; SSE-NEXT: pinsrw $4, %edx, %xmm0
401 ; SSE-NEXT: pinsrw $5, %ecx, %xmm0
404 ; AVX-LABEL: test_buildvector_v8i16_register_zero:
406 ; AVX-NEXT: movzwl %di, %eax
407 ; AVX-NEXT: vmovd %eax, %xmm0
408 ; AVX-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
409 ; AVX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
410 ; AVX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
412 %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
413 %ins1 = insertelement <8 x i16> %ins0, i16 0, i32 1
414 %ins2 = insertelement <8 x i16> %ins1, i16 0, i32 2
415 %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
416 %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
417 %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
418 %ins6 = insertelement <8 x i16> %ins5, i16 0, i32 6
419 %ins7 = insertelement <8 x i16> %ins6, i16 0, i32 7
; test_buildvector_v8i16_register_zero_2: variant with constant zero in
; lane 0 and register values in lanes 1, 3, 4, 5.
423 define <8 x i16> @test_buildvector_v8i16_register_zero_2(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
424 ; SSE-LABEL: test_buildvector_v8i16_register_zero_2:
426 ; SSE-NEXT: pxor %xmm0, %xmm0
427 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
428 ; SSE-NEXT: pinsrw $3, %esi, %xmm0
429 ; SSE-NEXT: pinsrw $4, %edx, %xmm0
430 ; SSE-NEXT: pinsrw $5, %ecx, %xmm0
433 ; AVX-LABEL: test_buildvector_v8i16_register_zero_2:
435 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
436 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
437 ; AVX-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
438 ; AVX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
439 ; AVX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
441 %ins0 = insertelement <8 x i16> undef, i16 0, i32 0
442 %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
443 %ins2 = insertelement <8 x i16> %ins1, i16 0, i32 2
444 %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
445 %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
446 %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
447 %ins6 = insertelement <8 x i16> %ins5, i16 0, i32 6
448 %ins7 = insertelement <8 x i16> %ins6, i16 0, i32 7
; test_buildvector_v16i8_register: build <16 x i8> from sixteen args (args
; 7..15 arrive on the stack under the SysV ABI, hence the (%rsp) operands).
452 define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) {
453 ; SSE2-LABEL: test_buildvector_v16i8_register:
455 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
456 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
457 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
458 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
459 ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
460 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
461 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
462 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
463 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
464 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
465 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
466 ; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
467 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
468 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
469 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
470 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
471 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
472 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
473 ; SSE2-NEXT: movd %r9d, %xmm0
474 ; SSE2-NEXT: movd %r8d, %xmm2
475 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
476 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
477 ; SSE2-NEXT: movd %ecx, %xmm0
478 ; SSE2-NEXT: movd %edx, %xmm1
479 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
480 ; SSE2-NEXT: movd %esi, %xmm4
481 ; SSE2-NEXT: movd %edi, %xmm0
482 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
483 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
484 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
485 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
488 ; SSE41-LABEL: test_buildvector_v16i8_register:
490 ; SSE41-NEXT: movd %edi, %xmm0
491 ; SSE41-NEXT: pinsrb $1, %esi, %xmm0
492 ; SSE41-NEXT: pinsrb $2, %edx, %xmm0
493 ; SSE41-NEXT: pinsrb $3, %ecx, %xmm0
494 ; SSE41-NEXT: pinsrb $4, %r8d, %xmm0
495 ; SSE41-NEXT: pinsrb $5, %r9d, %xmm0
496 ; SSE41-NEXT: pinsrb $6, {{[0-9]+}}(%rsp), %xmm0
497 ; SSE41-NEXT: pinsrb $7, {{[0-9]+}}(%rsp), %xmm0
498 ; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%rsp), %xmm0
499 ; SSE41-NEXT: pinsrb $9, {{[0-9]+}}(%rsp), %xmm0
500 ; SSE41-NEXT: pinsrb $10, {{[0-9]+}}(%rsp), %xmm0
501 ; SSE41-NEXT: pinsrb $11, {{[0-9]+}}(%rsp), %xmm0
502 ; SSE41-NEXT: pinsrb $12, {{[0-9]+}}(%rsp), %xmm0
503 ; SSE41-NEXT: pinsrb $13, {{[0-9]+}}(%rsp), %xmm0
504 ; SSE41-NEXT: pinsrb $14, {{[0-9]+}}(%rsp), %xmm0
505 ; SSE41-NEXT: pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
508 ; AVX-LABEL: test_buildvector_v16i8_register:
510 ; AVX-NEXT: vmovd %edi, %xmm0
511 ; AVX-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0
512 ; AVX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
513 ; AVX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
514 ; AVX-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0
515 ; AVX-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0
516 ; AVX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
517 ; AVX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
518 ; AVX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0
519 ; AVX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0
520 ; AVX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0
521 ; AVX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0
522 ; AVX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0
523 ; AVX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0
524 ; AVX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0
525 ; AVX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
527 %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0
528 %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1
529 %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2
530 %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3
531 %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4
532 %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5
533 %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6
534 %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7
535 %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8
536 %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9
537 %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10
538 %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
539 %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
540 %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13
541 %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14
542 %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
; test_buildvector_v16i8_partial: only lanes 2, 6, 8, 11, 12, 15 are defined;
; all other lanes are undef.
546 define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
547 ; SSE2-LABEL: test_buildvector_v16i8_partial:
549 ; SSE2-NEXT: pxor %xmm0, %xmm0
550 ; SSE2-NEXT: pinsrw $1, %edi, %xmm0
551 ; SSE2-NEXT: pinsrw $3, %esi, %xmm0
552 ; SSE2-NEXT: pinsrw $4, %edx, %xmm0
553 ; SSE2-NEXT: shll $8, %ecx
554 ; SSE2-NEXT: pinsrw $5, %ecx, %xmm0
555 ; SSE2-NEXT: pinsrw $6, %r8d, %xmm0
556 ; SSE2-NEXT: shll $8, %r9d
557 ; SSE2-NEXT: pinsrw $7, %r9d, %xmm0
560 ; SSE41-LABEL: test_buildvector_v16i8_partial:
562 ; SSE41-NEXT: pxor %xmm0, %xmm0
563 ; SSE41-NEXT: pinsrb $2, %edi, %xmm0
564 ; SSE41-NEXT: pinsrb $6, %esi, %xmm0
565 ; SSE41-NEXT: pinsrb $8, %edx, %xmm0
566 ; SSE41-NEXT: pinsrb $11, %ecx, %xmm0
567 ; SSE41-NEXT: pinsrb $12, %r8d, %xmm0
568 ; SSE41-NEXT: pinsrb $15, %r9d, %xmm0
571 ; AVX-LABEL: test_buildvector_v16i8_partial:
573 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
574 ; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0
575 ; AVX-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
576 ; AVX-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
577 ; AVX-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
578 ; AVX-NEXT: vpinsrb $12, %r8d, %xmm0, %xmm0
579 ; AVX-NEXT: vpinsrb $15, %r9d, %xmm0, %xmm0
581 %ins0 = insertelement <16 x i8> undef, i8 undef, i32 0
582 %ins1 = insertelement <16 x i8> %ins0, i8 undef, i32 1
583 %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2
584 %ins3 = insertelement <16 x i8> %ins2, i8 undef, i32 3
585 %ins4 = insertelement <16 x i8> %ins3, i8 undef, i32 4
586 %ins5 = insertelement <16 x i8> %ins4, i8 undef, i32 5
587 %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6
588 %ins7 = insertelement <16 x i8> %ins6, i8 undef, i32 7
589 %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8
590 %ins9 = insertelement <16 x i8> %ins8, i8 undef, i32 9
591 %ins10 = insertelement <16 x i8> %ins9, i8 undef, i32 10
592 %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
593 %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
594 %ins13 = insertelement <16 x i8> %ins12, i8 undef, i32 13
595 %ins14 = insertelement <16 x i8> %ins13, i8 undef, i32 14
596 %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
; test_buildvector_v16i8_register_zero: lanes 0, 4, 6, 8, 11, 12, 15 are
; register/stack values; every other lane is constant zero.
600 define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
601 ; SSE2-LABEL: test_buildvector_v16i8_register_zero:
603 ; SSE2-NEXT: movzbl %sil, %eax
604 ; SSE2-NEXT: movzbl %dil, %esi
605 ; SSE2-NEXT: movd %esi, %xmm0
606 ; SSE2-NEXT: pinsrw $2, %eax, %xmm0
607 ; SSE2-NEXT: movzbl %dl, %eax
608 ; SSE2-NEXT: pinsrw $3, %eax, %xmm0
609 ; SSE2-NEXT: movzbl %cl, %eax
610 ; SSE2-NEXT: pinsrw $4, %eax, %xmm0
611 ; SSE2-NEXT: shll $8, %r8d
612 ; SSE2-NEXT: pinsrw $5, %r8d, %xmm0
613 ; SSE2-NEXT: movzbl %r9b, %eax
614 ; SSE2-NEXT: pinsrw $6, %eax, %xmm0
615 ; SSE2-NEXT: movl {{[0-9]+}}(%rsp), %eax
616 ; SSE2-NEXT: shll $8, %eax
617 ; SSE2-NEXT: pinsrw $7, %eax, %xmm0
620 ; SSE41-LABEL: test_buildvector_v16i8_register_zero:
622 ; SSE41-NEXT: movzbl %dil, %eax
623 ; SSE41-NEXT: movd %eax, %xmm0
624 ; SSE41-NEXT: pinsrb $4, %esi, %xmm0
625 ; SSE41-NEXT: pinsrb $6, %edx, %xmm0
626 ; SSE41-NEXT: pinsrb $8, %ecx, %xmm0
627 ; SSE41-NEXT: pinsrb $11, %r8d, %xmm0
628 ; SSE41-NEXT: pinsrb $12, %r9d, %xmm0
629 ; SSE41-NEXT: pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
632 ; AVX-LABEL: test_buildvector_v16i8_register_zero:
634 ; AVX-NEXT: movzbl %dil, %eax
635 ; AVX-NEXT: vmovd %eax, %xmm0
636 ; AVX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
637 ; AVX-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
638 ; AVX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
639 ; AVX-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0
640 ; AVX-NEXT: vpinsrb $12, %r9d, %xmm0, %xmm0
641 ; AVX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
643 %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0
644 %ins1 = insertelement <16 x i8> %ins0, i8 0, i32 1
645 %ins2 = insertelement <16 x i8> %ins1, i8 0, i32 2
646 %ins3 = insertelement <16 x i8> %ins2, i8 0, i32 3
647 %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4
648 %ins5 = insertelement <16 x i8> %ins4, i8 0, i32 5
649 %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6
650 %ins7 = insertelement <16 x i8> %ins6, i8 0, i32 7
651 %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8
652 %ins9 = insertelement <16 x i8> %ins8, i8 0, i32 9
653 %ins10 = insertelement <16 x i8> %ins9, i8 0, i32 10
654 %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
655 %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
656 %ins13 = insertelement <16 x i8> %ins12, i8 0, i32 13
657 %ins14 = insertelement <16 x i8> %ins13, i8 0, i32 14
658 %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
; test_buildvector_v16i8_register_zero_2: variant with zeros in lanes 0-1 and
; register/stack values in lanes 2, 3, 6, 8, 11, 12, 15.
662 define <16 x i8> @test_buildvector_v16i8_register_zero_2(i8 %a2, i8 %a3, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
663 ; SSE2-LABEL: test_buildvector_v16i8_register_zero_2:
665 ; SSE2-NEXT: shll $8, %esi
666 ; SSE2-NEXT: movzbl %dil, %eax
667 ; SSE2-NEXT: orl %esi, %eax
668 ; SSE2-NEXT: pxor %xmm0, %xmm0
669 ; SSE2-NEXT: pinsrw $1, %eax, %xmm0
670 ; SSE2-NEXT: movzbl %dl, %eax
671 ; SSE2-NEXT: pinsrw $3, %eax, %xmm0
672 ; SSE2-NEXT: movzbl %cl, %eax
673 ; SSE2-NEXT: pinsrw $4, %eax, %xmm0
674 ; SSE2-NEXT: shll $8, %r8d
675 ; SSE2-NEXT: pinsrw $5, %r8d, %xmm0
676 ; SSE2-NEXT: movzbl %r9b, %eax
677 ; SSE2-NEXT: pinsrw $6, %eax, %xmm0
678 ; SSE2-NEXT: movl {{[0-9]+}}(%rsp), %eax
679 ; SSE2-NEXT: shll $8, %eax
680 ; SSE2-NEXT: pinsrw $7, %eax, %xmm0
683 ; SSE41-LABEL: test_buildvector_v16i8_register_zero_2:
685 ; SSE41-NEXT: pxor %xmm0, %xmm0
686 ; SSE41-NEXT: pinsrb $2, %edi, %xmm0
687 ; SSE41-NEXT: pinsrb $3, %esi, %xmm0
688 ; SSE41-NEXT: pinsrb $6, %edx, %xmm0
689 ; SSE41-NEXT: pinsrb $8, %ecx, %xmm0
690 ; SSE41-NEXT: pinsrb $11, %r8d, %xmm0
691 ; SSE41-NEXT: pinsrb $12, %r9d, %xmm0
692 ; SSE41-NEXT: pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
695 ; AVX-LABEL: test_buildvector_v16i8_register_zero_2:
697 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
698 ; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0
699 ; AVX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
700 ; AVX-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
701 ; AVX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
702 ; AVX-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0
703 ; AVX-NEXT: vpinsrb $12, %r9d, %xmm0, %xmm0
704 ; AVX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
706 %ins0 = insertelement <16 x i8> undef, i8 0, i32 0
707 %ins1 = insertelement <16 x i8> %ins0, i8 0, i32 1
708 %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2
709 %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3
710 %ins4 = insertelement <16 x i8> %ins3, i8 0, i32 4
711 %ins5 = insertelement <16 x i8> %ins4, i8 0, i32 5
712 %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6
713 %ins7 = insertelement <16 x i8> %ins6, i8 0, i32 7
714 %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8
715 %ins9 = insertelement <16 x i8> %ins8, i8 0, i32 9
716 %ins10 = insertelement <16 x i8> %ins9, i8 0, i32 10
717 %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
718 %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
719 %ins13 = insertelement <16 x i8> %ins12, i8 0, i32 13
720 %ins14 = insertelement <16 x i8> %ins13, i8 0, i32 14
721 %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
725 ; PR46461 - Don't let reduceBuildVecExtToExtBuildVec break splat(zero_extend) patterns,
726 ; resulting in the BUILD_VECTOR lowering to individual insertions into a zero vector.
; PR46461: splat of a zero-extended value must lower as a broadcast, not as
; per-element insertions into a zero vector (see comment above).
728 define void @PR46461(i16 %x, <16 x i32>* %y) {
729 ; SSE-LABEL: PR46461:
731 ; SSE-NEXT: movzwl %di, %eax
732 ; SSE-NEXT: shrl %eax
733 ; SSE-NEXT: movd %eax, %xmm0
734 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
735 ; SSE-NEXT: movdqa %xmm0, 48(%rsi)
736 ; SSE-NEXT: movdqa %xmm0, 32(%rsi)
737 ; SSE-NEXT: movdqa %xmm0, 16(%rsi)
738 ; SSE-NEXT: movdqa %xmm0, (%rsi)
741 ; AVX1-LABEL: PR46461:
743 ; AVX1-NEXT: movzwl %di, %eax
744 ; AVX1-NEXT: shrl %eax
745 ; AVX1-NEXT: vmovd %eax, %xmm0
746 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
747 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
748 ; AVX1-NEXT: vmovaps %ymm0, 32(%rsi)
749 ; AVX1-NEXT: vmovaps %ymm0, (%rsi)
750 ; AVX1-NEXT: vzeroupper
753 ; AVX2-LABEL: PR46461:
755 ; AVX2-NEXT: movzwl %di, %eax
756 ; AVX2-NEXT: shrl %eax
757 ; AVX2-NEXT: vmovd %eax, %xmm0
758 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
759 ; AVX2-NEXT: vmovdqa %ymm0, 32(%rsi)
760 ; AVX2-NEXT: vmovdqa %ymm0, (%rsi)
761 ; AVX2-NEXT: vzeroupper
; NOTE(review): %z is not defined anywhere in the visible lines — its defining
; instruction (from the `shrl` in the checks, presumably a right-shift of %x
; by 1) appears to have been dropped from this excerpt. Confirm against the
; upstream test before relying on this function.
764 %a = zext i16 %z to i32
765 %b = insertelement <16 x i32> undef, i32 %a, i32 0
766 %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
767 store <16 x i32> %c, <16 x i32>* %y
772 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5688
; ossfuzz5688: fuzzer-generated crash reproducer — insert/extract with
; variable (and out-of-range/undef) indices must not crash the compiler.
773 define <4 x i32> @ossfuzz5688(i32 %a0) {
774 ; CHECK-LABEL: ossfuzz5688:
777 %1 = insertelement <4 x i32> zeroinitializer, i32 -2147483648, i32 %a0
778 %2 = extractelement <4 x i32> %1, i32 %a0
779 %3 = extractelement <4 x i32> <i32 30, i32 53, i32 42, i32 12>, i32 %2
780 %4 = extractelement <4 x i32> zeroinitializer, i32 %2
781 %5 = insertelement <4 x i32> undef, i32 %3, i32 undef
782 store i32 %4, i32* undef
786 ; If we do not define all bytes that are extracted, this is a miscompile.
; PR46586: only bytes 0 and 3 of the <4 x i8> are defined before the zext and
; urem; extracting lane 3 must still compute from the defined byte (see the
; miscompile comment above).
788 define i32 @PR46586(i8* %p, <4 x i32> %v) {
789 ; SSE2-LABEL: PR46586:
791 ; SSE2-NEXT: movzbl 3(%rdi), %eax
792 ; SSE2-NEXT: pxor %xmm1, %xmm1
793 ; SSE2-NEXT: pinsrw $6, %eax, %xmm1
794 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
795 ; SSE2-NEXT: movd %xmm1, %eax
796 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
797 ; SSE2-NEXT: movd %xmm0, %ecx
798 ; SSE2-NEXT: xorl %edx, %edx
799 ; SSE2-NEXT: divl %ecx
800 ; SSE2-NEXT: movl %edx, %eax
803 ; SSE41-LABEL: PR46586:
805 ; SSE41-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
806 ; SSE41-NEXT: extractps $3, %xmm0, %ecx
807 ; SSE41-NEXT: pextrb $3, %xmm1, %eax
808 ; SSE41-NEXT: xorl %edx, %edx
809 ; SSE41-NEXT: divl %ecx
810 ; SSE41-NEXT: movl %edx, %eax
813 ; AVX-LABEL: PR46586:
815 ; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
816 ; AVX-NEXT: vextractps $3, %xmm0, %ecx
817 ; AVX-NEXT: vpextrb $3, %xmm1, %eax
818 ; AVX-NEXT: xorl %edx, %edx
819 ; AVX-NEXT: divl %ecx
820 ; AVX-NEXT: movl %edx, %eax
822 %p0 = getelementptr inbounds i8, i8* %p, i64 0
823 %p3 = getelementptr inbounds i8, i8* %p, i64 3
824 %t25 = load i8, i8* %p0
825 %t28 = load i8, i8* %p3
826 %t29 = insertelement <4 x i8> undef, i8 %t25, i32 0
827 %t32 = insertelement <4 x i8> %t29, i8 %t28, i32 3
828 %t33 = zext <4 x i8> %t32 to <4 x i32>
829 %t34 = urem <4 x i32> %t33, %v
830 %t35 = extractelement <4 x i32> %t34, i32 3