; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx | FileCheck %s --check-prefixes=X86,X86-MMX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512
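
; The RUN lines cover i686 and x86_64 targets from plain MMX up to AVX512F;
; the FileCheck prefixes select the matching per-target assertion blocks below.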

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
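
; Each test builds a vector from scalar arguments, bitcasts it to x86_mmx and
; doubles it with the MMX paddd intrinsic, so the build must happen in an MMX
; register rather than being scalarized. Suffix characters in the test names
; mark lanes that are undef (u) or zero (z).
;
; v2i32
;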

define void @build_v2i32_01(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_01:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_01:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_0z(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_0z:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_u1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 undef, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_z1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_z1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_00(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}
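
;
; v4i16
;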

define void @build_v4i16_0123(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0123:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X86-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0123:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X64-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16> %3, i16 %a3, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_01zz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_01zz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_01zz:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 0, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0uuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0uuz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0uuz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 undef, i32 1
  %3 = insertelement <4 x i16> %2, i16 undef, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0zuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0zuz:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0zuz:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 0, i32 1
  %3 = insertelement <4 x i16> %2, i16 undef, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_012u(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_012u:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_012u:
; X64:       # %bb.0:
; X64-NEXT:    movd %ecx, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16> %3, i16 undef, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0u00(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-MMX-LABEL: build_v4i16_0u00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v4i16_0u00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v4i16_0u00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 undef, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a0, i32 2
  %4 = insertelement <4 x i16> %3, i16 %a0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}
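
;
; v8i8
;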

define void @build_v8i8_01234567(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_01234567:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm3
; X86-NEXT:    punpcklbw %mm0, %mm3 # mm3 = mm3[0],mm0[0],mm3[1],mm0[1],mm3[2],mm0[2],mm3[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm3 # mm3 = mm3[0],mm2[0]
; X86-NEXT:    paddd %mm3, %mm3
; X86-NEXT:    movq %mm3, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_01234567:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm3
; X64-NEXT:    punpcklbw %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1],mm3[2],mm1[2],mm3[3],mm1[3]
; X64-NEXT:    punpcklwd %mm2, %mm3 # mm3 = mm3[0],mm2[0],mm3[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm3 # mm3 = mm3[0],mm0[0]
; X64-NEXT:    paddd %mm3, %mm3
; X64-NEXT:    movq %mm3, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a4, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a5, i32 5
  %7 = insertelement <8 x i8> %6, i8 %a6, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a7, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0u2345z7(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0u2345z7:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm0 # mm0 = mm0[0],mm2[0]
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0u2345z7:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    punpcklwd %mm2, %mm1 # mm1 = mm1[0],mm2[0],mm1[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a4, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a5, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a7, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0123zzzu:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0123zzzu:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 0, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0uuuuzzz(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0uuuuzzz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0uuuuzzz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 undef, i32 2
  %4 = insertelement <8 x i8> %3, i8 undef, i32 3
  %5 = insertelement <8 x i8> %4, i8 undef, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 0, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0zzzzzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0zzzzzzu:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0zzzzzzu:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 0, i32 1
  %3 = insertelement <8 x i8> %2, i8 0, i32 2
  %4 = insertelement <8 x i8> %3, i8 0, i32 3
  %5 = insertelement <8 x i8> %4, i8 0, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_00000000(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-MMX-LABEL: build_v8i8_00000000:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v8i8_00000000:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v8i8_00000000:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a0, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a0, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a0, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a0, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a0, i32 5
  %7 = insertelement <8 x i8> %6, i8 %a0, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a0, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}
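
;
; v2f32
;
; Float elements have no direct GPR-to-MMX move: plain MMX targets load them
; with movd like integers, while SSE targets route them through an XMM
; register via movdq2q.
;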

define void @build_v2f32_01(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_01:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_01:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_01:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_0z(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_0z:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    pxor %mm0, %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_0z:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float 0.0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_u1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float undef, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_z1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_z1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    pxor %mm1, %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_z1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float 0.0, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_00(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_00:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float %a0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}