1 ; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
2 ; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
4 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
; test1: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.ssse3.phadd.w, and reads the result back as the i64 lane of a <1 x i64>.
6 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
8 define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
12 %0 = bitcast <1 x i64> %b to <4 x i16>
13 %1 = bitcast <1 x i64> %a to <4 x i16>
14 %2 = bitcast <4 x i16> %1 to x86_mmx
15 %3 = bitcast <4 x i16> %0 to x86_mmx
16 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
17 %5 = bitcast x86_mmx %4 to <4 x i16>
18 %6 = bitcast <4 x i16> %5 to <1 x i64>
19 %7 = extractelement <1 x i64> %6, i32 0
; test88: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pcmpgt.d, and reads the result back as the i64 lane of a <1 x i64>.
23 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
25 define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
29 %0 = bitcast <1 x i64> %b to <2 x i32>
30 %1 = bitcast <1 x i64> %a to <2 x i32>
31 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
32 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
33 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
34 %3 = bitcast x86_mmx %2 to <2 x i32>
35 %4 = bitcast <2 x i32> %3 to <1 x i64>
36 %5 = extractelement <1 x i64> %4, i32 0
; test87: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pcmpgt.w, and reads the result back as the i64 lane of a <1 x i64>.
40 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
42 define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
46 %0 = bitcast <1 x i64> %b to <4 x i16>
47 %1 = bitcast <1 x i64> %a to <4 x i16>
48 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
49 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
50 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
51 %3 = bitcast x86_mmx %2 to <4 x i16>
52 %4 = bitcast <4 x i16> %3 to <1 x i64>
53 %5 = extractelement <1 x i64> %4, i32 0
; test86: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pcmpgt.b, and reads the result back as the i64 lane of a <1 x i64>.
57 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
59 define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
63 %0 = bitcast <1 x i64> %b to <8 x i8>
64 %1 = bitcast <1 x i64> %a to <8 x i8>
65 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
66 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
67 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
68 %3 = bitcast x86_mmx %2 to <8 x i8>
69 %4 = bitcast <8 x i8> %3 to <1 x i64>
70 %5 = extractelement <1 x i64> %4, i32 0
; test85: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pcmpeq.d, and reads the result back as the i64 lane of a <1 x i64>.
74 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
76 define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
80 %0 = bitcast <1 x i64> %b to <2 x i32>
81 %1 = bitcast <1 x i64> %a to <2 x i32>
82 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
83 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
84 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
85 %3 = bitcast x86_mmx %2 to <2 x i32>
86 %4 = bitcast <2 x i32> %3 to <1 x i64>
87 %5 = extractelement <1 x i64> %4, i32 0
; test84: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pcmpeq.w, and reads the result back as the i64 lane of a <1 x i64>.
91 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
93 define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
97 %0 = bitcast <1 x i64> %b to <4 x i16>
98 %1 = bitcast <1 x i64> %a to <4 x i16>
99 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
100 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
101 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
102 %3 = bitcast x86_mmx %2 to <4 x i16>
103 %4 = bitcast <4 x i16> %3 to <1 x i64>
104 %5 = extractelement <1 x i64> %4, i32 0
; test83: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pcmpeq.b, and reads the result back as the i64 lane of a <1 x i64>.
108 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
110 define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
114 %0 = bitcast <1 x i64> %b to <8 x i8>
115 %1 = bitcast <1 x i64> %a to <8 x i8>
116 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
117 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
118 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
119 %3 = bitcast x86_mmx %2 to <8 x i8>
120 %4 = bitcast <8 x i8> %3 to <1 x i64>
121 %5 = extractelement <1 x i64> %4, i32 0
; test82: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.punpckldq, and reads the result back as the i64 lane of a <1 x i64>.
; The checks inside the body pin the printed shuffle comment (lane 0 of each operand);
; the i686 runs expect the second operand from memory, the x86_64 runs expect it in mm1.
125 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
127 define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
129 ; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0]
130 ; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0]
132 %0 = bitcast <1 x i64> %b to <2 x i32>
133 %1 = bitcast <1 x i64> %a to <2 x i32>
134 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
135 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
136 %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
137 %3 = bitcast x86_mmx %2 to <2 x i32>
138 %4 = bitcast <2 x i32> %3 to <1 x i64>
139 %5 = extractelement <1 x i64> %4, i32 0
; test81: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.punpcklwd, and reads the result back as the i64 lane of a <1 x i64>.
; The checks inside the body pin the printed shuffle comment (lanes 0 and 1 of each
; operand, interleaved); the i686 runs expect the second operand from memory, the
; x86_64 runs expect it in mm1.
143 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
145 define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
147 ; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1]
148 ; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
150 %0 = bitcast <1 x i64> %b to <4 x i16>
151 %1 = bitcast <1 x i64> %a to <4 x i16>
152 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
153 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
154 %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
155 %3 = bitcast x86_mmx %2 to <4 x i16>
156 %4 = bitcast <4 x i16> %3 to <1 x i64>
157 %5 = extractelement <1 x i64> %4, i32 0
; test80: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.punpcklbw, and reads the result back as the i64 lane of a <1 x i64>.
; The checks inside the body pin the printed shuffle comment (lanes 0-3 of each
; operand, interleaved); the i686 runs expect the second operand from memory, the
; x86_64 runs expect it in mm1.
161 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
163 define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
165 ; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
166 ; X64: punpcklbw {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
168 %0 = bitcast <1 x i64> %b to <8 x i8>
169 %1 = bitcast <1 x i64> %a to <8 x i8>
170 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
171 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
172 %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
173 %3 = bitcast x86_mmx %2 to <8 x i8>
174 %4 = bitcast <8 x i8> %3 to <1 x i64>
175 %5 = extractelement <1 x i64> %4, i32 0
; test79: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.punpckhdq, and reads the result back as the i64 lane of a <1 x i64>.
; The checks inside the body pin the printed shuffle comment (lane 1 of each operand);
; the i686 runs expect the second operand from memory, the x86_64 runs expect it in mm1.
179 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
181 define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
183 ; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1]
184 ; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1]
186 %0 = bitcast <1 x i64> %b to <2 x i32>
187 %1 = bitcast <1 x i64> %a to <2 x i32>
188 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
189 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
190 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
191 %3 = bitcast x86_mmx %2 to <2 x i32>
192 %4 = bitcast <2 x i32> %3 to <1 x i64>
193 %5 = extractelement <1 x i64> %4, i32 0
; test78: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.punpckhwd, and reads the result back as the i64 lane of a <1 x i64>.
; The checks inside the body pin the printed shuffle comment (lanes 2 and 3 of each
; operand, interleaved); the i686 runs expect the second operand from memory, the
; x86_64 runs expect it in mm1.
197 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
199 define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
201 ; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3]
202 ; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3]
204 %0 = bitcast <1 x i64> %b to <4 x i16>
205 %1 = bitcast <1 x i64> %a to <4 x i16>
206 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
207 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
208 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
209 %3 = bitcast x86_mmx %2 to <4 x i16>
210 %4 = bitcast <4 x i16> %3 to <1 x i64>
211 %5 = extractelement <1 x i64> %4, i32 0
; test77: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.punpckhbw, and reads the result back as the i64 lane of a <1 x i64>.
; The checks inside the body pin the printed shuffle comment (lanes 4-7 of each
; operand, interleaved); the i686 runs expect the second operand from memory, the
; x86_64 runs expect it in mm1.
215 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
217 define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
219 ; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
220 ; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7]
222 %0 = bitcast <1 x i64> %b to <8 x i8>
223 %1 = bitcast <1 x i64> %a to <8 x i8>
224 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
225 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
226 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
227 %3 = bitcast x86_mmx %2 to <8 x i8>
228 %4 = bitcast <8 x i8> %3 to <1 x i64>
229 %5 = extractelement <1 x i64> %4, i32 0
; test76: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.packuswb. Unlike the inputs, the result is viewed as <8 x i8> before
; being read back as the i64 lane of a <1 x i64>.
233 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
235 define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
239 %0 = bitcast <1 x i64> %b to <4 x i16>
240 %1 = bitcast <1 x i64> %a to <4 x i16>
241 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
242 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
243 %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
244 %3 = bitcast x86_mmx %2 to <8 x i8>
245 %4 = bitcast <8 x i8> %3 to <1 x i64>
246 %5 = extractelement <1 x i64> %4, i32 0
; test75: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.packssdw. Unlike the inputs, the result is viewed as <4 x i16> before
; being read back as the i64 lane of a <1 x i64>.
250 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
252 define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
256 %0 = bitcast <1 x i64> %b to <2 x i32>
257 %1 = bitcast <1 x i64> %a to <2 x i32>
258 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
259 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
260 %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
261 %3 = bitcast x86_mmx %2 to <4 x i16>
262 %4 = bitcast <4 x i16> %3 to <1 x i64>
263 %5 = extractelement <1 x i64> %4, i32 0
; test74: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.packsswb. Unlike the inputs, the result is viewed as <8 x i8> before
; being read back as the i64 lane of a <1 x i64>.
267 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
269 define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
273 %0 = bitcast <1 x i64> %b to <4 x i16>
274 %1 = bitcast <1 x i64> %a to <4 x i16>
275 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
276 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
277 %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
278 %3 = bitcast x86_mmx %2 to <8 x i8>
279 %4 = bitcast <8 x i8> %3 to <1 x i64>
280 %5 = extractelement <1 x i64> %4, i32 0
; test73: views %a as <2 x i32>, passes it as an x86_mmx operand together with the
; constant i32 3 to @llvm.x86.mmx.psrai.d, and reads the result back as the i64 lane
; of a <1 x i64>.
284 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
286 define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
290 %0 = bitcast <1 x i64> %a to <2 x i32>
291 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
292 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
293 %2 = bitcast x86_mmx %1 to <2 x i32>
294 %3 = bitcast <2 x i32> %2 to <1 x i64>
295 %4 = extractelement <1 x i64> %3, i32 0
; test72: views %a as <4 x i16>, passes it as an x86_mmx operand together with the
; constant i32 3 to @llvm.x86.mmx.psrai.w, and reads the result back as the i64 lane
; of a <1 x i64>.
299 declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
301 define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
305 %0 = bitcast <1 x i64> %a to <4 x i16>
306 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
307 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
308 %2 = bitcast x86_mmx %1 to <4 x i16>
309 %3 = bitcast <4 x i16> %2 to <1 x i64>
310 %4 = extractelement <1 x i64> %3, i32 0
; test71: extracts the single i64 element of %a, bitcasts it straight to x86_mmx,
; passes it with the constant i32 3 to @llvm.x86.mmx.psrli.q, and bitcasts the
; x86_mmx result directly back to i64 (no intermediate vector view).
314 declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
316 define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
320 %0 = extractelement <1 x i64> %a, i32 0
321 %mmx_var.i = bitcast i64 %0 to x86_mmx
322 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
323 %2 = bitcast x86_mmx %1 to i64
; test70: views %a as <2 x i32>, passes it as an x86_mmx operand together with the
; constant i32 3 to @llvm.x86.mmx.psrli.d, and reads the result back as the i64 lane
; of a <1 x i64>.
327 declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
329 define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
333 %0 = bitcast <1 x i64> %a to <2 x i32>
334 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
335 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
336 %2 = bitcast x86_mmx %1 to <2 x i32>
337 %3 = bitcast <2 x i32> %2 to <1 x i64>
338 %4 = extractelement <1 x i64> %3, i32 0
; test69: views %a as <4 x i16>, passes it as an x86_mmx operand together with the
; constant i32 3 to @llvm.x86.mmx.psrli.w, and reads the result back as the i64 lane
; of a <1 x i64>.
342 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
344 define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
348 %0 = bitcast <1 x i64> %a to <4 x i16>
349 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
350 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
351 %2 = bitcast x86_mmx %1 to <4 x i16>
352 %3 = bitcast <4 x i16> %2 to <1 x i64>
353 %4 = extractelement <1 x i64> %3, i32 0
; test68: extracts the single i64 element of %a, bitcasts it straight to x86_mmx,
; passes it with the constant i32 3 to @llvm.x86.mmx.pslli.q, and bitcasts the
; x86_mmx result directly back to i64 (no intermediate vector view).
357 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
359 define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
363 %0 = extractelement <1 x i64> %a, i32 0
364 %mmx_var.i = bitcast i64 %0 to x86_mmx
365 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
366 %2 = bitcast x86_mmx %1 to i64
; test67: views %a as <2 x i32>, passes it as an x86_mmx operand together with the
; constant i32 3 to @llvm.x86.mmx.pslli.d, and reads the result back as the i64 lane
; of a <1 x i64>.
370 declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
372 define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
376 %0 = bitcast <1 x i64> %a to <2 x i32>
377 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
378 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
379 %2 = bitcast x86_mmx %1 to <2 x i32>
380 %3 = bitcast <2 x i32> %2 to <1 x i64>
381 %4 = extractelement <1 x i64> %3, i32 0
; test66: views %a as <4 x i16>, passes it as an x86_mmx operand together with the
; constant i32 3 to @llvm.x86.mmx.pslli.w, and reads the result back as the i64 lane
; of a <1 x i64>.
385 declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
387 define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
391 %0 = bitcast <1 x i64> %a to <4 x i16>
392 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
393 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
394 %2 = bitcast x86_mmx %1 to <4 x i16>
395 %3 = bitcast <4 x i16> %2 to <1 x i64>
396 %4 = extractelement <1 x i64> %3, i32 0
; test65: calls @llvm.x86.mmx.psra.d with %a viewed as <2 x i32> as the first operand.
; The second operand is %b's single i64 element bitcast straight to x86_mmx (no
; vector view). The result is viewed as <2 x i32> and read back as an i64 lane.
400 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
402 define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
406 %0 = bitcast <1 x i64> %a to <2 x i32>
407 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
408 %1 = extractelement <1 x i64> %b, i32 0
409 %mmx_var1.i = bitcast i64 %1 to x86_mmx
410 %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
411 %3 = bitcast x86_mmx %2 to <2 x i32>
412 %4 = bitcast <2 x i32> %3 to <1 x i64>
413 %5 = extractelement <1 x i64> %4, i32 0
; test64: calls @llvm.x86.mmx.psra.w with %a viewed as <4 x i16> as the first operand.
; The second operand is %b's single i64 element bitcast straight to x86_mmx (no
; vector view). The result is viewed as <4 x i16> and read back as an i64 lane.
417 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
419 define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
423 %0 = bitcast <1 x i64> %a to <4 x i16>
424 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
425 %1 = extractelement <1 x i64> %b, i32 0
426 %mmx_var1.i = bitcast i64 %1 to x86_mmx
427 %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
428 %3 = bitcast x86_mmx %2 to <4 x i16>
429 %4 = bitcast <4 x i16> %3 to <1 x i64>
430 %5 = extractelement <1 x i64> %4, i32 0
; test63: extracts the single i64 element of both %a and %b, bitcasts each straight to
; x86_mmx, calls @llvm.x86.mmx.psrl.q (%a first), and bitcasts the result directly
; back to i64.
434 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
436 define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
440 %0 = extractelement <1 x i64> %a, i32 0
441 %mmx_var.i = bitcast i64 %0 to x86_mmx
442 %1 = extractelement <1 x i64> %b, i32 0
443 %mmx_var1.i = bitcast i64 %1 to x86_mmx
444 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
445 %3 = bitcast x86_mmx %2 to i64
; test62: calls @llvm.x86.mmx.psrl.d with %a viewed as <2 x i32> as the first operand.
; The second operand is %b's single i64 element bitcast straight to x86_mmx (no
; vector view). The result is viewed as <2 x i32> and read back as an i64 lane.
449 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
451 define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
455 %0 = bitcast <1 x i64> %a to <2 x i32>
456 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
457 %1 = extractelement <1 x i64> %b, i32 0
458 %mmx_var1.i = bitcast i64 %1 to x86_mmx
459 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
460 %3 = bitcast x86_mmx %2 to <2 x i32>
461 %4 = bitcast <2 x i32> %3 to <1 x i64>
462 %5 = extractelement <1 x i64> %4, i32 0
; test61: calls @llvm.x86.mmx.psrl.w with %a viewed as <4 x i16> as the first operand.
; The second operand is %b's single i64 element bitcast straight to x86_mmx (no
; vector view). The result is viewed as <4 x i16> and read back as an i64 lane.
466 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
468 define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
472 %0 = bitcast <1 x i64> %a to <4 x i16>
473 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
474 %1 = extractelement <1 x i64> %b, i32 0
475 %mmx_var1.i = bitcast i64 %1 to x86_mmx
476 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
477 %3 = bitcast x86_mmx %2 to <4 x i16>
478 %4 = bitcast <4 x i16> %3 to <1 x i64>
479 %5 = extractelement <1 x i64> %4, i32 0
; test60: extracts the single i64 element of both %a and %b, bitcasts each straight to
; x86_mmx, calls @llvm.x86.mmx.psll.q (%a first), and bitcasts the result directly
; back to i64.
483 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
485 define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
489 %0 = extractelement <1 x i64> %a, i32 0
490 %mmx_var.i = bitcast i64 %0 to x86_mmx
491 %1 = extractelement <1 x i64> %b, i32 0
492 %mmx_var1.i = bitcast i64 %1 to x86_mmx
493 %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
494 %3 = bitcast x86_mmx %2 to i64
; test59: calls @llvm.x86.mmx.psll.d with %a viewed as <2 x i32> as the first operand.
; The second operand is %b's single i64 element bitcast straight to x86_mmx (no
; vector view). The result is viewed as <2 x i32> and read back as an i64 lane.
498 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
500 define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
504 %0 = bitcast <1 x i64> %a to <2 x i32>
505 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
506 %1 = extractelement <1 x i64> %b, i32 0
507 %mmx_var1.i = bitcast i64 %1 to x86_mmx
508 %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
509 %3 = bitcast x86_mmx %2 to <2 x i32>
510 %4 = bitcast <2 x i32> %3 to <1 x i64>
511 %5 = extractelement <1 x i64> %4, i32 0
; test58: calls @llvm.x86.mmx.psll.w with %a viewed as <4 x i16> as the first operand.
; The second operand is %b's single i64 element bitcast straight to x86_mmx (no
; vector view). The result is viewed as <4 x i16> and read back as an i64 lane.
515 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
517 define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
521 %0 = bitcast <1 x i64> %a to <4 x i16>
522 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
523 %1 = extractelement <1 x i64> %b, i32 0
524 %mmx_var1.i = bitcast i64 %1 to x86_mmx
525 %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
526 %3 = bitcast x86_mmx %2 to <4 x i16>
527 %4 = bitcast <4 x i16> %3 to <1 x i64>
528 %5 = extractelement <1 x i64> %4, i32 0
; test56: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pxor, and reads the result back as the i64 lane of a <1 x i64>.
532 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
534 define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
538 %0 = bitcast <1 x i64> %b to <2 x i32>
539 %1 = bitcast <1 x i64> %a to <2 x i32>
540 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
541 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
542 %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
543 %3 = bitcast x86_mmx %2 to <2 x i32>
544 %4 = bitcast <2 x i32> %3 to <1 x i64>
545 %5 = extractelement <1 x i64> %4, i32 0
; test55: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.por, and reads the result back as the i64 lane of a <1 x i64>.
549 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
551 define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
555 %0 = bitcast <1 x i64> %b to <2 x i32>
556 %1 = bitcast <1 x i64> %a to <2 x i32>
557 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
558 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
559 %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
560 %3 = bitcast x86_mmx %2 to <2 x i32>
561 %4 = bitcast <2 x i32> %3 to <1 x i64>
562 %5 = extractelement <1 x i64> %4, i32 0
; test54: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pandn, and reads the result back as the i64 lane of a <1 x i64>.
566 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
568 define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
572 %0 = bitcast <1 x i64> %b to <2 x i32>
573 %1 = bitcast <1 x i64> %a to <2 x i32>
574 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
575 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
576 %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
577 %3 = bitcast x86_mmx %2 to <2 x i32>
578 %4 = bitcast <2 x i32> %3 to <1 x i64>
579 %5 = extractelement <1 x i64> %4, i32 0
; test53: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pand, and reads the result back as the i64 lane of a <1 x i64>.
583 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
585 define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
589 %0 = bitcast <1 x i64> %b to <2 x i32>
590 %1 = bitcast <1 x i64> %a to <2 x i32>
591 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
592 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
593 %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
594 %3 = bitcast x86_mmx %2 to <2 x i32>
595 %4 = bitcast <2 x i32> %3 to <1 x i64>
596 %5 = extractelement <1 x i64> %4, i32 0
; test52: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pmull.w, and reads the result back as the i64 lane of a <1 x i64>.
600 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
602 define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
606 %0 = bitcast <1 x i64> %b to <4 x i16>
607 %1 = bitcast <1 x i64> %a to <4 x i16>
608 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
609 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
610 %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
611 %3 = bitcast x86_mmx %2 to <4 x i16>
612 %4 = bitcast <4 x i16> %3 to <1 x i64>
613 %5 = extractelement <1 x i64> %4, i32 0
; test51: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pmull.w, and reads the result back as the i64 lane of a <1 x i64>.
; NOTE(review): this calls the same intrinsic as test52 with the same operand types;
; confirm whether the duplication is intentional.
617 define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
621 %0 = bitcast <1 x i64> %b to <4 x i16>
622 %1 = bitcast <1 x i64> %a to <4 x i16>
623 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
624 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
625 %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
626 %3 = bitcast x86_mmx %2 to <4 x i16>
627 %4 = bitcast <4 x i16> %3 to <1 x i64>
628 %5 = extractelement <1 x i64> %4, i32 0
; test50: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pmulh.w, and reads the result back as the i64 lane of a <1 x i64>.
632 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
634 define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
638 %0 = bitcast <1 x i64> %b to <4 x i16>
639 %1 = bitcast <1 x i64> %a to <4 x i16>
640 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
641 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
642 %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
643 %3 = bitcast x86_mmx %2 to <4 x i16>
644 %4 = bitcast <4 x i16> %3 to <1 x i64>
645 %5 = extractelement <1 x i64> %4, i32 0
; test49: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pmadd.wd. Unlike the inputs, the result is viewed as <2 x i32> before
; being read back as the i64 lane of a <1 x i64>.
649 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
651 define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
655 %0 = bitcast <1 x i64> %b to <4 x i16>
656 %1 = bitcast <1 x i64> %a to <4 x i16>
657 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
658 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
659 %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
660 %3 = bitcast x86_mmx %2 to <2 x i32>
661 %4 = bitcast <2 x i32> %3 to <1 x i64>
662 %5 = extractelement <1 x i64> %4, i32 0
; test48: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.psubus.w, and reads the result back as the i64 lane of a <1 x i64>.
666 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
668 define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
672 %0 = bitcast <1 x i64> %b to <4 x i16>
673 %1 = bitcast <1 x i64> %a to <4 x i16>
674 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
675 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
676 %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
677 %3 = bitcast x86_mmx %2 to <4 x i16>
678 %4 = bitcast <4 x i16> %3 to <1 x i64>
679 %5 = extractelement <1 x i64> %4, i32 0
; test47: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.psubus.b, and reads the result back as the i64 lane of a <1 x i64>.
683 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
685 define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
689 %0 = bitcast <1 x i64> %b to <8 x i8>
690 %1 = bitcast <1 x i64> %a to <8 x i8>
691 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
692 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
693 %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
694 %3 = bitcast x86_mmx %2 to <8 x i8>
695 %4 = bitcast <8 x i8> %3 to <1 x i64>
696 %5 = extractelement <1 x i64> %4, i32 0
; test46: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.psubs.w, and reads the result back as the i64 lane of a <1 x i64>.
700 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
702 define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
706 %0 = bitcast <1 x i64> %b to <4 x i16>
707 %1 = bitcast <1 x i64> %a to <4 x i16>
708 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
709 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
710 %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
711 %3 = bitcast x86_mmx %2 to <4 x i16>
712 %4 = bitcast <4 x i16> %3 to <1 x i64>
713 %5 = extractelement <1 x i64> %4, i32 0
; test45: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.psubs.b, and reads the result back as the i64 lane of a <1 x i64>.
717 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
719 define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
723 %0 = bitcast <1 x i64> %b to <8 x i8>
724 %1 = bitcast <1 x i64> %a to <8 x i8>
725 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
726 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
727 %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
728 %3 = bitcast x86_mmx %2 to <8 x i8>
729 %4 = bitcast <8 x i8> %3 to <1 x i64>
730 %5 = extractelement <1 x i64> %4, i32 0
; test44: extracts the single i64 element of both %a and %b, bitcasts each straight to
; x86_mmx, calls @llvm.x86.mmx.psub.q (%a first), and bitcasts the result directly
; back to i64. The intrinsic's declaration follows the function instead of preceding
; it, and the call site carries no nounwind attribute.
734 define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
738 %0 = extractelement <1 x i64> %a, i32 0
739 %mmx_var = bitcast i64 %0 to x86_mmx
740 %1 = extractelement <1 x i64> %b, i32 0
741 %mmx_var1 = bitcast i64 %1 to x86_mmx
742 %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
743 %3 = bitcast x86_mmx %2 to i64
747 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
; test43: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.psub.d, and reads the result back as the i64 lane of a <1 x i64>.
749 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
751 define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
755 %0 = bitcast <1 x i64> %b to <2 x i32>
756 %1 = bitcast <1 x i64> %a to <2 x i32>
757 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
758 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
759 %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
760 %3 = bitcast x86_mmx %2 to <2 x i32>
761 %4 = bitcast <2 x i32> %3 to <1 x i64>
762 %5 = extractelement <1 x i64> %4, i32 0
; test42: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.psub.w, and reads the result back as the i64 lane of a <1 x i64>.
766 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
768 define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
772 %0 = bitcast <1 x i64> %b to <4 x i16>
773 %1 = bitcast <1 x i64> %a to <4 x i16>
774 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
775 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
776 %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
777 %3 = bitcast x86_mmx %2 to <4 x i16>
778 %4 = bitcast <4 x i16> %3 to <1 x i64>
779 %5 = extractelement <1 x i64> %4, i32 0
; test41: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.psub.b, and reads the result back as the i64 lane of a <1 x i64>.
783 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
785 define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
789 %0 = bitcast <1 x i64> %b to <8 x i8>
790 %1 = bitcast <1 x i64> %a to <8 x i8>
791 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
792 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
793 %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
794 %3 = bitcast x86_mmx %2 to <8 x i8>
795 %4 = bitcast <8 x i8> %3 to <1 x i64>
796 %5 = extractelement <1 x i64> %4, i32 0
; test40: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.paddus.w, and reads the result back as the i64 lane of a <1 x i64>.
800 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
802 define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
806 %0 = bitcast <1 x i64> %b to <4 x i16>
807 %1 = bitcast <1 x i64> %a to <4 x i16>
808 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
809 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
810 %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
811 %3 = bitcast x86_mmx %2 to <4 x i16>
812 %4 = bitcast <4 x i16> %3 to <1 x i64>
813 %5 = extractelement <1 x i64> %4, i32 0
; test39: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.paddus.b, and reads the result back as the i64 lane of a <1 x i64>.
817 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
819 define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
823 %0 = bitcast <1 x i64> %b to <8 x i8>
824 %1 = bitcast <1 x i64> %a to <8 x i8>
825 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
826 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
827 %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
828 %3 = bitcast x86_mmx %2 to <8 x i8>
829 %4 = bitcast <8 x i8> %3 to <1 x i64>
830 %5 = extractelement <1 x i64> %4, i32 0
; test38: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.padds.w, and reads the result back as the i64 lane of a <1 x i64>.
834 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
836 define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
840 %0 = bitcast <1 x i64> %b to <4 x i16>
841 %1 = bitcast <1 x i64> %a to <4 x i16>
842 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
843 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
844 %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
845 %3 = bitcast x86_mmx %2 to <4 x i16>
846 %4 = bitcast <4 x i16> %3 to <1 x i64>
847 %5 = extractelement <1 x i64> %4, i32 0
; test37: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.padds.b, and reads the result back as the i64 lane of a <1 x i64>.
851 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
853 define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
857 %0 = bitcast <1 x i64> %b to <8 x i8>
858 %1 = bitcast <1 x i64> %a to <8 x i8>
859 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
860 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
861 %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
862 %3 = bitcast x86_mmx %2 to <8 x i8>
863 %4 = bitcast <8 x i8> %3 to <1 x i64>
864 %5 = extractelement <1 x i64> %4, i32 0
; test36: extracts the single i64 element of both %a and %b, bitcasts each straight to
; x86_mmx, calls @llvm.x86.mmx.padd.q (%a first), and bitcasts the result directly
; back to i64. The call site carries no nounwind attribute.
868 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
870 define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
874 %0 = extractelement <1 x i64> %a, i32 0
875 %mmx_var = bitcast i64 %0 to x86_mmx
876 %1 = extractelement <1 x i64> %b, i32 0
877 %mmx_var1 = bitcast i64 %1 to x86_mmx
878 %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
879 %3 = bitcast x86_mmx %2 to i64
; test35: views %a and %b as <2 x i32>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.padd.d, and reads the result back as the i64 lane of a <1 x i64>.
883 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
885 define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
889 %0 = bitcast <1 x i64> %b to <2 x i32>
890 %1 = bitcast <1 x i64> %a to <2 x i32>
891 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
892 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
893 %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
894 %3 = bitcast x86_mmx %2 to <2 x i32>
895 %4 = bitcast <2 x i32> %3 to <1 x i64>
896 %5 = extractelement <1 x i64> %4, i32 0
; test34: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.padd.w, and reads the result back as the i64 lane of a <1 x i64>.
900 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
902 define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
906 %0 = bitcast <1 x i64> %b to <4 x i16>
907 %1 = bitcast <1 x i64> %a to <4 x i16>
908 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
909 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
910 %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
911 %3 = bitcast x86_mmx %2 to <4 x i16>
912 %4 = bitcast <4 x i16> %3 to <1 x i64>
913 %5 = extractelement <1 x i64> %4, i32 0
; test33: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.padd.b, and reads the result back as the i64 lane of a <1 x i64>.
917 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
919 define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
923 %0 = bitcast <1 x i64> %b to <8 x i8>
924 %1 = bitcast <1 x i64> %a to <8 x i8>
925 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
926 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
927 %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
928 %3 = bitcast x86_mmx %2 to <8 x i8>
929 %4 = bitcast <8 x i8> %3 to <1 x i64>
930 %5 = extractelement <1 x i64> %4, i32 0
; test32: views %a and %b as <8 x i8>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.psad.bw. The x86_mmx result is bitcast directly to i64, with no
; intermediate vector view.
934 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
936 define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
940 %0 = bitcast <1 x i64> %b to <8 x i8>
941 %1 = bitcast <1 x i64> %a to <8 x i8>
942 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
943 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
944 %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
945 %3 = bitcast x86_mmx %2 to i64
; test31: views %a and %b as <4 x i16>, passes them as x86_mmx operands (%a first) to
; @llvm.x86.mmx.pmins.w, and reads the result back as the i64 lane of a <1 x i64>.
949 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
951 define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
955 %0 = bitcast <1 x i64> %b to <4 x i16>
956 %1 = bitcast <1 x i64> %a to <4 x i16>
957 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
958 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
959 %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
960 %3 = bitcast x86_mmx %2 to <4 x i16>
961 %4 = bitcast <4 x i16> %3 to <1 x i64>
962 %5 = extractelement <1 x i64> %4, i32 0
; test30: calls llvm.x86.mmx.pminu.b with %a as the first operand and %b as the
; second. Arguments and result are reinterpreted through <8 x i8>; element 0
; of the resulting <1 x i64> is extracted as %5.
966 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
968 define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
972 %0 = bitcast <1 x i64> %b to <8 x i8>
973 %1 = bitcast <1 x i64> %a to <8 x i8>
974 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
975 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
976 %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
977 %3 = bitcast x86_mmx %2 to <8 x i8>
978 %4 = bitcast <8 x i8> %3 to <1 x i64>
979 %5 = extractelement <1 x i64> %4, i32 0
; test29: calls llvm.x86.mmx.pmaxs.w with %a as the first operand and %b as the
; second. Arguments and result are reinterpreted through <4 x i16>; element 0
; of the resulting <1 x i64> is extracted as %5.
983 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
985 define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
989 %0 = bitcast <1 x i64> %b to <4 x i16>
990 %1 = bitcast <1 x i64> %a to <4 x i16>
991 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
992 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
993 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
994 %3 = bitcast x86_mmx %2 to <4 x i16>
995 %4 = bitcast <4 x i16> %3 to <1 x i64>
996 %5 = extractelement <1 x i64> %4, i32 0
; test28: calls llvm.x86.mmx.pmaxu.b with %a as the first operand and %b as the
; second. Arguments and result are reinterpreted through <8 x i8>; element 0
; of the resulting <1 x i64> is extracted as %5.
; The label check inside the function uses the prefix shared by all four llc
; invocations at the top of the file.
1000 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
1002 define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1003 ; ALL-LABEL: @test28
1006 %0 = bitcast <1 x i64> %b to <8 x i8>
1007 %1 = bitcast <1 x i64> %a to <8 x i8>
1008 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1009 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1010 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1011 %3 = bitcast x86_mmx %2 to <8 x i8>
1012 %4 = bitcast <8 x i8> %3 to <1 x i64>
1013 %5 = extractelement <1 x i64> %4, i32 0
; test27: calls llvm.x86.mmx.pavg.w with %a as the first operand and %b as the
; second. Arguments and result are reinterpreted through <4 x i16>; element 0
; of the resulting <1 x i64> is extracted as %5.
1017 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
1019 define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1020 ; ALL-LABEL: @test27
1023 %0 = bitcast <1 x i64> %b to <4 x i16>
1024 %1 = bitcast <1 x i64> %a to <4 x i16>
1025 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1026 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1027 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1028 %3 = bitcast x86_mmx %2 to <4 x i16>
1029 %4 = bitcast <4 x i16> %3 to <1 x i64>
1030 %5 = extractelement <1 x i64> %4, i32 0
; test26: calls llvm.x86.mmx.pavg.b with %a as the first operand and %b as the
; second. Arguments and result are reinterpreted through <8 x i8>; element 0
; of the resulting <1 x i64> is extracted as %5.
1034 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
1036 define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1037 ; ALL-LABEL: @test26
1040 %0 = bitcast <1 x i64> %b to <8 x i8>
1041 %1 = bitcast <1 x i64> %a to <8 x i8>
1042 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1043 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1044 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1045 %3 = bitcast x86_mmx %2 to <8 x i8>
1046 %4 = bitcast <8 x i8> %3 to <1 x i64>
1047 %5 = extractelement <1 x i64> %4, i32 0
; test25: calls the void intrinsic llvm.x86.mmx.movnt.dq with a destination
; pointer and a value. %p is cast from <1 x i64>* to x86_mmx*, and element 0 of
; %a is bitcast from i64 to x86_mmx to form the value operand.
; Neither the intrinsic declaration nor this function is marked readnone,
; unlike the value-returning tests around it.
1051 declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
1053 define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
1054 ; ALL-LABEL: @test25
1057 %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
1058 %0 = extractelement <1 x i64> %a, i32 0
1059 %mmx_var.i = bitcast i64 %0 to x86_mmx
1060 tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
; test24: calls llvm.x86.mmx.pmovmskb, which takes one x86_mmx operand and
; yields an i32 (%1). The operand is %a reinterpreted as <8 x i8> and then as
; x86_mmx.
1064 declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
1066 define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
1067 ; ALL-LABEL: @test24
1070 %0 = bitcast <1 x i64> %a to <8 x i8>
1071 %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
1072 %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
; test23: calls the void intrinsic llvm.x86.mmx.maskmovq with three operands:
; %d (first), %n (second) and the i8* pointer %p (third). %d and %n are each
; reinterpreted as <8 x i8> and then as x86_mmx.
; Note the numbering: %0 is derived from %n and %1 from %d.
1076 declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
1078 define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
1079 ; ALL-LABEL: @test23
1082 %0 = bitcast <1 x i64> %n to <8 x i8>
1083 %1 = bitcast <1 x i64> %d to <8 x i8>
1084 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1085 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1086 tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
; test22: calls llvm.x86.mmx.pmulhu.w with %a as the first operand and %b as
; the second. Arguments and result are reinterpreted through <4 x i16>;
; element 0 of the resulting <1 x i64> is extracted as %5.
1090 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
1092 define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1093 ; ALL-LABEL: @test22
1096 %0 = bitcast <1 x i64> %b to <4 x i16>
1097 %1 = bitcast <1 x i64> %a to <4 x i16>
1098 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1099 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1100 %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1101 %3 = bitcast x86_mmx %2 to <4 x i16>
1102 %4 = bitcast <4 x i16> %3 to <1 x i64>
1103 %5 = extractelement <1 x i64> %4, i32 0
; test21: calls llvm.x86.sse.pshuf.w on %a (reinterpreted via <4 x i16>) with
; the constant i8 3 as the second operand, then converts the result back to
; <1 x i64> and extracts element 0 (%5).
; The per-target checks expect a pshufw whose asm comment decodes the shuffle
; as [3,0,0,0]; the 32-bit run expects the source to be a memory operand (mem),
; while the 64-bit run expects it to be mm0.
1107 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
1109 define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
1110 ; ALL-LABEL: @test21
1111 ; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
1112 ; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
1114 %0 = bitcast <1 x i64> %a to <4 x i16>
1115 %1 = bitcast <4 x i16> %0 to x86_mmx
1116 %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
1117 %3 = bitcast x86_mmx %2 to <4 x i16>
1118 %4 = bitcast <4 x i16> %3 to <1 x i64>
1119 %5 = extractelement <1 x i64> %4, i32 0
; test21_2: same llvm.x86.sse.pshuf.w call as test21 (operand %a, constant
; i8 3), but the function yields an i32: the result is reinterpreted as
; <4 x i16>, then as <2 x i32>, and element 0 of that vector is extracted (%5).
; The pshufw expectations are the same as in test21: shuffle [3,0,0,0], with a
; memory source in the 32-bit run and mm0 as the source in the 64-bit run.
1123 define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
1124 ; ALL-LABEL: @test21_2
1125 ; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
1126 ; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
1129 %0 = bitcast <1 x i64> %a to <4 x i16>
1130 %1 = bitcast <4 x i16> %0 to x86_mmx
1131 %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
1132 %3 = bitcast x86_mmx %2 to <4 x i16>
1133 %4 = bitcast <4 x i16> %3 to <2 x i32>
1134 %5 = extractelement <2 x i32> %4, i32 0
; test20: calls llvm.x86.mmx.pmulu.dq with %a as the first operand and %b as
; the second, both reinterpreted via <2 x i32>. The x86_mmx result is bitcast
; directly to a scalar i64 (%3) rather than going back through a vector type.
1138 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
1140 define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1141 ; ALL-LABEL: @test20
1144 %0 = bitcast <1 x i64> %b to <2 x i32>
1145 %1 = bitcast <1 x i64> %a to <2 x i32>
1146 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
1147 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
1148 %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1149 %3 = bitcast x86_mmx %2 to i64
; test19: calls llvm.x86.sse.cvtpi2pd, which takes one x86_mmx operand and
; yields <2 x double> (%2). The operand is %a reinterpreted as <2 x i32> and
; then as x86_mmx.
1153 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
1155 define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
1156 ; ALL-LABEL: @test19
1159 %0 = bitcast <1 x i64> %a to <2 x i32>
1160 %1 = bitcast <2 x i32> %0 to x86_mmx
1161 %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
; test18: calls llvm.x86.sse.cvttpd2pi on the <2 x double> argument %a. The
; x86_mmx result is reinterpreted as <2 x i32>, then as <1 x i64>, and element
; 0 is extracted as the i64 value %3.
1165 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
1167 define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
1168 ; ALL-LABEL: @test18
1171 %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
1172 %1 = bitcast x86_mmx %0 to <2 x i32>
1173 %2 = bitcast <2 x i32> %1 to <1 x i64>
1174 %3 = extractelement <1 x i64> %2, i32 0
; test17: calls llvm.x86.sse.cvtpd2pi on the <2 x double> argument %a. The
; x86_mmx result is reinterpreted as <2 x i32>, then as <1 x i64>, and element
; 0 is extracted as the i64 value %3. Structurally identical to test18 apart
; from the intrinsic used.
1178 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
1180 define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
1181 ; ALL-LABEL: @test17
1184 %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
1185 %1 = bitcast x86_mmx %0 to <2 x i32>
1186 %2 = bitcast <2 x i32> %1 to <1 x i64>
1187 %3 = extractelement <1 x i64> %2, i32 0
; test16: calls llvm.x86.mmx.palignr.b with %a as the first operand, %b as the
; second, and the constant i8 16 as the third. Here the operands are produced
; by extracting element 0 of each <1 x i64> and bitcasting the i64 to x86_mmx;
; the result is bitcast directly to i64 (%3).
; NOTE(review): 16 is larger than the 8-byte width of an MMX register;
; presumably the out-of-range shift amount is intentional - confirm.
1191 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
1193 define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1194 ; ALL-LABEL: @test16
1197 %0 = extractelement <1 x i64> %a, i32 0
1198 %mmx_var = bitcast i64 %0 to x86_mmx
1199 %1 = extractelement <1 x i64> %b, i32 0
1200 %mmx_var1 = bitcast i64 %1 to x86_mmx
1201 %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
1202 %3 = bitcast x86_mmx %2 to i64
; test15: calls the single-operand intrinsic llvm.x86.ssse3.pabs.d on %a.
; The argument and the result are reinterpreted through <2 x i32>; element 0
; of the resulting <1 x i64> is extracted as %5.
1206 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
1208 define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
1209 ; ALL-LABEL: @test15
1212 %0 = bitcast <1 x i64> %a to <2 x i32>
1213 %1 = bitcast <2 x i32> %0 to x86_mmx
1214 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
1215 %3 = bitcast x86_mmx %2 to <2 x i32>
1216 %4 = bitcast <2 x i32> %3 to <1 x i64>
1217 %5 = extractelement <1 x i64> %4, i32 0
; test14: calls the single-operand intrinsic llvm.x86.ssse3.pabs.w on %a.
; The argument and the result are reinterpreted through <4 x i16>; element 0
; of the resulting <1 x i64> is extracted as %5.
1221 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
1223 define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
1224 ; ALL-LABEL: @test14
1227 %0 = bitcast <1 x i64> %a to <4 x i16>
1228 %1 = bitcast <4 x i16> %0 to x86_mmx
1229 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
1230 %3 = bitcast x86_mmx %2 to <4 x i16>
1231 %4 = bitcast <4 x i16> %3 to <1 x i64>
1232 %5 = extractelement <1 x i64> %4, i32 0
; test13: calls the single-operand intrinsic llvm.x86.ssse3.pabs.b on %a.
; The argument and the result are reinterpreted through <8 x i8>; element 0
; of the resulting <1 x i64> is extracted as %5.
1236 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
1238 define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
1239 ; ALL-LABEL: @test13
1242 %0 = bitcast <1 x i64> %a to <8 x i8>
1243 %1 = bitcast <8 x i8> %0 to x86_mmx
1244 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
1245 %3 = bitcast x86_mmx %2 to <8 x i8>
1246 %4 = bitcast <8 x i8> %3 to <1 x i64>
1247 %5 = extractelement <1 x i64> %4, i32 0
; test12: calls llvm.x86.ssse3.psign.d with %a as the first operand (%2) and
; %b as the second (%3). Arguments and result are reinterpreted through
; <2 x i32>; element 0 of the resulting <1 x i64> is extracted as %7.
; Note the numbering: %0 is derived from %b and %1 from %a.
1251 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
1253 define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1254 ; ALL-LABEL: @test12
1257 %0 = bitcast <1 x i64> %b to <2 x i32>
1258 %1 = bitcast <1 x i64> %a to <2 x i32>
1259 %2 = bitcast <2 x i32> %1 to x86_mmx
1260 %3 = bitcast <2 x i32> %0 to x86_mmx
1261 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1262 %5 = bitcast x86_mmx %4 to <2 x i32>
1263 %6 = bitcast <2 x i32> %5 to <1 x i64>
1264 %7 = extractelement <1 x i64> %6, i32 0
; test11: calls llvm.x86.ssse3.psign.w with %a as the first operand (%2) and
; %b as the second (%3). Arguments and result are reinterpreted through
; <4 x i16>; element 0 of the resulting <1 x i64> is extracted as %7.
1268 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
1270 define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1271 ; ALL-LABEL: @test11
1274 %0 = bitcast <1 x i64> %b to <4 x i16>
1275 %1 = bitcast <1 x i64> %a to <4 x i16>
1276 %2 = bitcast <4 x i16> %1 to x86_mmx
1277 %3 = bitcast <4 x i16> %0 to x86_mmx
1278 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
1279 %5 = bitcast x86_mmx %4 to <4 x i16>
1280 %6 = bitcast <4 x i16> %5 to <1 x i64>
1281 %7 = extractelement <1 x i64> %6, i32 0
; test10: calls llvm.x86.ssse3.psign.b with %a as the first operand (%2) and
; %b as the second (%3). Arguments and result are reinterpreted through
; <8 x i8>; element 0 of the resulting <1 x i64> is extracted as %7.
1285 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
1287 define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1288 ; ALL-LABEL: @test10
1291 %0 = bitcast <1 x i64> %b to <8 x i8>
1292 %1 = bitcast <1 x i64> %a to <8 x i8>
1293 %2 = bitcast <8 x i8> %1 to x86_mmx
1294 %3 = bitcast <8 x i8> %0 to x86_mmx
1295 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
1296 %5 = bitcast x86_mmx %4 to <8 x i8>
1297 %6 = bitcast <8 x i8> %5 to <1 x i64>
1298 %7 = extractelement <1 x i64> %6, i32 0
; test9: calls llvm.x86.ssse3.pshuf.b with %a as the first operand (%2) and %b
; as the second (%3). Arguments and result are reinterpreted through <8 x i8>;
; element 0 of the resulting <1 x i64> is extracted as %7.
1302 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
1304 define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1308 %0 = bitcast <1 x i64> %b to <8 x i8>
1309 %1 = bitcast <1 x i64> %a to <8 x i8>
1310 %2 = bitcast <8 x i8> %1 to x86_mmx
1311 %3 = bitcast <8 x i8> %0 to x86_mmx
1312 %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
1313 %5 = bitcast x86_mmx %4 to <8 x i8>
1314 %6 = bitcast <8 x i8> %5 to <1 x i64>
1315 %7 = extractelement <1 x i64> %6, i32 0
; test8: calls llvm.x86.ssse3.pmul.hr.sw with %a as the first operand (%2) and
; %b as the second (%3). Arguments and result are reinterpreted through
; <4 x i16>; element 0 of the resulting <1 x i64> is extracted as %7.
1319 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
1321 define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1325 %0 = bitcast <1 x i64> %b to <4 x i16>
1326 %1 = bitcast <1 x i64> %a to <4 x i16>
1327 %2 = bitcast <4 x i16> %1 to x86_mmx
1328 %3 = bitcast <4 x i16> %0 to x86_mmx
1329 %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1330 %5 = bitcast x86_mmx %4 to <4 x i16>
1331 %6 = bitcast <4 x i16> %5 to <1 x i64>
1332 %7 = extractelement <1 x i64> %6, i32 0
; test7: calls llvm.x86.ssse3.pmadd.ub.sw with %a as the first operand (%2)
; and %b as the second (%3). Both the arguments and the result are
; reinterpreted through <8 x i8>; since every step is a same-width bitcast,
; the intermediate element type does not change the 64 bits that reach the
; final extractelement (%7).
1336 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
1338 define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1342 %0 = bitcast <1 x i64> %b to <8 x i8>
1343 %1 = bitcast <1 x i64> %a to <8 x i8>
1344 %2 = bitcast <8 x i8> %1 to x86_mmx
1345 %3 = bitcast <8 x i8> %0 to x86_mmx
1346 %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1347 %5 = bitcast x86_mmx %4 to <8 x i8>
1348 %6 = bitcast <8 x i8> %5 to <1 x i64>
1349 %7 = extractelement <1 x i64> %6, i32 0
; test6: calls llvm.x86.ssse3.phsub.sw with %a as the first operand (%2) and
; %b as the second (%3). Arguments and result are reinterpreted through
; <4 x i16>; element 0 of the resulting <1 x i64> is extracted as %7.
1353 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
1355 define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1359 %0 = bitcast <1 x i64> %b to <4 x i16>
1360 %1 = bitcast <1 x i64> %a to <4 x i16>
1361 %2 = bitcast <4 x i16> %1 to x86_mmx
1362 %3 = bitcast <4 x i16> %0 to x86_mmx
1363 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1364 %5 = bitcast x86_mmx %4 to <4 x i16>
1365 %6 = bitcast <4 x i16> %5 to <1 x i64>
1366 %7 = extractelement <1 x i64> %6, i32 0
; test5: calls llvm.x86.ssse3.phsub.d with %a as the first operand (%2) and %b
; as the second (%3). Arguments and result are reinterpreted through
; <2 x i32>; element 0 of the resulting <1 x i64> is extracted as %7.
1370 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
1372 define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1376 %0 = bitcast <1 x i64> %b to <2 x i32>
1377 %1 = bitcast <1 x i64> %a to <2 x i32>
1378 %2 = bitcast <2 x i32> %1 to x86_mmx
1379 %3 = bitcast <2 x i32> %0 to x86_mmx
1380 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1381 %5 = bitcast x86_mmx %4 to <2 x i32>
1382 %6 = bitcast <2 x i32> %5 to <1 x i64>
1383 %7 = extractelement <1 x i64> %6, i32 0
; test4: calls llvm.x86.ssse3.phsub.w with %a as the first operand (%2) and %b
; as the second (%3). Arguments and result are reinterpreted through
; <4 x i16>; element 0 of the resulting <1 x i64> is extracted as %7.
1387 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
1389 define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1393 %0 = bitcast <1 x i64> %b to <4 x i16>
1394 %1 = bitcast <1 x i64> %a to <4 x i16>
1395 %2 = bitcast <4 x i16> %1 to x86_mmx
1396 %3 = bitcast <4 x i16> %0 to x86_mmx
1397 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
1398 %5 = bitcast x86_mmx %4 to <4 x i16>
1399 %6 = bitcast <4 x i16> %5 to <1 x i64>
1400 %7 = extractelement <1 x i64> %6, i32 0
; test3: calls llvm.x86.ssse3.phadd.sw with %a as the first operand (%2) and
; %b as the second (%3). Arguments and result are reinterpreted through
; <4 x i16>; element 0 of the resulting <1 x i64> is extracted as %7.
1404 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
1406 define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1410 %0 = bitcast <1 x i64> %b to <4 x i16>
1411 %1 = bitcast <1 x i64> %a to <4 x i16>
1412 %2 = bitcast <4 x i16> %1 to x86_mmx
1413 %3 = bitcast <4 x i16> %0 to x86_mmx
1414 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1415 %5 = bitcast x86_mmx %4 to <4 x i16>
1416 %6 = bitcast <4 x i16> %5 to <1 x i64>
1417 %7 = extractelement <1 x i64> %6, i32 0
; test2: calls llvm.x86.ssse3.phadd.d with %a as the first operand (%2) and %b
; as the second (%3). Arguments and result are reinterpreted through
; <2 x i32>; element 0 of the resulting <1 x i64> is extracted as %7.
1421 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
1423 define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1427 %0 = bitcast <1 x i64> %b to <2 x i32>
1428 %1 = bitcast <1 x i64> %a to <2 x i32>
1429 %2 = bitcast <2 x i32> %1 to x86_mmx
1430 %3 = bitcast <2 x i32> %0 to x86_mmx
1431 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1432 %5 = bitcast x86_mmx %4 to <2 x i32>
1433 %6 = bitcast <2 x i32> %5 to <1 x i64>
1434 %7 = extractelement <1 x i64> %6, i32 0
; test89: passes its two parameters straight to llvm.x86.sse.cvtpi2ps, giving
; the <4 x float> value %c. Unlike the tests above, %b is already typed
; x86_mmx in the signature, so no bitcasts are involved.
; The intrinsic is declared after its use, which LLVM IR permits.
1438 define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
1439 ; ALL-LABEL: @test89
1441 %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
1445 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
; test90: takes no arguments and calls the void, zero-operand intrinsic
; llvm.x86.mmx.emms. Neither the function nor the declaration carries any
; attributes.
1448 define void @test90() {
1449 ; ALL-LABEL: @test90
1451 call void @llvm.x86.mmx.emms()
1455 declare void @llvm.x86.mmx.emms()
; test_mm_insert_pi16: bitcasts %a.coerce to x86_mmx, calls
; llvm.x86.mmx.pinsr.w with the i32 value %d and the constant index 2, and
; bitcasts the result back to <1 x i64> (%2).
;
; Unlike the loose pattern checks used earlier in the file, this function pins
; the exact instruction sequence per target:
;  - 32-bit: the two halves of the argument are read from 8(%ebp) and
;    12(%ebp) and copied into an 8-byte-aligned stack area (andl $-8, %esp),
;    loaded into %mm0, pinsrw $2 takes its source directly from 16(%ebp), and
;    the result is stored to the stack and reloaded into %eax and %edx.
;  - 64-bit: %rdi is moved into %mm0, pinsrw $2 takes its source from %esi,
;    and %mm0 is moved to %rax.
1457 define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind {
1458 ; X86-LABEL: test_mm_insert_pi16:
1459 ; X86: # %bb.0: # %entry
1460 ; X86-NEXT: pushl %ebp
1461 ; X86-NEXT: movl %esp, %ebp
1462 ; X86-NEXT: andl $-8, %esp
1463 ; X86-NEXT: subl $16, %esp
1464 ; X86-NEXT: movl 8(%ebp), %eax
1465 ; X86-NEXT: movl 12(%ebp), %ecx
1466 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1467 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1468 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1469 ; X86-NEXT: pinsrw $2, 16(%ebp), %mm0
1470 ; X86-NEXT: movq %mm0, (%esp)
1471 ; X86-NEXT: movl (%esp), %eax
1472 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1473 ; X86-NEXT: movl %ebp, %esp
1474 ; X86-NEXT: popl %ebp
1477 ; X64-LABEL: test_mm_insert_pi16:
1478 ; X64: # %bb.0: # %entry
1479 ; X64-NEXT: movq %rdi, %mm0
1480 ; X64-NEXT: pinsrw $2, %esi, %mm0
1481 ; X64-NEXT: movq %mm0, %rax
1484 %0 = bitcast <1 x i64> %a.coerce to x86_mmx
1485 %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2)
1486 %2 = bitcast x86_mmx %1 to <1 x i64>
1490 declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg)
; test_mm_extract_pi16: bitcasts %a.coerce to x86_mmx and calls
; llvm.x86.mmx.pextr.w with the constant index 2, producing the i32 value %1.
; The index parameter is declared immarg, so it must be a constant.
;
; The exact instruction sequence is pinned per target:
;  - 32-bit: the two halves of the argument are read from 8(%ebp) and
;    12(%ebp) and copied into an 8-byte-aligned stack area (andl $-8, %esp),
;    loaded into %mm0, and pextrw $2 writes its result to %eax.
;  - 64-bit: %rdi is moved into %mm0 and pextrw $2 writes its result to %eax.
1492 define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind {
1493 ; X86-LABEL: test_mm_extract_pi16:
1494 ; X86: # %bb.0: # %entry
1495 ; X86-NEXT: pushl %ebp
1496 ; X86-NEXT: movl %esp, %ebp
1497 ; X86-NEXT: andl $-8, %esp
1498 ; X86-NEXT: subl $8, %esp
1499 ; X86-NEXT: movl 8(%ebp), %eax
1500 ; X86-NEXT: movl 12(%ebp), %ecx
1501 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1502 ; X86-NEXT: movl %eax, (%esp)
1503 ; X86-NEXT: movq (%esp), %mm0
1504 ; X86-NEXT: pextrw $2, %mm0, %eax
1505 ; X86-NEXT: movl %ebp, %esp
1506 ; X86-NEXT: popl %ebp
1509 ; X64-LABEL: test_mm_extract_pi16:
1510 ; X64: # %bb.0: # %entry
1511 ; X64-NEXT: movq %rdi, %mm0
1512 ; X64-NEXT: pextrw $2, %mm0, %eax
1515 %0 = bitcast <1 x i64> %a.coerce to x86_mmx
1516 %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2)
1520 declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg)