1 ; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
2 ; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
4 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
; test1: exercises @llvm.x86.ssse3.phadd.w. %a and %b are each viewed as
; <4 x i16>, converted to x86_mmx and passed to the intrinsic (%a first); the
; result is converted back to <1 x i64> and element 0 is extracted as an i64.
6 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
8 define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
12 %0 = bitcast <1 x i64> %b to <4 x i16>
13 %1 = bitcast <1 x i64> %a to <4 x i16>
14 %2 = bitcast <4 x i16> %1 to x86_mmx
15 %3 = bitcast <4 x i16> %0 to x86_mmx
16 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
17 %5 = bitcast x86_mmx %4 to <4 x i16>
18 %6 = bitcast <4 x i16> %5 to <1 x i64>
19 %7 = extractelement <1 x i64> %6, i32 0
; test88: exercises @llvm.x86.mmx.pcmpgt.d. Both arguments are viewed as
; <2 x i32>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <2 x i32> to <1 x i64> and element 0 is extracted.
23 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
25 define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
29 %0 = bitcast <1 x i64> %b to <2 x i32>
30 %1 = bitcast <1 x i64> %a to <2 x i32>
31 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
32 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
33 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
34 %3 = bitcast x86_mmx %2 to <2 x i32>
35 %4 = bitcast <2 x i32> %3 to <1 x i64>
36 %5 = extractelement <1 x i64> %4, i32 0
; test87: exercises @llvm.x86.mmx.pcmpgt.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
40 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
42 define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
46 %0 = bitcast <1 x i64> %b to <4 x i16>
47 %1 = bitcast <1 x i64> %a to <4 x i16>
48 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
49 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
50 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
51 %3 = bitcast x86_mmx %2 to <4 x i16>
52 %4 = bitcast <4 x i16> %3 to <1 x i64>
53 %5 = extractelement <1 x i64> %4, i32 0
; test86: exercises @llvm.x86.mmx.pcmpgt.b. Both arguments are viewed as
; <8 x i8>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <8 x i8> to <1 x i64> and element 0 is extracted.
57 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
59 define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
63 %0 = bitcast <1 x i64> %b to <8 x i8>
64 %1 = bitcast <1 x i64> %a to <8 x i8>
65 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
66 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
67 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
68 %3 = bitcast x86_mmx %2 to <8 x i8>
69 %4 = bitcast <8 x i8> %3 to <1 x i64>
70 %5 = extractelement <1 x i64> %4, i32 0
; test85: exercises @llvm.x86.mmx.pcmpeq.d. Both arguments are viewed as
; <2 x i32>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <2 x i32> to <1 x i64> and element 0 is extracted.
74 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
76 define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
80 %0 = bitcast <1 x i64> %b to <2 x i32>
81 %1 = bitcast <1 x i64> %a to <2 x i32>
82 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
83 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
84 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
85 %3 = bitcast x86_mmx %2 to <2 x i32>
86 %4 = bitcast <2 x i32> %3 to <1 x i64>
87 %5 = extractelement <1 x i64> %4, i32 0
; test84: exercises @llvm.x86.mmx.pcmpeq.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
91 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
93 define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
97 %0 = bitcast <1 x i64> %b to <4 x i16>
98 %1 = bitcast <1 x i64> %a to <4 x i16>
99 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
100 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
101 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
102 %3 = bitcast x86_mmx %2 to <4 x i16>
103 %4 = bitcast <4 x i16> %3 to <1 x i64>
104 %5 = extractelement <1 x i64> %4, i32 0
; test83: exercises @llvm.x86.mmx.pcmpeq.b. Both arguments are viewed as
; <8 x i8>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <8 x i8> to <1 x i64> and element 0 is extracted.
108 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
110 define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
114 %0 = bitcast <1 x i64> %b to <8 x i8>
115 %1 = bitcast <1 x i64> %a to <8 x i8>
116 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
117 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
118 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
119 %3 = bitcast x86_mmx %2 to <8 x i8>
120 %4 = bitcast <8 x i8> %3 to <1 x i64>
121 %5 = extractelement <1 x i64> %4, i32 0
; test82: exercises @llvm.x86.mmx.punpckldq on %a and %b viewed as <2 x i32>
; (%a is the first operand). The X86 and X64 check lines expect the same
; punpckldq shuffle comment and differ only in the second source: "mem" for
; the X86 prefix, "mm1" for the X64 prefix.
125 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
127 define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
129 ; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0]
130 ; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0]
132 %0 = bitcast <1 x i64> %b to <2 x i32>
133 %1 = bitcast <1 x i64> %a to <2 x i32>
134 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
135 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
136 %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
137 %3 = bitcast x86_mmx %2 to <2 x i32>
138 %4 = bitcast <2 x i32> %3 to <1 x i64>
139 %5 = extractelement <1 x i64> %4, i32 0
; test81: exercises @llvm.x86.mmx.punpcklwd on %a and %b viewed as <4 x i16>
; (%a is the first operand). The check lines expect elements 0 and 1 of the
; two sources to be interleaved; the second source is "mem" for the X86
; prefix and "mm1" for the X64 prefix.
143 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
145 define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
147 ; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1]
148 ; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
150 %0 = bitcast <1 x i64> %b to <4 x i16>
151 %1 = bitcast <1 x i64> %a to <4 x i16>
152 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
153 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
154 %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
155 %3 = bitcast x86_mmx %2 to <4 x i16>
156 %4 = bitcast <4 x i16> %3 to <1 x i64>
157 %5 = extractelement <1 x i64> %4, i32 0
; test80: exercises @llvm.x86.mmx.punpcklbw on %a and %b viewed as <8 x i8>
; (%a is the first operand). The check lines expect elements 0-3 of the two
; sources to be interleaved; the second source is "mem" for the X86 prefix
; and "mm1" for the X64 prefix.
161 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
163 define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
165 ; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
166 ; X64: punpcklbw {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
168 %0 = bitcast <1 x i64> %b to <8 x i8>
169 %1 = bitcast <1 x i64> %a to <8 x i8>
170 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
171 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
172 %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
173 %3 = bitcast x86_mmx %2 to <8 x i8>
174 %4 = bitcast <8 x i8> %3 to <1 x i64>
175 %5 = extractelement <1 x i64> %4, i32 0
; test79: exercises @llvm.x86.mmx.punpckhdq on %a and %b viewed as <2 x i32>
; (%a is the first operand). The check lines expect element 1 of each source;
; the second source is "mem" for the X86 prefix and "mm1" for the X64 prefix.
179 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
181 define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
183 ; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1]
184 ; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1]
186 %0 = bitcast <1 x i64> %b to <2 x i32>
187 %1 = bitcast <1 x i64> %a to <2 x i32>
188 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
189 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
190 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
191 %3 = bitcast x86_mmx %2 to <2 x i32>
192 %4 = bitcast <2 x i32> %3 to <1 x i64>
193 %5 = extractelement <1 x i64> %4, i32 0
; test78: exercises @llvm.x86.mmx.punpckhwd on %a and %b viewed as <4 x i16>
; (%a is the first operand). The check lines expect elements 2 and 3 of the
; two sources to be interleaved; the second source is "mem" for the X86
; prefix and "mm1" for the X64 prefix.
197 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
199 define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
201 ; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3]
202 ; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3]
204 %0 = bitcast <1 x i64> %b to <4 x i16>
205 %1 = bitcast <1 x i64> %a to <4 x i16>
206 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
207 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
208 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
209 %3 = bitcast x86_mmx %2 to <4 x i16>
210 %4 = bitcast <4 x i16> %3 to <1 x i64>
211 %5 = extractelement <1 x i64> %4, i32 0
; test77: exercises @llvm.x86.mmx.punpckhbw on %a and %b viewed as <8 x i8>
; (%a is the first operand). The check lines expect elements 4-7 of the two
; sources to be interleaved; the second source is "mem" for the X86 prefix
; and "mm1" for the X64 prefix.
215 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
217 define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
219 ; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
220 ; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7]
222 %0 = bitcast <1 x i64> %b to <8 x i8>
223 %1 = bitcast <1 x i64> %a to <8 x i8>
224 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
225 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
226 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
227 %3 = bitcast x86_mmx %2 to <8 x i8>
228 %4 = bitcast <8 x i8> %3 to <1 x i64>
229 %5 = extractelement <1 x i64> %4, i32 0
; test76: exercises @llvm.x86.mmx.packuswb. The inputs are viewed as
; <4 x i16> (%a is the first operand), while the result is viewed as <8 x i8>
; before being converted to <1 x i64> and having element 0 extracted.
233 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
235 define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
239 %0 = bitcast <1 x i64> %b to <4 x i16>
240 %1 = bitcast <1 x i64> %a to <4 x i16>
241 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
242 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
243 %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
244 %3 = bitcast x86_mmx %2 to <8 x i8>
245 %4 = bitcast <8 x i8> %3 to <1 x i64>
246 %5 = extractelement <1 x i64> %4, i32 0
; test75: exercises @llvm.x86.mmx.packssdw. The inputs are viewed as
; <2 x i32> (%a is the first operand), while the result is viewed as
; <4 x i16> before being converted to <1 x i64> and having element 0 extracted.
250 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
252 define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
256 %0 = bitcast <1 x i64> %b to <2 x i32>
257 %1 = bitcast <1 x i64> %a to <2 x i32>
258 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
259 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
260 %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
261 %3 = bitcast x86_mmx %2 to <4 x i16>
262 %4 = bitcast <4 x i16> %3 to <1 x i64>
263 %5 = extractelement <1 x i64> %4, i32 0
; test74: exercises @llvm.x86.mmx.packsswb. The inputs are viewed as
; <4 x i16> (%a is the first operand), while the result is viewed as <8 x i8>
; before being converted to <1 x i64> and having element 0 extracted.
267 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
269 define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
273 %0 = bitcast <1 x i64> %b to <4 x i16>
274 %1 = bitcast <1 x i64> %a to <4 x i16>
275 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
276 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
277 %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
278 %3 = bitcast x86_mmx %2 to <8 x i8>
279 %4 = bitcast <8 x i8> %3 to <1 x i64>
280 %5 = extractelement <1 x i64> %4, i32 0
; test73: exercises @llvm.x86.mmx.psrai.d with the constant i32 operand 3.
; %a is viewed as <2 x i32> and converted to x86_mmx; the result goes back
; through <2 x i32> to <1 x i64> and element 0 is extracted.
284 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
286 define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
290 %0 = bitcast <1 x i64> %a to <2 x i32>
291 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
292 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
293 %2 = bitcast x86_mmx %1 to <2 x i32>
294 %3 = bitcast <2 x i32> %2 to <1 x i64>
295 %4 = extractelement <1 x i64> %3, i32 0
; test72: exercises @llvm.x86.mmx.psrai.w with the constant i32 operand 3.
; %a is viewed as <4 x i16> and converted to x86_mmx; the result goes back
; through <4 x i16> to <1 x i64> and element 0 is extracted.
299 declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
301 define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
305 %0 = bitcast <1 x i64> %a to <4 x i16>
306 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
307 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
308 %2 = bitcast x86_mmx %1 to <4 x i16>
309 %3 = bitcast <4 x i16> %2 to <1 x i64>
310 %4 = extractelement <1 x i64> %3, i32 0
; test72_2: same sequence as test72 (@llvm.x86.mmx.psrai.w on %a viewed as
; <4 x i16>), but the constant i32 operand is 0 instead of 3.
314 define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
315 ; ALL-LABEL: @test72_2
318 %0 = bitcast <1 x i64> %a to <4 x i16>
319 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
320 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind
321 %2 = bitcast x86_mmx %1 to <4 x i16>
322 %3 = bitcast <4 x i16> %2 to <1 x i64>
323 %4 = extractelement <1 x i64> %3, i32 0
; test71: exercises @llvm.x86.mmx.psrli.q with the constant i32 operand 3.
; Element 0 of %a is extracted as an i64 and bitcast directly to x86_mmx; the
; intrinsic result is bitcast directly back to i64.
327 declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
329 define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
333 %0 = extractelement <1 x i64> %a, i32 0
334 %mmx_var.i = bitcast i64 %0 to x86_mmx
335 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
336 %2 = bitcast x86_mmx %1 to i64
; test70: exercises @llvm.x86.mmx.psrli.d with the constant i32 operand 3.
; %a is viewed as <2 x i32> and converted to x86_mmx; the result goes back
; through <2 x i32> to <1 x i64> and element 0 is extracted.
340 declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
342 define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
346 %0 = bitcast <1 x i64> %a to <2 x i32>
347 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
348 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
349 %2 = bitcast x86_mmx %1 to <2 x i32>
350 %3 = bitcast <2 x i32> %2 to <1 x i64>
351 %4 = extractelement <1 x i64> %3, i32 0
; test70_2: same sequence as test70 (@llvm.x86.mmx.psrli.d on %a viewed as
; <2 x i32>), but the constant i32 operand is 0 instead of 3.
355 define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
356 ; ALL-LABEL: @test70_2
359 %0 = bitcast <1 x i64> %a to <2 x i32>
360 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
361 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind
362 %2 = bitcast x86_mmx %1 to <2 x i32>
363 %3 = bitcast <2 x i32> %2 to <1 x i64>
364 %4 = extractelement <1 x i64> %3, i32 0
; test69: exercises @llvm.x86.mmx.psrli.w with the constant i32 operand 3.
; %a is viewed as <4 x i16> and converted to x86_mmx; the result goes back
; through <4 x i16> to <1 x i64> and element 0 is extracted.
368 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
370 define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
374 %0 = bitcast <1 x i64> %a to <4 x i16>
375 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
376 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
377 %2 = bitcast x86_mmx %1 to <4 x i16>
378 %3 = bitcast <4 x i16> %2 to <1 x i64>
379 %4 = extractelement <1 x i64> %3, i32 0
; test68: exercises @llvm.x86.mmx.pslli.q with the constant i32 operand 3.
; Element 0 of %a is extracted as an i64 and bitcast directly to x86_mmx; the
; intrinsic result is bitcast directly back to i64.
383 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
385 define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
389 %0 = extractelement <1 x i64> %a, i32 0
390 %mmx_var.i = bitcast i64 %0 to x86_mmx
391 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
392 %2 = bitcast x86_mmx %1 to i64
; test67: exercises @llvm.x86.mmx.pslli.d with the constant i32 operand 3.
; %a is viewed as <2 x i32> and converted to x86_mmx; the result goes back
; through <2 x i32> to <1 x i64> and element 0 is extracted.
396 declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
398 define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
402 %0 = bitcast <1 x i64> %a to <2 x i32>
403 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
404 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
405 %2 = bitcast x86_mmx %1 to <2 x i32>
406 %3 = bitcast <2 x i32> %2 to <1 x i64>
407 %4 = extractelement <1 x i64> %3, i32 0
; test66: exercises @llvm.x86.mmx.pslli.w with the constant i32 operand 3.
; %a is viewed as <4 x i16> and converted to x86_mmx; the result goes back
; through <4 x i16> to <1 x i64> and element 0 is extracted.
411 declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
413 define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
417 %0 = bitcast <1 x i64> %a to <4 x i16>
418 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
419 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
420 %2 = bitcast x86_mmx %1 to <4 x i16>
421 %3 = bitcast <4 x i16> %2 to <1 x i64>
422 %4 = extractelement <1 x i64> %3, i32 0
; test66_2: same sequence as test66 (@llvm.x86.mmx.pslli.w on %a viewed as
; <4 x i16>), but the constant i32 operand is 0 instead of 3.
426 define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp {
427 ; ALL-LABEL: @test66_2
430 %0 = bitcast <1 x i64> %a to <4 x i16>
431 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
432 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind
433 %2 = bitcast x86_mmx %1 to <4 x i16>
434 %3 = bitcast <4 x i16> %2 to <1 x i64>
435 %4 = extractelement <1 x i64> %3, i32 0
; test65: exercises @llvm.x86.mmx.psra.d. The first operand is %a viewed as
; <2 x i32>; the second operand is element 0 of %b, extracted as an i64 and
; bitcast directly to x86_mmx. The result goes back through <2 x i32> to
; <1 x i64> and element 0 is extracted.
439 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
441 define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
445 %0 = bitcast <1 x i64> %a to <2 x i32>
446 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
447 %1 = extractelement <1 x i64> %b, i32 0
448 %mmx_var1.i = bitcast i64 %1 to x86_mmx
449 %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
450 %3 = bitcast x86_mmx %2 to <2 x i32>
451 %4 = bitcast <2 x i32> %3 to <1 x i64>
452 %5 = extractelement <1 x i64> %4, i32 0
; test64: exercises @llvm.x86.mmx.psra.w. The first operand is %a viewed as
; <4 x i16>; the second operand is element 0 of %b, extracted as an i64 and
; bitcast directly to x86_mmx. The result goes back through <4 x i16> to
; <1 x i64> and element 0 is extracted.
456 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
458 define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
462 %0 = bitcast <1 x i64> %a to <4 x i16>
463 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
464 %1 = extractelement <1 x i64> %b, i32 0
465 %mmx_var1.i = bitcast i64 %1 to x86_mmx
466 %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
467 %3 = bitcast x86_mmx %2 to <4 x i16>
468 %4 = bitcast <4 x i16> %3 to <1 x i64>
469 %5 = extractelement <1 x i64> %4, i32 0
; test63: exercises @llvm.x86.mmx.psrl.q. Element 0 of %a and of %b is
; extracted as an i64 and bitcast directly to x86_mmx (%a is the first
; operand); the intrinsic result is bitcast directly back to i64.
473 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
475 define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
479 %0 = extractelement <1 x i64> %a, i32 0
480 %mmx_var.i = bitcast i64 %0 to x86_mmx
481 %1 = extractelement <1 x i64> %b, i32 0
482 %mmx_var1.i = bitcast i64 %1 to x86_mmx
483 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
484 %3 = bitcast x86_mmx %2 to i64
; test62: exercises @llvm.x86.mmx.psrl.d. The first operand is %a viewed as
; <2 x i32>; the second operand is element 0 of %b, extracted as an i64 and
; bitcast directly to x86_mmx. The result goes back through <2 x i32> to
; <1 x i64> and element 0 is extracted.
488 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
490 define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
494 %0 = bitcast <1 x i64> %a to <2 x i32>
495 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
496 %1 = extractelement <1 x i64> %b, i32 0
497 %mmx_var1.i = bitcast i64 %1 to x86_mmx
498 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
499 %3 = bitcast x86_mmx %2 to <2 x i32>
500 %4 = bitcast <2 x i32> %3 to <1 x i64>
501 %5 = extractelement <1 x i64> %4, i32 0
; test61: exercises @llvm.x86.mmx.psrl.w. The first operand is %a viewed as
; <4 x i16>; the second operand is element 0 of %b, extracted as an i64 and
; bitcast directly to x86_mmx. The result goes back through <4 x i16> to
; <1 x i64> and element 0 is extracted.
505 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
507 define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
511 %0 = bitcast <1 x i64> %a to <4 x i16>
512 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
513 %1 = extractelement <1 x i64> %b, i32 0
514 %mmx_var1.i = bitcast i64 %1 to x86_mmx
515 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
516 %3 = bitcast x86_mmx %2 to <4 x i16>
517 %4 = bitcast <4 x i16> %3 to <1 x i64>
518 %5 = extractelement <1 x i64> %4, i32 0
; test60: exercises @llvm.x86.mmx.psll.q. Element 0 of %a and of %b is
; extracted as an i64 and bitcast directly to x86_mmx (%a is the first
; operand); the intrinsic result is bitcast directly back to i64.
522 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
524 define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
528 %0 = extractelement <1 x i64> %a, i32 0
529 %mmx_var.i = bitcast i64 %0 to x86_mmx
530 %1 = extractelement <1 x i64> %b, i32 0
531 %mmx_var1.i = bitcast i64 %1 to x86_mmx
532 %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
533 %3 = bitcast x86_mmx %2 to i64
; test59: exercises @llvm.x86.mmx.psll.d. The first operand is %a viewed as
; <2 x i32>; the second operand is element 0 of %b, extracted as an i64 and
; bitcast directly to x86_mmx. The result goes back through <2 x i32> to
; <1 x i64> and element 0 is extracted.
537 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
539 define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
543 %0 = bitcast <1 x i64> %a to <2 x i32>
544 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
545 %1 = extractelement <1 x i64> %b, i32 0
546 %mmx_var1.i = bitcast i64 %1 to x86_mmx
547 %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
548 %3 = bitcast x86_mmx %2 to <2 x i32>
549 %4 = bitcast <2 x i32> %3 to <1 x i64>
550 %5 = extractelement <1 x i64> %4, i32 0
; test58: exercises @llvm.x86.mmx.psll.w. The first operand is %a viewed as
; <4 x i16>; the second operand is element 0 of %b, extracted as an i64 and
; bitcast directly to x86_mmx. The result goes back through <4 x i16> to
; <1 x i64> and element 0 is extracted.
554 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
556 define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
560 %0 = bitcast <1 x i64> %a to <4 x i16>
561 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
562 %1 = extractelement <1 x i64> %b, i32 0
563 %mmx_var1.i = bitcast i64 %1 to x86_mmx
564 %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
565 %3 = bitcast x86_mmx %2 to <4 x i16>
566 %4 = bitcast <4 x i16> %3 to <1 x i64>
567 %5 = extractelement <1 x i64> %4, i32 0
; test56: exercises @llvm.x86.mmx.pxor. Both arguments are viewed as
; <2 x i32>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <2 x i32> to <1 x i64> and element 0 is extracted.
571 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
573 define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
577 %0 = bitcast <1 x i64> %b to <2 x i32>
578 %1 = bitcast <1 x i64> %a to <2 x i32>
579 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
580 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
581 %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
582 %3 = bitcast x86_mmx %2 to <2 x i32>
583 %4 = bitcast <2 x i32> %3 to <1 x i64>
584 %5 = extractelement <1 x i64> %4, i32 0
; test55: exercises @llvm.x86.mmx.por. Both arguments are viewed as
; <2 x i32>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <2 x i32> to <1 x i64> and element 0 is extracted.
588 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
590 define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
594 %0 = bitcast <1 x i64> %b to <2 x i32>
595 %1 = bitcast <1 x i64> %a to <2 x i32>
596 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
597 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
598 %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
599 %3 = bitcast x86_mmx %2 to <2 x i32>
600 %4 = bitcast <2 x i32> %3 to <1 x i64>
601 %5 = extractelement <1 x i64> %4, i32 0
; test54: exercises @llvm.x86.mmx.pandn. Both arguments are viewed as
; <2 x i32>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <2 x i32> to <1 x i64> and element 0 is extracted.
605 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
607 define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
611 %0 = bitcast <1 x i64> %b to <2 x i32>
612 %1 = bitcast <1 x i64> %a to <2 x i32>
613 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
614 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
615 %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
616 %3 = bitcast x86_mmx %2 to <2 x i32>
617 %4 = bitcast <2 x i32> %3 to <1 x i64>
618 %5 = extractelement <1 x i64> %4, i32 0
; test53: exercises @llvm.x86.mmx.pand. Both arguments are viewed as
; <2 x i32>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <2 x i32> to <1 x i64> and element 0 is extracted.
622 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
624 define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
628 %0 = bitcast <1 x i64> %b to <2 x i32>
629 %1 = bitcast <1 x i64> %a to <2 x i32>
630 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
631 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
632 %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
633 %3 = bitcast x86_mmx %2 to <2 x i32>
634 %4 = bitcast <2 x i32> %3 to <1 x i64>
635 %5 = extractelement <1 x i64> %4, i32 0
; test52: exercises @llvm.x86.mmx.pmull.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
639 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
641 define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
645 %0 = bitcast <1 x i64> %b to <4 x i16>
646 %1 = bitcast <1 x i64> %a to <4 x i16>
647 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
648 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
649 %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
650 %3 = bitcast x86_mmx %2 to <4 x i16>
651 %4 = bitcast <4 x i16> %3 to <1 x i64>
652 %5 = extractelement <1 x i64> %4, i32 0
; test51: passes %a and %b, viewed as <4 x i16> and converted to x86_mmx, to
; @llvm.x86.mmx.pmull.w (%a first); the result goes back through <4 x i16> to
; <1 x i64> and element 0 is extracted.
; NOTE(review): this calls the same intrinsic, with the same operand types,
; as test52 -- confirm whether a different multiply intrinsic was intended.
656 define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
660 %0 = bitcast <1 x i64> %b to <4 x i16>
661 %1 = bitcast <1 x i64> %a to <4 x i16>
662 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
663 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
664 %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
665 %3 = bitcast x86_mmx %2 to <4 x i16>
666 %4 = bitcast <4 x i16> %3 to <1 x i64>
667 %5 = extractelement <1 x i64> %4, i32 0
; test50: exercises @llvm.x86.mmx.pmulh.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
671 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
673 define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
677 %0 = bitcast <1 x i64> %b to <4 x i16>
678 %1 = bitcast <1 x i64> %a to <4 x i16>
679 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
680 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
681 %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
682 %3 = bitcast x86_mmx %2 to <4 x i16>
683 %4 = bitcast <4 x i16> %3 to <1 x i64>
684 %5 = extractelement <1 x i64> %4, i32 0
; test49: exercises @llvm.x86.mmx.pmadd.wd. The inputs are viewed as
; <4 x i16> (%a is the first operand), while the result is viewed as
; <2 x i32> before being converted to <1 x i64> and having element 0 extracted.
688 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
690 define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
694 %0 = bitcast <1 x i64> %b to <4 x i16>
695 %1 = bitcast <1 x i64> %a to <4 x i16>
696 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
697 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
698 %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
699 %3 = bitcast x86_mmx %2 to <2 x i32>
700 %4 = bitcast <2 x i32> %3 to <1 x i64>
701 %5 = extractelement <1 x i64> %4, i32 0
; test48: exercises @llvm.x86.mmx.psubus.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
705 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
707 define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
711 %0 = bitcast <1 x i64> %b to <4 x i16>
712 %1 = bitcast <1 x i64> %a to <4 x i16>
713 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
714 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
715 %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
716 %3 = bitcast x86_mmx %2 to <4 x i16>
717 %4 = bitcast <4 x i16> %3 to <1 x i64>
718 %5 = extractelement <1 x i64> %4, i32 0
; test47: exercises @llvm.x86.mmx.psubus.b. Both arguments are viewed as
; <8 x i8>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <8 x i8> to <1 x i64> and element 0 is extracted.
722 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
724 define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
728 %0 = bitcast <1 x i64> %b to <8 x i8>
729 %1 = bitcast <1 x i64> %a to <8 x i8>
730 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
731 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
732 %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
733 %3 = bitcast x86_mmx %2 to <8 x i8>
734 %4 = bitcast <8 x i8> %3 to <1 x i64>
735 %5 = extractelement <1 x i64> %4, i32 0
; test46: exercises @llvm.x86.mmx.psubs.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
739 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
741 define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
745 %0 = bitcast <1 x i64> %b to <4 x i16>
746 %1 = bitcast <1 x i64> %a to <4 x i16>
747 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
748 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
749 %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
750 %3 = bitcast x86_mmx %2 to <4 x i16>
751 %4 = bitcast <4 x i16> %3 to <1 x i64>
752 %5 = extractelement <1 x i64> %4, i32 0
; test45: exercises @llvm.x86.mmx.psubs.b. Both arguments are viewed as
; <8 x i8>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <8 x i8> to <1 x i64> and element 0 is extracted.
756 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
758 define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
762 %0 = bitcast <1 x i64> %b to <8 x i8>
763 %1 = bitcast <1 x i64> %a to <8 x i8>
764 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
765 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
766 %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
767 %3 = bitcast x86_mmx %2 to <8 x i8>
768 %4 = bitcast <8 x i8> %3 to <1 x i64>
769 %5 = extractelement <1 x i64> %4, i32 0
; test44: exercises @llvm.x86.mmx.psub.q. Element 0 of %a and of %b is
; extracted as an i64 and bitcast directly to x86_mmx (%a is the first
; operand); the intrinsic result is bitcast directly back to i64. The call
; site carries no attributes, and the intrinsic's declaration follows the
; function body instead of preceding it.
773 define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
777 %0 = extractelement <1 x i64> %a, i32 0
778 %mmx_var = bitcast i64 %0 to x86_mmx
779 %1 = extractelement <1 x i64> %b, i32 0
780 %mmx_var1 = bitcast i64 %1 to x86_mmx
781 %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
782 %3 = bitcast x86_mmx %2 to i64
786 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
; test43: exercises @llvm.x86.mmx.psub.d. Both arguments are viewed as
; <2 x i32>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <2 x i32> to <1 x i64> and element 0 is extracted.
788 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
790 define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
794 %0 = bitcast <1 x i64> %b to <2 x i32>
795 %1 = bitcast <1 x i64> %a to <2 x i32>
796 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
797 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
798 %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
799 %3 = bitcast x86_mmx %2 to <2 x i32>
800 %4 = bitcast <2 x i32> %3 to <1 x i64>
801 %5 = extractelement <1 x i64> %4, i32 0
; test42: exercises @llvm.x86.mmx.psub.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
805 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
807 define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
811 %0 = bitcast <1 x i64> %b to <4 x i16>
812 %1 = bitcast <1 x i64> %a to <4 x i16>
813 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
814 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
815 %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
816 %3 = bitcast x86_mmx %2 to <4 x i16>
817 %4 = bitcast <4 x i16> %3 to <1 x i64>
818 %5 = extractelement <1 x i64> %4, i32 0
; test41: exercises @llvm.x86.mmx.psub.b. Both arguments are viewed as
; <8 x i8>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <8 x i8> to <1 x i64> and element 0 is extracted.
822 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
824 define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
828 %0 = bitcast <1 x i64> %b to <8 x i8>
829 %1 = bitcast <1 x i64> %a to <8 x i8>
830 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
831 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
832 %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
833 %3 = bitcast x86_mmx %2 to <8 x i8>
834 %4 = bitcast <8 x i8> %3 to <1 x i64>
835 %5 = extractelement <1 x i64> %4, i32 0
; test40: exercises @llvm.x86.mmx.paddus.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
839 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
841 define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
845 %0 = bitcast <1 x i64> %b to <4 x i16>
846 %1 = bitcast <1 x i64> %a to <4 x i16>
847 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
848 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
849 %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
850 %3 = bitcast x86_mmx %2 to <4 x i16>
851 %4 = bitcast <4 x i16> %3 to <1 x i64>
852 %5 = extractelement <1 x i64> %4, i32 0
; test39: exercises @llvm.x86.mmx.paddus.b. Both arguments are viewed as
; <8 x i8>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <8 x i8> to <1 x i64> and element 0 is extracted.
856 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
858 define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
862 %0 = bitcast <1 x i64> %b to <8 x i8>
863 %1 = bitcast <1 x i64> %a to <8 x i8>
864 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
865 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
866 %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
867 %3 = bitcast x86_mmx %2 to <8 x i8>
868 %4 = bitcast <8 x i8> %3 to <1 x i64>
869 %5 = extractelement <1 x i64> %4, i32 0
; test38: exercises @llvm.x86.mmx.padds.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
873 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
875 define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
879 %0 = bitcast <1 x i64> %b to <4 x i16>
880 %1 = bitcast <1 x i64> %a to <4 x i16>
881 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
882 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
883 %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
884 %3 = bitcast x86_mmx %2 to <4 x i16>
885 %4 = bitcast <4 x i16> %3 to <1 x i64>
886 %5 = extractelement <1 x i64> %4, i32 0
; test37: exercises @llvm.x86.mmx.padds.b. Both arguments are viewed as
; <8 x i8>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <8 x i8> to <1 x i64> and element 0 is extracted.
890 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
892 define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
896 %0 = bitcast <1 x i64> %b to <8 x i8>
897 %1 = bitcast <1 x i64> %a to <8 x i8>
898 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
899 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
900 %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
901 %3 = bitcast x86_mmx %2 to <8 x i8>
902 %4 = bitcast <8 x i8> %3 to <1 x i64>
903 %5 = extractelement <1 x i64> %4, i32 0
; test36: exercises @llvm.x86.mmx.padd.q. Element 0 of %a and of %b is
; extracted as an i64 and bitcast directly to x86_mmx (%a is the first
; operand); the intrinsic result is bitcast directly back to i64. The call
; site carries no attributes.
907 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
909 define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
913 %0 = extractelement <1 x i64> %a, i32 0
914 %mmx_var = bitcast i64 %0 to x86_mmx
915 %1 = extractelement <1 x i64> %b, i32 0
916 %mmx_var1 = bitcast i64 %1 to x86_mmx
917 %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
918 %3 = bitcast x86_mmx %2 to i64
; test35: exercises @llvm.x86.mmx.padd.d. Both arguments are viewed as
; <2 x i32>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <2 x i32> to <1 x i64> and element 0 is extracted.
922 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
924 define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
928 %0 = bitcast <1 x i64> %b to <2 x i32>
929 %1 = bitcast <1 x i64> %a to <2 x i32>
930 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
931 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
932 %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
933 %3 = bitcast x86_mmx %2 to <2 x i32>
934 %4 = bitcast <2 x i32> %3 to <1 x i64>
935 %5 = extractelement <1 x i64> %4, i32 0
; test34: exercises @llvm.x86.mmx.padd.w. Both arguments are viewed as
; <4 x i16>, converted to x86_mmx and passed with %a as the first operand; the
; result goes back through <4 x i16> to <1 x i64> and element 0 is extracted.
939 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
941 define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
945 %0 = bitcast <1 x i64> %b to <4 x i16>
946 %1 = bitcast <1 x i64> %a to <4 x i16>
947 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
948 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
949 %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
950 %3 = bitcast x86_mmx %2 to <4 x i16>
951 %4 = bitcast <4 x i16> %3 to <1 x i64>
952 %5 = extractelement <1 x i64> %4, i32 0
; test33: exercises the llvm.x86.mmx.padd.b intrinsic.
; Both <1 x i64> arguments are viewed as <8 x i8> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
956 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
958 define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
962 %0 = bitcast <1 x i64> %b to <8 x i8>
963 %1 = bitcast <1 x i64> %a to <8 x i8>
964 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
965 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
966 %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
967 %3 = bitcast x86_mmx %2 to <8 x i8>
968 %4 = bitcast <8 x i8> %3 to <1 x i64>
969 %5 = extractelement <1 x i64> %4, i32 0
; test32: exercises the llvm.x86.mmx.psad.bw intrinsic.
; Both <1 x i64> arguments are viewed as <8 x i8> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
973 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
975 define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
979 %0 = bitcast <1 x i64> %b to <8 x i8>
980 %1 = bitcast <1 x i64> %a to <8 x i8>
981 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
982 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
983 %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; The result is reinterpreted directly as a scalar i64, with no vector round-trip.
984 %3 = bitcast x86_mmx %2 to i64
; test31: exercises the llvm.x86.mmx.pmins.w intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
988 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
990 define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
994 %0 = bitcast <1 x i64> %b to <4 x i16>
995 %1 = bitcast <1 x i64> %a to <4 x i16>
996 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
997 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
998 %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
999 %3 = bitcast x86_mmx %2 to <4 x i16>
1000 %4 = bitcast <4 x i16> %3 to <1 x i64>
1001 %5 = extractelement <1 x i64> %4, i32 0
; test30: exercises the llvm.x86.mmx.pminu.b intrinsic.
; Both <1 x i64> arguments are viewed as <8 x i8> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
; The label check below anchors FileCheck matching to this function's output.
1005 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
1007 define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1008 ; ALL-LABEL: @test30
1011 %0 = bitcast <1 x i64> %b to <8 x i8>
1012 %1 = bitcast <1 x i64> %a to <8 x i8>
1013 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1014 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1015 %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1016 %3 = bitcast x86_mmx %2 to <8 x i8>
1017 %4 = bitcast <8 x i8> %3 to <1 x i64>
1018 %5 = extractelement <1 x i64> %4, i32 0
; test29: exercises the llvm.x86.mmx.pmaxs.w intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
; The label check below anchors FileCheck matching to this function's output.
1022 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
1024 define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1025 ; ALL-LABEL: @test29
1028 %0 = bitcast <1 x i64> %b to <4 x i16>
1029 %1 = bitcast <1 x i64> %a to <4 x i16>
1030 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1031 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1032 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1033 %3 = bitcast x86_mmx %2 to <4 x i16>
1034 %4 = bitcast <4 x i16> %3 to <1 x i64>
1035 %5 = extractelement <1 x i64> %4, i32 0
; test28: exercises the llvm.x86.mmx.pmaxu.b intrinsic.
; Both <1 x i64> arguments are viewed as <8 x i8> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
; The label check below anchors FileCheck matching to this function's output.
1039 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
1041 define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1042 ; ALL-LABEL: @test28
1045 %0 = bitcast <1 x i64> %b to <8 x i8>
1046 %1 = bitcast <1 x i64> %a to <8 x i8>
1047 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1048 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1049 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1050 %3 = bitcast x86_mmx %2 to <8 x i8>
1051 %4 = bitcast <8 x i8> %3 to <1 x i64>
1052 %5 = extractelement <1 x i64> %4, i32 0
; test27: exercises the llvm.x86.mmx.pavg.w intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
; The label check below anchors FileCheck matching to this function's output.
1056 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
1058 define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1059 ; ALL-LABEL: @test27
1062 %0 = bitcast <1 x i64> %b to <4 x i16>
1063 %1 = bitcast <1 x i64> %a to <4 x i16>
1064 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1065 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1066 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1067 %3 = bitcast x86_mmx %2 to <4 x i16>
1068 %4 = bitcast <4 x i16> %3 to <1 x i64>
1069 %5 = extractelement <1 x i64> %4, i32 0
; test26: exercises the llvm.x86.mmx.pavg.b intrinsic.
; Both <1 x i64> arguments are viewed as <8 x i8> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
; The label check below anchors FileCheck matching to this function's output.
1073 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
1075 define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1076 ; ALL-LABEL: @test26
1079 %0 = bitcast <1 x i64> %b to <8 x i8>
1080 %1 = bitcast <1 x i64> %a to <8 x i8>
1081 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1082 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1083 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1084 %3 = bitcast x86_mmx %2 to <8 x i8>
1085 %4 = bitcast <8 x i8> %3 to <1 x i64>
1086 %5 = extractelement <1 x i64> %4, i32 0
; test25: exercises the llvm.x86.mmx.movnt.dq intrinsic, which takes a pointer
; and an x86_mmx value and produces no result (declared void, and not readnone).
; The <1 x i64> argument is reduced to its scalar element 0 and that i64 is
; bitcast to x86_mmx before being passed along with %p.
1090 declare void @llvm.x86.mmx.movnt.dq(ptr, x86_mmx) nounwind
1092 define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp {
1093 ; ALL-LABEL: @test25
1096 %0 = extractelement <1 x i64> %a, i32 0
1097 %mmx_var.i = bitcast i64 %0 to x86_mmx
1098 tail call void @llvm.x86.mmx.movnt.dq(ptr %p, x86_mmx %mmx_var.i) nounwind
; test24: exercises the llvm.x86.mmx.pmovmskb intrinsic, which takes a single
; x86_mmx operand and produces an i32.
; The <1 x i64> argument is viewed as <8 x i8> and then as x86_mmx.
1102 declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
1104 define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
1105 ; ALL-LABEL: @test24
1108 %0 = bitcast <1 x i64> %a to <8 x i8>
1109 %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
1110 %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
; test23: exercises the llvm.x86.mmx.maskmovq intrinsic, which takes two
; x86_mmx operands plus a pointer and produces no result (declared void).
; Both vector arguments are viewed as <8 x i8> and then as x86_mmx.
1114 declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind
1116 define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp {
1117 ; ALL-LABEL: @test23
1120 %0 = bitcast <1 x i64> %n to <8 x i8>
1121 %1 = bitcast <1 x i64> %d to <8 x i8>
1122 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1123 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
; Operand order at the call is (%d, %n, %p): %mmx_var.i derives from %d and %mmx_var1.i from %n.
1124 tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, ptr %p) nounwind
; test22: exercises the llvm.x86.mmx.pmulhu.w intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
; The label check below anchors FileCheck matching to this function's output.
1128 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
1130 define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1131 ; ALL-LABEL: @test22
1134 %0 = bitcast <1 x i64> %b to <4 x i16>
1135 %1 = bitcast <1 x i64> %a to <4 x i16>
1136 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1137 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1138 %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1139 %3 = bitcast x86_mmx %2 to <4 x i16>
1140 %4 = bitcast <4 x i16> %3 to <1 x i64>
1141 %5 = extractelement <1 x i64> %4, i32 0
; test21: exercises the llvm.x86.sse.pshuf.w intrinsic (one x86_mmx operand
; plus an i8 immediate) with the immediate 3, producing a 64-bit result.
; The two target-specific checks below expect a pshufw whose printed shuffle
; pattern is [3,0,0,0]; the i686 runs expect the source to come from memory,
; while the x86_64 runs expect it to come from mm0.
1145 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
1147 define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
1148 ; ALL-LABEL: @test21
1149 ; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
1150 ; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
1152 %0 = bitcast <1 x i64> %a to <4 x i16>
1153 %1 = bitcast <4 x i16> %0 to x86_mmx
1154 %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1155 %3 = bitcast x86_mmx %2 to <4 x i16>
1156 %4 = bitcast <4 x i16> %3 to <1 x i64>
1157 %5 = extractelement <1 x i64> %4, i32 0
; test21_2: same llvm.x86.sse.pshuf.w call as test21 (immediate 3), but the
; function is declared to produce an i32 instead of an i64: the shuffled value
; is viewed as <2 x i32> and only element 0 is extracted.
; The pshufw expectations are identical to test21 for both targets.
1161 define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
1162 ; ALL-LABEL: @test21_2
1163 ; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
1164 ; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
1167 %0 = bitcast <1 x i64> %a to <4 x i16>
1168 %1 = bitcast <4 x i16> %0 to x86_mmx
1169 %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
; Reinterpret the result as two 32-bit lanes and keep lane 0 only.
1170 %3 = bitcast x86_mmx %2 to <4 x i16>
1171 %4 = bitcast <4 x i16> %3 to <2 x i32>
1172 %5 = extractelement <2 x i32> %4, i32 0
; test20: exercises the llvm.x86.mmx.pmulu.dq intrinsic.
; Both <1 x i64> arguments are viewed as <2 x i32> and then as x86_mmx; %a
; becomes the first intrinsic operand and %b the second.
1176 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
1178 define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1179 ; ALL-LABEL: @test20
1182 %0 = bitcast <1 x i64> %b to <2 x i32>
1183 %1 = bitcast <1 x i64> %a to <2 x i32>
1184 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
1185 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
1186 %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; The result is reinterpreted directly as a scalar i64, with no vector round-trip.
1187 %3 = bitcast x86_mmx %2 to i64
; test19: exercises the llvm.x86.sse.cvtpi2pd intrinsic, which takes a single
; x86_mmx operand and produces a <2 x double>.
; The <1 x i64> argument is viewed as <2 x i32> and then as x86_mmx.
1191 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
1193 define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
1194 ; ALL-LABEL: @test19
1197 %0 = bitcast <1 x i64> %a to <2 x i32>
1198 %1 = bitcast <2 x i32> %0 to x86_mmx
1199 %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
; test18: exercises the llvm.x86.sse.cvttpd2pi intrinsic, which takes a
; <2 x double> and produces an x86_mmx value.
; The argument is passed through unchanged; only the result needs conversion.
1203 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
1205 define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
1206 ; ALL-LABEL: @test18
1209 %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
; View the x86_mmx result as <2 x i32>, then as <1 x i64>, and take its only element.
1210 %1 = bitcast x86_mmx %0 to <2 x i32>
1211 %2 = bitcast <2 x i32> %1 to <1 x i64>
1212 %3 = extractelement <1 x i64> %2, i32 0
; test17: exercises the llvm.x86.sse.cvtpd2pi intrinsic, which takes a
; <2 x double> and produces an x86_mmx value.
; The body has the same shape as test18; only the intrinsic called differs.
1216 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
1218 define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
1219 ; ALL-LABEL: @test17
1222 %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
; View the x86_mmx result as <2 x i32>, then as <1 x i64>, and take its only element.
1223 %1 = bitcast x86_mmx %0 to <2 x i32>
1224 %2 = bitcast <2 x i32> %1 to <1 x i64>
1225 %3 = extractelement <1 x i64> %2, i32 0
; test16: exercises the llvm.x86.mmx.palignr.b intrinsic (two x86_mmx operands
; plus an i8 immediate) with %a first, %b second and the immediate 16.
; Each <1 x i64> argument is reduced to its scalar element 0 and that i64 is
; bitcast to x86_mmx.
; NOTE(review): the immediate 16 is larger than the 8-byte width of a single
; operand; presumably chosen deliberately for this test - confirm against the
; expected-output checks, which are not visible in this excerpt.
1229 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
1231 define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1232 ; ALL-LABEL: @test16
1235 %0 = extractelement <1 x i64> %a, i32 0
1236 %mmx_var = bitcast i64 %0 to x86_mmx
1237 %1 = extractelement <1 x i64> %b, i32 0
1238 %mmx_var1 = bitcast i64 %1 to x86_mmx
1239 %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
; The x86_mmx result is reinterpreted directly as a scalar i64.
1240 %3 = bitcast x86_mmx %2 to i64
; test15: exercises the single-operand llvm.x86.ssse3.pabs.d intrinsic.
; The <1 x i64> argument is viewed as <2 x i32> and then as x86_mmx.
1244 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
1246 define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
1247 ; ALL-LABEL: @test15
1250 %0 = bitcast <1 x i64> %a to <2 x i32>
1251 %1 = bitcast <2 x i32> %0 to x86_mmx
1252 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1253 %3 = bitcast x86_mmx %2 to <2 x i32>
1254 %4 = bitcast <2 x i32> %3 to <1 x i64>
1255 %5 = extractelement <1 x i64> %4, i32 0
; test14: exercises the single-operand llvm.x86.ssse3.pabs.w intrinsic.
; The <1 x i64> argument is viewed as <4 x i16> and then as x86_mmx.
1259 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
1261 define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
1262 ; ALL-LABEL: @test14
1265 %0 = bitcast <1 x i64> %a to <4 x i16>
1266 %1 = bitcast <4 x i16> %0 to x86_mmx
1267 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1268 %3 = bitcast x86_mmx %2 to <4 x i16>
1269 %4 = bitcast <4 x i16> %3 to <1 x i64>
1270 %5 = extractelement <1 x i64> %4, i32 0
; test13: exercises the single-operand llvm.x86.ssse3.pabs.b intrinsic.
; The <1 x i64> argument is viewed as <8 x i8> and then as x86_mmx.
1274 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
1276 define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
1277 ; ALL-LABEL: @test13
1280 %0 = bitcast <1 x i64> %a to <8 x i8>
1281 %1 = bitcast <8 x i8> %0 to x86_mmx
1282 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1283 %3 = bitcast x86_mmx %2 to <8 x i8>
1284 %4 = bitcast <8 x i8> %3 to <1 x i64>
1285 %5 = extractelement <1 x i64> %4, i32 0
; test12: exercises the llvm.x86.ssse3.psign.d intrinsic.
; Both <1 x i64> arguments are viewed as <2 x i32> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1289 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
1291 define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1292 ; ALL-LABEL: @test12
1295 %0 = bitcast <1 x i64> %b to <2 x i32>
1296 %1 = bitcast <1 x i64> %a to <2 x i32>
1297 %2 = bitcast <2 x i32> %1 to x86_mmx
1298 %3 = bitcast <2 x i32> %0 to x86_mmx
1299 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1300 %5 = bitcast x86_mmx %4 to <2 x i32>
1301 %6 = bitcast <2 x i32> %5 to <1 x i64>
1302 %7 = extractelement <1 x i64> %6, i32 0
; test11: exercises the llvm.x86.ssse3.psign.w intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1306 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
1308 define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1309 ; ALL-LABEL: @test11
1312 %0 = bitcast <1 x i64> %b to <4 x i16>
1313 %1 = bitcast <1 x i64> %a to <4 x i16>
1314 %2 = bitcast <4 x i16> %1 to x86_mmx
1315 %3 = bitcast <4 x i16> %0 to x86_mmx
1316 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1317 %5 = bitcast x86_mmx %4 to <4 x i16>
1318 %6 = bitcast <4 x i16> %5 to <1 x i64>
1319 %7 = extractelement <1 x i64> %6, i32 0
; test10: exercises the llvm.x86.ssse3.psign.b intrinsic.
; Both <1 x i64> arguments are viewed as <8 x i8> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1323 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
1325 define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1326 ; ALL-LABEL: @test10
1329 %0 = bitcast <1 x i64> %b to <8 x i8>
1330 %1 = bitcast <1 x i64> %a to <8 x i8>
1331 %2 = bitcast <8 x i8> %1 to x86_mmx
1332 %3 = bitcast <8 x i8> %0 to x86_mmx
1333 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1334 %5 = bitcast x86_mmx %4 to <8 x i8>
1335 %6 = bitcast <8 x i8> %5 to <1 x i64>
1336 %7 = extractelement <1 x i64> %6, i32 0
; test9: exercises the llvm.x86.ssse3.pshuf.b intrinsic.
; Both <1 x i64> arguments are viewed as <8 x i8> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1340 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
1342 define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1346 %0 = bitcast <1 x i64> %b to <8 x i8>
1347 %1 = bitcast <1 x i64> %a to <8 x i8>
1348 %2 = bitcast <8 x i8> %1 to x86_mmx
1349 %3 = bitcast <8 x i8> %0 to x86_mmx
1350 %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1351 %5 = bitcast x86_mmx %4 to <8 x i8>
1352 %6 = bitcast <8 x i8> %5 to <1 x i64>
1353 %7 = extractelement <1 x i64> %6, i32 0
; test8: exercises the llvm.x86.ssse3.pmul.hr.sw intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1357 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
1359 define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1363 %0 = bitcast <1 x i64> %b to <4 x i16>
1364 %1 = bitcast <1 x i64> %a to <4 x i16>
1365 %2 = bitcast <4 x i16> %1 to x86_mmx
1366 %3 = bitcast <4 x i16> %0 to x86_mmx
1367 %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1368 %5 = bitcast x86_mmx %4 to <4 x i16>
1369 %6 = bitcast <4 x i16> %5 to <1 x i64>
1370 %7 = extractelement <1 x i64> %6, i32 0
; test7: exercises the llvm.x86.ssse3.pmadd.ub.sw intrinsic.
; Both <1 x i64> arguments are viewed as <8 x i8> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1374 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
1376 define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1380 %0 = bitcast <1 x i64> %b to <8 x i8>
1381 %1 = bitcast <1 x i64> %a to <8 x i8>
1382 %2 = bitcast <8 x i8> %1 to x86_mmx
1383 %3 = bitcast <8 x i8> %0 to x86_mmx
1384 %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; The result is viewed as <8 x i8> on the way back to <1 x i64>; the bitcasts
; only reinterpret the same 64 bits, so the lane type chosen here does not
; change the value that is extracted.
1385 %5 = bitcast x86_mmx %4 to <8 x i8>
1386 %6 = bitcast <8 x i8> %5 to <1 x i64>
1387 %7 = extractelement <1 x i64> %6, i32 0
; test6: exercises the llvm.x86.ssse3.phsub.sw intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1391 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
1393 define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1397 %0 = bitcast <1 x i64> %b to <4 x i16>
1398 %1 = bitcast <1 x i64> %a to <4 x i16>
1399 %2 = bitcast <4 x i16> %1 to x86_mmx
1400 %3 = bitcast <4 x i16> %0 to x86_mmx
1401 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1402 %5 = bitcast x86_mmx %4 to <4 x i16>
1403 %6 = bitcast <4 x i16> %5 to <1 x i64>
1404 %7 = extractelement <1 x i64> %6, i32 0
; test5: exercises the llvm.x86.ssse3.phsub.d intrinsic.
; Both <1 x i64> arguments are viewed as <2 x i32> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1408 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
1410 define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1414 %0 = bitcast <1 x i64> %b to <2 x i32>
1415 %1 = bitcast <1 x i64> %a to <2 x i32>
1416 %2 = bitcast <2 x i32> %1 to x86_mmx
1417 %3 = bitcast <2 x i32> %0 to x86_mmx
1418 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1419 %5 = bitcast x86_mmx %4 to <2 x i32>
1420 %6 = bitcast <2 x i32> %5 to <1 x i64>
1421 %7 = extractelement <1 x i64> %6, i32 0
; test4: exercises the llvm.x86.ssse3.phsub.w intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1425 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
1427 define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1431 %0 = bitcast <1 x i64> %b to <4 x i16>
1432 %1 = bitcast <1 x i64> %a to <4 x i16>
1433 %2 = bitcast <4 x i16> %1 to x86_mmx
1434 %3 = bitcast <4 x i16> %0 to x86_mmx
1435 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1436 %5 = bitcast x86_mmx %4 to <4 x i16>
1437 %6 = bitcast <4 x i16> %5 to <1 x i64>
1438 %7 = extractelement <1 x i64> %6, i32 0
; test3: exercises the llvm.x86.ssse3.phadd.sw intrinsic.
; Both <1 x i64> arguments are viewed as <4 x i16> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1442 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
1444 define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1448 %0 = bitcast <1 x i64> %b to <4 x i16>
1449 %1 = bitcast <1 x i64> %a to <4 x i16>
1450 %2 = bitcast <4 x i16> %1 to x86_mmx
1451 %3 = bitcast <4 x i16> %0 to x86_mmx
1452 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1453 %5 = bitcast x86_mmx %4 to <4 x i16>
1454 %6 = bitcast <4 x i16> %5 to <1 x i64>
1455 %7 = extractelement <1 x i64> %6, i32 0
; test2: exercises the llvm.x86.ssse3.phadd.d intrinsic.
; Both <1 x i64> arguments are viewed as <2 x i32> and then as x86_mmx.
; %2 derives from %a and is the first operand; %3 derives from %b and is the second.
1459 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
1461 define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1465 %0 = bitcast <1 x i64> %b to <2 x i32>
1466 %1 = bitcast <1 x i64> %a to <2 x i32>
1467 %2 = bitcast <2 x i32> %1 to x86_mmx
1468 %3 = bitcast <2 x i32> %0 to x86_mmx
1469 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back to <1 x i64> and take its only element as i64.
1470 %5 = bitcast x86_mmx %4 to <2 x i32>
1471 %6 = bitcast <2 x i32> %5 to <1 x i64>
1472 %7 = extractelement <1 x i64> %6, i32 0
; test89: exercises the llvm.x86.sse.cvtpi2ps intrinsic, which takes a
; <4 x float> and an x86_mmx value and produces a <4 x float>.
; This function takes its MMX operand directly as an x86_mmx parameter, so no
; bitcasts are needed before the call. The intrinsic is declared after the
; function that uses it.
1476 define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
1477 ; ALL-LABEL: @test89
1479 %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
1483 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
; test90: exercises the llvm.x86.mmx.emms intrinsic, which takes no operands
; and produces no result. The call is a plain call (not a tail call), and
; neither the function nor the declaration carries any attributes.
1486 define void @test90() {
1487 ; ALL-LABEL: @test90
1489 call void @llvm.x86.mmx.emms()
1493 declare void @llvm.x86.mmx.emms()
; test_mm_insert_pi16: exercises the llvm.x86.mmx.pinsr.w intrinsic with the
; constant lane index 2 (the index parameter is declared immarg).
; Unlike most tests in this file, the expected output is pinned instruction by
; instruction for each target:
;  - i686 runs: set up an 8-byte-aligned frame, copy the 64-bit argument from
;    8(%ebp)/12(%ebp) into a stack slot, load it into mm0 with movq, insert
;    with pinsrw $2 taking the i32 operand from 16(%ebp), then store mm0 back
;    to the stack and reload it into eax/edx.
;  - x86_64 runs: move rdi into mm0, pinsrw $2 from esi, move mm0 into rax.
1495 define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind {
1496 ; X86-LABEL: test_mm_insert_pi16:
1497 ; X86: # %bb.0: # %entry
1498 ; X86-NEXT: pushl %ebp
1499 ; X86-NEXT: movl %esp, %ebp
1500 ; X86-NEXT: andl $-8, %esp
1501 ; X86-NEXT: subl $16, %esp
1502 ; X86-NEXT: movl 8(%ebp), %eax
1503 ; X86-NEXT: movl 12(%ebp), %ecx
1504 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1505 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1506 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1507 ; X86-NEXT: pinsrw $2, 16(%ebp), %mm0
1508 ; X86-NEXT: movq %mm0, (%esp)
1509 ; X86-NEXT: movl (%esp), %eax
1510 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1511 ; X86-NEXT: movl %ebp, %esp
1512 ; X86-NEXT: popl %ebp
1515 ; X64-LABEL: test_mm_insert_pi16:
1516 ; X64: # %bb.0: # %entry
1517 ; X64-NEXT: movq %rdi, %mm0
1518 ; X64-NEXT: pinsrw $2, %esi, %mm0
1519 ; X64-NEXT: movq %mm0, %rax
; The <1 x i64> argument is bitcast straight to x86_mmx, and the result straight back.
1522 %0 = bitcast <1 x i64> %a.coerce to x86_mmx
1523 %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2)
1524 %2 = bitcast x86_mmx %1 to <1 x i64>
1528 declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg)
; test_mm_extract_pi16: exercises the llvm.x86.mmx.pextr.w intrinsic with the
; constant lane index 2 (the index parameter is declared immarg); the intrinsic
; produces an i32.
; The expected output is pinned instruction by instruction for each target:
;  - i686 runs: set up an 8-byte-aligned frame, copy the 64-bit argument from
;    8(%ebp)/12(%ebp) into a stack slot, load it into mm0 with movq, then
;    pextrw $2 from mm0 into eax.
;  - x86_64 runs: move rdi into mm0, then pextrw $2 from mm0 into eax.
1530 define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind {
1531 ; X86-LABEL: test_mm_extract_pi16:
1532 ; X86: # %bb.0: # %entry
1533 ; X86-NEXT: pushl %ebp
1534 ; X86-NEXT: movl %esp, %ebp
1535 ; X86-NEXT: andl $-8, %esp
1536 ; X86-NEXT: subl $8, %esp
1537 ; X86-NEXT: movl 8(%ebp), %eax
1538 ; X86-NEXT: movl 12(%ebp), %ecx
1539 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1540 ; X86-NEXT: movl %eax, (%esp)
1541 ; X86-NEXT: movq (%esp), %mm0
1542 ; X86-NEXT: pextrw $2, %mm0, %eax
1543 ; X86-NEXT: movl %ebp, %esp
1544 ; X86-NEXT: popl %ebp
1547 ; X64-LABEL: test_mm_extract_pi16:
1548 ; X64: # %bb.0: # %entry
1549 ; X64-NEXT: movq %rdi, %mm0
1550 ; X64-NEXT: pextrw $2, %mm0, %eax
; The <1 x i64> argument is bitcast straight to x86_mmx before the call.
1553 %0 = bitcast <1 x i64> %a.coerce to x86_mmx
1554 %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2)
1558 declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg)