1 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
; test1: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.ssse3.phadd.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0. Note that %b is converted first (%0) but is passed second.
3 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
5 define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
8 %0 = bitcast <1 x i64> %b to <4 x i16>
9 %1 = bitcast <1 x i64> %a to <4 x i16>
10 %2 = bitcast <4 x i16> %1 to x86_mmx
11 %3 = bitcast <4 x i16> %0 to x86_mmx
12 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
13 %5 = bitcast x86_mmx %4 to <4 x i16>
14 %6 = bitcast <4 x i16> %5 to <1 x i64>
15 %7 = extractelement <1 x i64> %6, i32 0
; test88: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.pcmpgt.d with %a as the first operand and %b as the second,
; then bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
19 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
21 define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
24 %0 = bitcast <1 x i64> %b to <2 x i32>
25 %1 = bitcast <1 x i64> %a to <2 x i32>
26 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
27 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
28 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
29 %3 = bitcast x86_mmx %2 to <2 x i32>
30 %4 = bitcast <2 x i32> %3 to <1 x i64>
31 %5 = extractelement <1 x i64> %4, i32 0
; test87: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.pcmpgt.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
35 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
37 define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
40 %0 = bitcast <1 x i64> %b to <4 x i16>
41 %1 = bitcast <1 x i64> %a to <4 x i16>
42 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
43 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
44 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
45 %3 = bitcast x86_mmx %2 to <4 x i16>
46 %4 = bitcast <4 x i16> %3 to <1 x i64>
47 %5 = extractelement <1 x i64> %4, i32 0
; test86: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.pcmpgt.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
51 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
53 define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
56 %0 = bitcast <1 x i64> %b to <8 x i8>
57 %1 = bitcast <1 x i64> %a to <8 x i8>
58 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
59 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
60 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
61 %3 = bitcast x86_mmx %2 to <8 x i8>
62 %4 = bitcast <8 x i8> %3 to <1 x i64>
63 %5 = extractelement <1 x i64> %4, i32 0
; test85: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.pcmpeq.d with %a as the first operand and %b as the second,
; then bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
67 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
69 define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
72 %0 = bitcast <1 x i64> %b to <2 x i32>
73 %1 = bitcast <1 x i64> %a to <2 x i32>
74 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
75 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
76 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
77 %3 = bitcast x86_mmx %2 to <2 x i32>
78 %4 = bitcast <2 x i32> %3 to <1 x i64>
79 %5 = extractelement <1 x i64> %4, i32 0
; test84: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.pcmpeq.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
83 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
85 define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
88 %0 = bitcast <1 x i64> %b to <4 x i16>
89 %1 = bitcast <1 x i64> %a to <4 x i16>
90 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
91 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
92 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
93 %3 = bitcast x86_mmx %2 to <4 x i16>
94 %4 = bitcast <4 x i16> %3 to <1 x i64>
95 %5 = extractelement <1 x i64> %4, i32 0
; test83: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.pcmpeq.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
99 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
101 define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
104 %0 = bitcast <1 x i64> %b to <8 x i8>
105 %1 = bitcast <1 x i64> %a to <8 x i8>
106 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
107 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
108 %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
109 %3 = bitcast x86_mmx %2 to <8 x i8>
110 %4 = bitcast <8 x i8> %3 to <1 x i64>
111 %5 = extractelement <1 x i64> %4, i32 0
; test82: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.punpckldq with %a as the first operand and %b as the second,
; then bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
115 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
117 define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
120 %0 = bitcast <1 x i64> %b to <2 x i32>
121 %1 = bitcast <1 x i64> %a to <2 x i32>
122 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
123 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
124 %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
125 %3 = bitcast x86_mmx %2 to <2 x i32>
126 %4 = bitcast <2 x i32> %3 to <1 x i64>
127 %5 = extractelement <1 x i64> %4, i32 0
; test81: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.punpcklwd with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
131 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
133 define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
136 %0 = bitcast <1 x i64> %b to <4 x i16>
137 %1 = bitcast <1 x i64> %a to <4 x i16>
138 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
139 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
140 %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
141 %3 = bitcast x86_mmx %2 to <4 x i16>
142 %4 = bitcast <4 x i16> %3 to <1 x i64>
143 %5 = extractelement <1 x i64> %4, i32 0
; test80: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.punpcklbw with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
147 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
149 define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
152 %0 = bitcast <1 x i64> %b to <8 x i8>
153 %1 = bitcast <1 x i64> %a to <8 x i8>
154 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
155 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
156 %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
157 %3 = bitcast x86_mmx %2 to <8 x i8>
158 %4 = bitcast <8 x i8> %3 to <1 x i64>
159 %5 = extractelement <1 x i64> %4, i32 0
; test79: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.punpckhdq with %a as the first operand and %b as the second,
; then bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
163 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
165 define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
168 %0 = bitcast <1 x i64> %b to <2 x i32>
169 %1 = bitcast <1 x i64> %a to <2 x i32>
170 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
171 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
172 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
173 %3 = bitcast x86_mmx %2 to <2 x i32>
174 %4 = bitcast <2 x i32> %3 to <1 x i64>
175 %5 = extractelement <1 x i64> %4, i32 0
; test78: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.punpckhwd with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
179 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
181 define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
184 %0 = bitcast <1 x i64> %b to <4 x i16>
185 %1 = bitcast <1 x i64> %a to <4 x i16>
186 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
187 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
188 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
189 %3 = bitcast x86_mmx %2 to <4 x i16>
190 %4 = bitcast <4 x i16> %3 to <1 x i64>
191 %5 = extractelement <1 x i64> %4, i32 0
; test77: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.punpckhbw with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
195 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
197 define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
200 %0 = bitcast <1 x i64> %b to <8 x i8>
201 %1 = bitcast <1 x i64> %a to <8 x i8>
202 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
203 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
204 %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
205 %3 = bitcast x86_mmx %2 to <8 x i8>
206 %4 = bitcast <8 x i8> %3 to <1 x i64>
207 %5 = extractelement <1 x i64> %4, i32 0
; test76: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx and
; calls @llvm.x86.mmx.packuswb with %a as the first operand and %b as the
; second. Unlike the inputs, the result is bitcast through <8 x i8> before
; being converted to <1 x i64> and having element 0 extracted.
211 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
213 define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
216 %0 = bitcast <1 x i64> %b to <4 x i16>
217 %1 = bitcast <1 x i64> %a to <4 x i16>
218 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
219 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
220 %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
221 %3 = bitcast x86_mmx %2 to <8 x i8>
222 %4 = bitcast <8 x i8> %3 to <1 x i64>
223 %5 = extractelement <1 x i64> %4, i32 0
; test75: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx and
; calls @llvm.x86.mmx.packssdw with %a as the first operand and %b as the
; second. Unlike the inputs, the result is bitcast through <4 x i16> before
; being converted to <1 x i64> and having element 0 extracted.
227 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
229 define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
232 %0 = bitcast <1 x i64> %b to <2 x i32>
233 %1 = bitcast <1 x i64> %a to <2 x i32>
234 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
235 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
236 %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
237 %3 = bitcast x86_mmx %2 to <4 x i16>
238 %4 = bitcast <4 x i16> %3 to <1 x i64>
239 %5 = extractelement <1 x i64> %4, i32 0
; test74: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx and
; calls @llvm.x86.mmx.packsswb with %a as the first operand and %b as the
; second. Unlike the inputs, the result is bitcast through <8 x i8> before
; being converted to <1 x i64> and having element 0 extracted.
243 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
245 define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
248 %0 = bitcast <1 x i64> %b to <4 x i16>
249 %1 = bitcast <1 x i64> %a to <4 x i16>
250 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
251 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
252 %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
253 %3 = bitcast x86_mmx %2 to <8 x i8>
254 %4 = bitcast <8 x i8> %3 to <1 x i64>
255 %5 = extractelement <1 x i64> %4, i32 0
; test73: bitcasts %a from <1 x i64> through <2 x i32> to x86_mmx and calls
; @llvm.x86.mmx.psrai.d with that value and the constant i32 3 as the second
; operand, then bitcasts the result back through <2 x i32> to <1 x i64> and
; extracts element 0.
259 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
261 define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
264 %0 = bitcast <1 x i64> %a to <2 x i32>
265 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
266 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
267 %2 = bitcast x86_mmx %1 to <2 x i32>
268 %3 = bitcast <2 x i32> %2 to <1 x i64>
269 %4 = extractelement <1 x i64> %3, i32 0
; test72: bitcasts %a from <1 x i64> through <4 x i16> to x86_mmx and calls
; @llvm.x86.mmx.psrai.w with that value and the constant i32 3 as the second
; operand, then bitcasts the result back through <4 x i16> to <1 x i64> and
; extracts element 0.
273 declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
275 define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
278 %0 = bitcast <1 x i64> %a to <4 x i16>
279 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
280 %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
281 %2 = bitcast x86_mmx %1 to <4 x i16>
282 %3 = bitcast <4 x i16> %2 to <1 x i64>
283 %4 = extractelement <1 x i64> %3, i32 0
; test71: extracts element 0 of %a as an i64, bitcasts it directly to x86_mmx,
; and calls @llvm.x86.mmx.psrli.q with that value and the constant i32 3 as
; the second operand. The x86_mmx result is bitcast straight to i64 with no
; intermediate vector type.
287 declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
289 define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
292 %0 = extractelement <1 x i64> %a, i32 0
293 %mmx_var.i = bitcast i64 %0 to x86_mmx
294 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
295 %2 = bitcast x86_mmx %1 to i64
; test70: bitcasts %a from <1 x i64> through <2 x i32> to x86_mmx and calls
; @llvm.x86.mmx.psrli.d with that value and the constant i32 3 as the second
; operand, then bitcasts the result back through <2 x i32> to <1 x i64> and
; extracts element 0.
299 declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
301 define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
304 %0 = bitcast <1 x i64> %a to <2 x i32>
305 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
306 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
307 %2 = bitcast x86_mmx %1 to <2 x i32>
308 %3 = bitcast <2 x i32> %2 to <1 x i64>
309 %4 = extractelement <1 x i64> %3, i32 0
; test69: bitcasts %a from <1 x i64> through <4 x i16> to x86_mmx and calls
; @llvm.x86.mmx.psrli.w with that value and the constant i32 3 as the second
; operand, then bitcasts the result back through <4 x i16> to <1 x i64> and
; extracts element 0.
313 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
315 define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
318 %0 = bitcast <1 x i64> %a to <4 x i16>
319 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
320 %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
321 %2 = bitcast x86_mmx %1 to <4 x i16>
322 %3 = bitcast <4 x i16> %2 to <1 x i64>
323 %4 = extractelement <1 x i64> %3, i32 0
; test68: extracts element 0 of %a as an i64, bitcasts it directly to x86_mmx,
; and calls @llvm.x86.mmx.pslli.q with that value and the constant i32 3 as
; the second operand. The x86_mmx result is bitcast straight to i64 with no
; intermediate vector type.
327 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
329 define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
332 %0 = extractelement <1 x i64> %a, i32 0
333 %mmx_var.i = bitcast i64 %0 to x86_mmx
334 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
335 %2 = bitcast x86_mmx %1 to i64
; test67: bitcasts %a from <1 x i64> through <2 x i32> to x86_mmx and calls
; @llvm.x86.mmx.pslli.d with that value and the constant i32 3 as the second
; operand, then bitcasts the result back through <2 x i32> to <1 x i64> and
; extracts element 0.
339 declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
341 define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
344 %0 = bitcast <1 x i64> %a to <2 x i32>
345 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
346 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
347 %2 = bitcast x86_mmx %1 to <2 x i32>
348 %3 = bitcast <2 x i32> %2 to <1 x i64>
349 %4 = extractelement <1 x i64> %3, i32 0
; test66: bitcasts %a from <1 x i64> through <4 x i16> to x86_mmx and calls
; @llvm.x86.mmx.pslli.w with that value and the constant i32 3 as the second
; operand, then bitcasts the result back through <4 x i16> to <1 x i64> and
; extracts element 0.
353 declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
355 define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
358 %0 = bitcast <1 x i64> %a to <4 x i16>
359 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
360 %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
361 %2 = bitcast x86_mmx %1 to <4 x i16>
362 %3 = bitcast <4 x i16> %2 to <1 x i64>
363 %4 = extractelement <1 x i64> %3, i32 0
; test65: calls @llvm.x86.mmx.psra.d with two x86_mmx operands built in
; different ways: %a is bitcast through <2 x i32>, while %b has element 0
; extracted as an i64 that is bitcast directly to x86_mmx. The result is
; bitcast back through <2 x i32> to <1 x i64> and element 0 is extracted.
367 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
369 define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
372 %0 = bitcast <1 x i64> %a to <2 x i32>
373 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
374 %1 = extractelement <1 x i64> %b, i32 0
375 %mmx_var1.i = bitcast i64 %1 to x86_mmx
376 %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
377 %3 = bitcast x86_mmx %2 to <2 x i32>
378 %4 = bitcast <2 x i32> %3 to <1 x i64>
379 %5 = extractelement <1 x i64> %4, i32 0
; test64: calls @llvm.x86.mmx.psra.w with two x86_mmx operands built in
; different ways: %a is bitcast through <4 x i16>, while %b has element 0
; extracted as an i64 that is bitcast directly to x86_mmx. The result is
; bitcast back through <4 x i16> to <1 x i64> and element 0 is extracted.
383 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
385 define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
388 %0 = bitcast <1 x i64> %a to <4 x i16>
389 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
390 %1 = extractelement <1 x i64> %b, i32 0
391 %mmx_var1.i = bitcast i64 %1 to x86_mmx
392 %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
393 %3 = bitcast x86_mmx %2 to <4 x i16>
394 %4 = bitcast <4 x i16> %3 to <1 x i64>
395 %5 = extractelement <1 x i64> %4, i32 0
; test63: extracts element 0 of both %a and %b as i64 values, bitcasts each
; directly to x86_mmx, and calls @llvm.x86.mmx.psrl.q with %a's value first
; and %b's second. The x86_mmx result is bitcast straight to i64.
399 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
401 define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
404 %0 = extractelement <1 x i64> %a, i32 0
405 %mmx_var.i = bitcast i64 %0 to x86_mmx
406 %1 = extractelement <1 x i64> %b, i32 0
407 %mmx_var1.i = bitcast i64 %1 to x86_mmx
408 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
409 %3 = bitcast x86_mmx %2 to i64
; test62: calls @llvm.x86.mmx.psrl.d with two x86_mmx operands built in
; different ways: %a is bitcast through <2 x i32>, while %b has element 0
; extracted as an i64 that is bitcast directly to x86_mmx. The result is
; bitcast back through <2 x i32> to <1 x i64> and element 0 is extracted.
413 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
415 define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
418 %0 = bitcast <1 x i64> %a to <2 x i32>
419 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
420 %1 = extractelement <1 x i64> %b, i32 0
421 %mmx_var1.i = bitcast i64 %1 to x86_mmx
422 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
423 %3 = bitcast x86_mmx %2 to <2 x i32>
424 %4 = bitcast <2 x i32> %3 to <1 x i64>
425 %5 = extractelement <1 x i64> %4, i32 0
; test61: calls @llvm.x86.mmx.psrl.w with two x86_mmx operands built in
; different ways: %a is bitcast through <4 x i16>, while %b has element 0
; extracted as an i64 that is bitcast directly to x86_mmx. The result is
; bitcast back through <4 x i16> to <1 x i64> and element 0 is extracted.
429 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
431 define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
434 %0 = bitcast <1 x i64> %a to <4 x i16>
435 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
436 %1 = extractelement <1 x i64> %b, i32 0
437 %mmx_var1.i = bitcast i64 %1 to x86_mmx
438 %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
439 %3 = bitcast x86_mmx %2 to <4 x i16>
440 %4 = bitcast <4 x i16> %3 to <1 x i64>
441 %5 = extractelement <1 x i64> %4, i32 0
; test60: extracts element 0 of both %a and %b as i64 values, bitcasts each
; directly to x86_mmx, and calls @llvm.x86.mmx.psll.q with %a's value first
; and %b's second. The x86_mmx result is bitcast straight to i64.
445 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
447 define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
450 %0 = extractelement <1 x i64> %a, i32 0
451 %mmx_var.i = bitcast i64 %0 to x86_mmx
452 %1 = extractelement <1 x i64> %b, i32 0
453 %mmx_var1.i = bitcast i64 %1 to x86_mmx
454 %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
455 %3 = bitcast x86_mmx %2 to i64
; test59: calls @llvm.x86.mmx.psll.d with two x86_mmx operands built in
; different ways: %a is bitcast through <2 x i32>, while %b has element 0
; extracted as an i64 that is bitcast directly to x86_mmx. The result is
; bitcast back through <2 x i32> to <1 x i64> and element 0 is extracted.
459 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
461 define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
464 %0 = bitcast <1 x i64> %a to <2 x i32>
465 %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
466 %1 = extractelement <1 x i64> %b, i32 0
467 %mmx_var1.i = bitcast i64 %1 to x86_mmx
468 %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
469 %3 = bitcast x86_mmx %2 to <2 x i32>
470 %4 = bitcast <2 x i32> %3 to <1 x i64>
471 %5 = extractelement <1 x i64> %4, i32 0
; test58: calls @llvm.x86.mmx.psll.w with two x86_mmx operands built in
; different ways: %a is bitcast through <4 x i16>, while %b has element 0
; extracted as an i64 that is bitcast directly to x86_mmx. The result is
; bitcast back through <4 x i16> to <1 x i64> and element 0 is extracted.
475 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
477 define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
480 %0 = bitcast <1 x i64> %a to <4 x i16>
481 %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
482 %1 = extractelement <1 x i64> %b, i32 0
483 %mmx_var1.i = bitcast i64 %1 to x86_mmx
484 %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
485 %3 = bitcast x86_mmx %2 to <4 x i16>
486 %4 = bitcast <4 x i16> %3 to <1 x i64>
487 %5 = extractelement <1 x i64> %4, i32 0
; test56: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.pxor with %a as the first operand and %b as the second, then
; bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
491 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
493 define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
496 %0 = bitcast <1 x i64> %b to <2 x i32>
497 %1 = bitcast <1 x i64> %a to <2 x i32>
498 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
499 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
500 %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
501 %3 = bitcast x86_mmx %2 to <2 x i32>
502 %4 = bitcast <2 x i32> %3 to <1 x i64>
503 %5 = extractelement <1 x i64> %4, i32 0
; test55: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.por with %a as the first operand and %b as the second, then
; bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
507 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
509 define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
512 %0 = bitcast <1 x i64> %b to <2 x i32>
513 %1 = bitcast <1 x i64> %a to <2 x i32>
514 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
515 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
516 %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
517 %3 = bitcast x86_mmx %2 to <2 x i32>
518 %4 = bitcast <2 x i32> %3 to <1 x i64>
519 %5 = extractelement <1 x i64> %4, i32 0
; test54: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.pandn with %a as the first operand and %b as the second, then
; bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
523 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
525 define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
528 %0 = bitcast <1 x i64> %b to <2 x i32>
529 %1 = bitcast <1 x i64> %a to <2 x i32>
530 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
531 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
532 %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
533 %3 = bitcast x86_mmx %2 to <2 x i32>
534 %4 = bitcast <2 x i32> %3 to <1 x i64>
535 %5 = extractelement <1 x i64> %4, i32 0
; test53: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.pand with %a as the first operand and %b as the second, then
; bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
539 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
541 define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
544 %0 = bitcast <1 x i64> %b to <2 x i32>
545 %1 = bitcast <1 x i64> %a to <2 x i32>
546 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
547 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
548 %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
549 %3 = bitcast x86_mmx %2 to <2 x i32>
550 %4 = bitcast <2 x i32> %3 to <1 x i64>
551 %5 = extractelement <1 x i64> %4, i32 0
; test52: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.pmull.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
555 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
557 define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
560 %0 = bitcast <1 x i64> %b to <4 x i16>
561 %1 = bitcast <1 x i64> %a to <4 x i16>
562 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
563 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
564 %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
565 %3 = bitcast x86_mmx %2 to <4 x i16>
566 %4 = bitcast <4 x i16> %3 to <1 x i64>
567 %5 = extractelement <1 x i64> %4, i32 0
; test51: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.pmull.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0. No declaration precedes this function; it relies on the
; @llvm.x86.mmx.pmull.w declaration that appears before @test52.
; NOTE(review): the instruction sequence is the same as @test52 - confirm the
; duplicate coverage of pmull.w is intentional.
571 define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
574 %0 = bitcast <1 x i64> %b to <4 x i16>
575 %1 = bitcast <1 x i64> %a to <4 x i16>
576 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
577 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
578 %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
579 %3 = bitcast x86_mmx %2 to <4 x i16>
580 %4 = bitcast <4 x i16> %3 to <1 x i64>
581 %5 = extractelement <1 x i64> %4, i32 0
; test50: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.pmulh.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
585 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
587 define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
590 %0 = bitcast <1 x i64> %b to <4 x i16>
591 %1 = bitcast <1 x i64> %a to <4 x i16>
592 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
593 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
594 %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
595 %3 = bitcast x86_mmx %2 to <4 x i16>
596 %4 = bitcast <4 x i16> %3 to <1 x i64>
597 %5 = extractelement <1 x i64> %4, i32 0
; test49: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx and
; calls @llvm.x86.mmx.pmadd.wd with %a as the first operand and %b as the
; second. Unlike the inputs, the result is bitcast through <2 x i32> before
; being converted to <1 x i64> and having element 0 extracted.
601 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
603 define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
606 %0 = bitcast <1 x i64> %b to <4 x i16>
607 %1 = bitcast <1 x i64> %a to <4 x i16>
608 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
609 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
610 %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
611 %3 = bitcast x86_mmx %2 to <2 x i32>
612 %4 = bitcast <2 x i32> %3 to <1 x i64>
613 %5 = extractelement <1 x i64> %4, i32 0
; test48: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.psubus.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
617 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
619 define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
622 %0 = bitcast <1 x i64> %b to <4 x i16>
623 %1 = bitcast <1 x i64> %a to <4 x i16>
624 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
625 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
626 %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
627 %3 = bitcast x86_mmx %2 to <4 x i16>
628 %4 = bitcast <4 x i16> %3 to <1 x i64>
629 %5 = extractelement <1 x i64> %4, i32 0
; test47: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.psubus.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
633 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
635 define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
638 %0 = bitcast <1 x i64> %b to <8 x i8>
639 %1 = bitcast <1 x i64> %a to <8 x i8>
640 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
641 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
642 %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
643 %3 = bitcast x86_mmx %2 to <8 x i8>
644 %4 = bitcast <8 x i8> %3 to <1 x i64>
645 %5 = extractelement <1 x i64> %4, i32 0
; test46: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.psubs.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
649 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
651 define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
654 %0 = bitcast <1 x i64> %b to <4 x i16>
655 %1 = bitcast <1 x i64> %a to <4 x i16>
656 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
657 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
658 %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
659 %3 = bitcast x86_mmx %2 to <4 x i16>
660 %4 = bitcast <4 x i16> %3 to <1 x i64>
661 %5 = extractelement <1 x i64> %4, i32 0
; test45: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.psubs.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
665 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
667 define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
670 %0 = bitcast <1 x i64> %b to <8 x i8>
671 %1 = bitcast <1 x i64> %a to <8 x i8>
672 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
673 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
674 %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
675 %3 = bitcast x86_mmx %2 to <8 x i8>
676 %4 = bitcast <8 x i8> %3 to <1 x i64>
677 %5 = extractelement <1 x i64> %4, i32 0
; test44: extracts element 0 of both %a and %b as i64 values, bitcasts each
; directly to x86_mmx, and calls @llvm.x86.mmx.psub.q with %a's value first
; and %b's second. The x86_mmx result is bitcast straight to i64. The call
; site carries no attributes here, unlike most other calls in this file.
681 define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
684 %0 = extractelement <1 x i64> %a, i32 0
685 %mmx_var = bitcast i64 %0 to x86_mmx
686 %1 = extractelement <1 x i64> %b, i32 0
687 %mmx_var1 = bitcast i64 %1 to x86_mmx
688 %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
689 %3 = bitcast x86_mmx %2 to i64
; Declaration of the intrinsic used by @test44; here it follows the function
; that calls it rather than preceding it.
693 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
; test43: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.psub.d with %a as the first operand and %b as the second,
; then bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
695 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
697 define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
700 %0 = bitcast <1 x i64> %b to <2 x i32>
701 %1 = bitcast <1 x i64> %a to <2 x i32>
702 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
703 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
704 %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
705 %3 = bitcast x86_mmx %2 to <2 x i32>
706 %4 = bitcast <2 x i32> %3 to <1 x i64>
707 %5 = extractelement <1 x i64> %4, i32 0
; test42: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.psub.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
711 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
713 define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
716 %0 = bitcast <1 x i64> %b to <4 x i16>
717 %1 = bitcast <1 x i64> %a to <4 x i16>
718 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
719 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
720 %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
721 %3 = bitcast x86_mmx %2 to <4 x i16>
722 %4 = bitcast <4 x i16> %3 to <1 x i64>
723 %5 = extractelement <1 x i64> %4, i32 0
; test41: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.psub.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
727 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
729 define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
732 %0 = bitcast <1 x i64> %b to <8 x i8>
733 %1 = bitcast <1 x i64> %a to <8 x i8>
734 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
735 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
736 %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
737 %3 = bitcast x86_mmx %2 to <8 x i8>
738 %4 = bitcast <8 x i8> %3 to <1 x i64>
739 %5 = extractelement <1 x i64> %4, i32 0
; test40: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.paddus.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
743 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
745 define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
748 %0 = bitcast <1 x i64> %b to <4 x i16>
749 %1 = bitcast <1 x i64> %a to <4 x i16>
750 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
751 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
752 %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
753 %3 = bitcast x86_mmx %2 to <4 x i16>
754 %4 = bitcast <4 x i16> %3 to <1 x i64>
755 %5 = extractelement <1 x i64> %4, i32 0
; test39: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.paddus.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
759 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
761 define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
764 %0 = bitcast <1 x i64> %b to <8 x i8>
765 %1 = bitcast <1 x i64> %a to <8 x i8>
766 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
767 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
768 %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
769 %3 = bitcast x86_mmx %2 to <8 x i8>
770 %4 = bitcast <8 x i8> %3 to <1 x i64>
771 %5 = extractelement <1 x i64> %4, i32 0
; test38: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.padds.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
775 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
777 define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
780 %0 = bitcast <1 x i64> %b to <4 x i16>
781 %1 = bitcast <1 x i64> %a to <4 x i16>
782 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
783 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
784 %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
785 %3 = bitcast x86_mmx %2 to <4 x i16>
786 %4 = bitcast <4 x i16> %3 to <1 x i64>
787 %5 = extractelement <1 x i64> %4, i32 0
; test37: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.padds.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
791 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
793 define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
796 %0 = bitcast <1 x i64> %b to <8 x i8>
797 %1 = bitcast <1 x i64> %a to <8 x i8>
798 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
799 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
800 %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
801 %3 = bitcast x86_mmx %2 to <8 x i8>
802 %4 = bitcast <8 x i8> %3 to <1 x i64>
803 %5 = extractelement <1 x i64> %4, i32 0
; test36: extracts element 0 of both %a and %b as i64 values, bitcasts each
; directly to x86_mmx, and calls @llvm.x86.mmx.padd.q with %a's value first
; and %b's second. The x86_mmx result is bitcast straight to i64. The call
; site carries no attributes here, unlike most other calls in this file.
807 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
809 define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
812 %0 = extractelement <1 x i64> %a, i32 0
813 %mmx_var = bitcast i64 %0 to x86_mmx
814 %1 = extractelement <1 x i64> %b, i32 0
815 %mmx_var1 = bitcast i64 %1 to x86_mmx
816 %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
817 %3 = bitcast x86_mmx %2 to i64
; test35: bitcasts %a and %b from <1 x i64> through <2 x i32> to x86_mmx, calls
; @llvm.x86.mmx.padd.d with %a as the first operand and %b as the second,
; then bitcasts the result back through <2 x i32> to <1 x i64> and extracts
; element 0.
821 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
823 define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
826 %0 = bitcast <1 x i64> %b to <2 x i32>
827 %1 = bitcast <1 x i64> %a to <2 x i32>
828 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
829 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
830 %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
831 %3 = bitcast x86_mmx %2 to <2 x i32>
832 %4 = bitcast <2 x i32> %3 to <1 x i64>
833 %5 = extractelement <1 x i64> %4, i32 0
; test34: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.padd.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
837 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
839 define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
842 %0 = bitcast <1 x i64> %b to <4 x i16>
843 %1 = bitcast <1 x i64> %a to <4 x i16>
844 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
845 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
846 %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
847 %3 = bitcast x86_mmx %2 to <4 x i16>
848 %4 = bitcast <4 x i16> %3 to <1 x i64>
849 %5 = extractelement <1 x i64> %4, i32 0
; test33: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.padd.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
853 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
855 define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
858 %0 = bitcast <1 x i64> %b to <8 x i8>
859 %1 = bitcast <1 x i64> %a to <8 x i8>
860 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
861 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
862 %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
863 %3 = bitcast x86_mmx %2 to <8 x i8>
864 %4 = bitcast <8 x i8> %3 to <1 x i64>
865 %5 = extractelement <1 x i64> %4, i32 0
; test32: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx and
; calls @llvm.x86.mmx.psad.bw with %a as the first operand and %b as the
; second. The x86_mmx result is bitcast straight to i64 rather than going
; back through a vector type.
869 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
871 define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
874 %0 = bitcast <1 x i64> %b to <8 x i8>
875 %1 = bitcast <1 x i64> %a to <8 x i8>
876 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
877 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
878 %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
879 %3 = bitcast x86_mmx %2 to i64
; test31: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.pmins.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
883 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
885 define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
888 %0 = bitcast <1 x i64> %b to <4 x i16>
889 %1 = bitcast <1 x i64> %a to <4 x i16>
890 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
891 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
892 %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
893 %3 = bitcast x86_mmx %2 to <4 x i16>
894 %4 = bitcast <4 x i16> %3 to <1 x i64>
895 %5 = extractelement <1 x i64> %4, i32 0
; test30: bitcasts %a and %b from <1 x i64> through <8 x i8> to x86_mmx, calls
; @llvm.x86.mmx.pminu.b with %a as the first operand and %b as the second,
; then bitcasts the result back through <8 x i8> to <1 x i64> and extracts
; element 0.
899 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
901 define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
904 %0 = bitcast <1 x i64> %b to <8 x i8>
905 %1 = bitcast <1 x i64> %a to <8 x i8>
906 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
907 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
908 %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
909 %3 = bitcast x86_mmx %2 to <8 x i8>
910 %4 = bitcast <8 x i8> %3 to <1 x i64>
911 %5 = extractelement <1 x i64> %4, i32 0
; test29: bitcasts %a and %b from <1 x i64> through <4 x i16> to x86_mmx, calls
; @llvm.x86.mmx.pmaxs.w with %a as the first operand and %b as the second,
; then bitcasts the result back through <4 x i16> to <1 x i64> and extracts
; element 0.
915 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
917 define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
920 %0 = bitcast <1 x i64> %b to <4 x i16>
921 %1 = bitcast <1 x i64> %a to <4 x i16>
922 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
923 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
924 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
925 %3 = bitcast x86_mmx %2 to <4 x i16>
926 %4 = bitcast <4 x i16> %3 to <1 x i64>
927 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.mmx.pmaxu.b takes two x86_mmx operands and yields x86_mmx.
931 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
; @test28 reinterprets both <1 x i64> arguments as <8 x i8>, then as x86_mmx, and calls the intrinsic.
933 define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %b is cast first (%0) and %a second (%1); the call below still receives them in (%a, %b) order.
936 %0 = bitcast <1 x i64> %b to <8 x i8>
937 %1 = bitcast <1 x i64> %a to <8 x i8>
938 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
939 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
940 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back through <8 x i8> to <1 x i64>, then extract element 0 as an i64.
941 %3 = bitcast x86_mmx %2 to <8 x i8>
942 %4 = bitcast <8 x i8> %3 to <1 x i64>
943 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.mmx.pavg.w takes two x86_mmx operands and yields x86_mmx.
947 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
; @test27 reinterprets both <1 x i64> arguments as <4 x i16>, then as x86_mmx, and calls the intrinsic.
949 define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %b is cast first (%0) and %a second (%1); the call below still receives them in (%a, %b) order.
952 %0 = bitcast <1 x i64> %b to <4 x i16>
953 %1 = bitcast <1 x i64> %a to <4 x i16>
954 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
955 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
956 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
957 %3 = bitcast x86_mmx %2 to <4 x i16>
958 %4 = bitcast <4 x i16> %3 to <1 x i64>
959 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.mmx.pavg.b takes two x86_mmx operands and yields x86_mmx.
963 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
; @test26 reinterprets both <1 x i64> arguments as <8 x i8>, then as x86_mmx, and calls the intrinsic.
965 define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %b is cast first (%0) and %a second (%1); the call below still receives them in (%a, %b) order.
968 %0 = bitcast <1 x i64> %b to <8 x i8>
969 %1 = bitcast <1 x i64> %a to <8 x i8>
970 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
971 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
972 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back through <8 x i8> to <1 x i64>, then extract element 0 as an i64.
973 %3 = bitcast x86_mmx %2 to <8 x i8>
974 %4 = bitcast <8 x i8> %3 to <1 x i64>
975 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.mmx.movnt.dq takes an x86_mmx* and an x86_mmx value and returns void.
; It is declared nounwind only (no readnone), unlike the value-producing intrinsics in this file.
979 declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
; @test25 passes pointer %p and the single i64 lane of %a to the intrinsic; the function returns void.
981 define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; Reinterpret the <1 x i64>* destination as an x86_mmx* so it matches the intrinsic's signature.
984 %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
; Here the value goes scalar-first: extract lane 0 as i64, then bitcast that i64 to x86_mmx.
985 %0 = extractelement <1 x i64> %a, i32 0
986 %mmx_var.i = bitcast i64 %0 to x86_mmx
987 tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
; Intrinsic under test: @llvm.x86.mmx.pmovmskb takes one x86_mmx operand and yields an i32.
991 declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
; @test24 reinterprets %a as <8 x i8>, then as x86_mmx, and calls the intrinsic; the function type is i32.
993 define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
996 %0 = bitcast <1 x i64> %a to <8 x i8>
997 %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
; The intrinsic already produces i32, so no conversion back from x86_mmx is needed.
998 %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
; Intrinsic under test: @llvm.x86.mmx.maskmovq takes two x86_mmx operands plus an i8* and returns void.
; It is declared nounwind only (no readnone).
1002 declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
; @test23 reinterprets %d and %n as <8 x i8>, then as x86_mmx, and passes them with pointer %p.
1004 define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; %n is cast first (%0) and %d second (%1); the call receives them in (%d, %n, %p) order.
1007 %0 = bitcast <1 x i64> %n to <8 x i8>
1008 %1 = bitcast <1 x i64> %d to <8 x i8>
1009 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1010 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1011 tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
; Intrinsic under test: @llvm.x86.mmx.pmulhu.w takes two x86_mmx operands and yields x86_mmx.
1015 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
; @test22 reinterprets both <1 x i64> arguments as <4 x i16>, then as x86_mmx, and calls the intrinsic.
1017 define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %b is cast first (%0) and %a second (%1); the call below still receives them in (%a, %b) order.
1020 %0 = bitcast <1 x i64> %b to <4 x i16>
1021 %1 = bitcast <1 x i64> %a to <4 x i16>
1022 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1023 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1024 %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
1025 %3 = bitcast x86_mmx %2 to <4 x i16>
1026 %4 = bitcast <4 x i16> %3 to <1 x i64>
1027 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.sse.pshuf.w takes an x86_mmx operand and an i8 and yields x86_mmx.
1031 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
; @test21 reinterprets %a as <4 x i16>, then as x86_mmx, and calls the intrinsic with a constant i8.
1033 define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
1036 %0 = bitcast <1 x i64> %a to <4 x i16>
1037 %1 = bitcast <4 x i16> %0 to x86_mmx
; The i8 operand is the literal 3.
1038 %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
1039 %3 = bitcast x86_mmx %2 to <4 x i16>
1040 %4 = bitcast <4 x i16> %3 to <1 x i64>
1041 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.mmx.pmulu.dq takes two x86_mmx operands and yields x86_mmx.
1045 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
; @test20 reinterprets both <1 x i64> arguments as <2 x i32>, then as x86_mmx, and calls the intrinsic.
1047 define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %b is cast first (%0) and %a second (%1); the call below still receives them in (%a, %b) order.
1050 %0 = bitcast <1 x i64> %b to <2 x i32>
1051 %1 = bitcast <1 x i64> %a to <2 x i32>
1052 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
1053 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
1054 %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; The x86_mmx result is bitcast straight to a scalar i64, with no intermediate vector type.
1055 %3 = bitcast x86_mmx %2 to i64
; Intrinsic under test: @llvm.x86.sse.cvtpi2pd takes one x86_mmx operand and yields <2 x double>.
1059 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
; @test19 reinterprets %a as <2 x i32>, then as x86_mmx, and calls the intrinsic; the function type is <2 x double>.
1061 define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
1064 %0 = bitcast <1 x i64> %a to <2 x i32>
1065 %1 = bitcast <2 x i32> %0 to x86_mmx
; The intrinsic result already has the function's return type, so nothing is converted afterwards.
1066 %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
; Intrinsic under test: @llvm.x86.sse.cvttpd2pi takes a <2 x double> operand and yields x86_mmx.
1070 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
; @test18 hands its <2 x double> argument to the intrinsic unchanged (no input bitcasts).
1072 define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
1075 %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
; Convert the x86_mmx result through <2 x i32> to <1 x i64>, then extract element 0 as an i64.
1076 %1 = bitcast x86_mmx %0 to <2 x i32>
1077 %2 = bitcast <2 x i32> %1 to <1 x i64>
1078 %3 = extractelement <1 x i64> %2, i32 0
; Intrinsic under test: @llvm.x86.sse.cvtpd2pi takes a <2 x double> operand and yields x86_mmx.
1082 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
; @test17 hands its <2 x double> argument to the intrinsic unchanged (no input bitcasts).
1084 define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
1087 %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
; Convert the x86_mmx result through <2 x i32> to <1 x i64>, then extract element 0 as an i64.
1088 %1 = bitcast x86_mmx %0 to <2 x i32>
1089 %2 = bitcast <2 x i32> %1 to <1 x i64>
1090 %3 = extractelement <1 x i64> %2, i32 0
; Intrinsic under test: @llvm.x86.mmx.palignr.b takes two x86_mmx operands plus an i8 and yields x86_mmx.
1094 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
; @test16 extracts the single i64 lane of each argument, bitcasts it to x86_mmx, and calls the intrinsic.
1096 define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; Scalar-first conversion: extractelement to i64, then bitcast i64 -> x86_mmx (no intermediate vector type).
1099 %0 = extractelement <1 x i64> %a, i32 0
1100 %mmx_var = bitcast i64 %0 to x86_mmx
1101 %1 = extractelement <1 x i64> %b, i32 0
1102 %mmx_var1 = bitcast i64 %1 to x86_mmx
; The i8 operand is the literal 16; this call site carries no trailing attributes.
1103 %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
; The x86_mmx result is bitcast straight to a scalar i64.
1104 %3 = bitcast x86_mmx %2 to i64
; Intrinsic under test: @llvm.x86.ssse3.pabs.d takes one x86_mmx operand and yields x86_mmx.
1108 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
; @test15 reinterprets %a as <2 x i32>, then as x86_mmx, and calls the intrinsic.
1110 define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
1113 %0 = bitcast <1 x i64> %a to <2 x i32>
1114 %1 = bitcast <2 x i32> %0 to x86_mmx
1115 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
; Convert the x86_mmx result back through <2 x i32> to <1 x i64>, then extract element 0 as an i64.
1116 %3 = bitcast x86_mmx %2 to <2 x i32>
1117 %4 = bitcast <2 x i32> %3 to <1 x i64>
1118 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.ssse3.pabs.w takes one x86_mmx operand and yields x86_mmx.
1122 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
; @test14 reinterprets %a as <4 x i16>, then as x86_mmx, and calls the intrinsic.
1124 define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
1127 %0 = bitcast <1 x i64> %a to <4 x i16>
1128 %1 = bitcast <4 x i16> %0 to x86_mmx
1129 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
1130 %3 = bitcast x86_mmx %2 to <4 x i16>
1131 %4 = bitcast <4 x i16> %3 to <1 x i64>
1132 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.ssse3.pabs.b takes one x86_mmx operand and yields x86_mmx.
1136 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
; @test13 reinterprets %a as <8 x i8>, then as x86_mmx, and calls the intrinsic.
1138 define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
1141 %0 = bitcast <1 x i64> %a to <8 x i8>
1142 %1 = bitcast <8 x i8> %0 to x86_mmx
1143 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
; Convert the x86_mmx result back through <8 x i8> to <1 x i64>, then extract element 0 as an i64.
1144 %3 = bitcast x86_mmx %2 to <8 x i8>
1145 %4 = bitcast <8 x i8> %3 to <1 x i64>
1146 %5 = extractelement <1 x i64> %4, i32 0
; Intrinsic under test: @llvm.x86.ssse3.psign.d takes two x86_mmx operands and yields x86_mmx.
1150 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
; @test12 reinterprets both <1 x i64> arguments as <2 x i32>, then as x86_mmx, and calls the intrinsic.
1152 define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1155 %0 = bitcast <1 x i64> %b to <2 x i32>
1156 %1 = bitcast <1 x i64> %a to <2 x i32>
1157 %2 = bitcast <2 x i32> %1 to x86_mmx
1158 %3 = bitcast <2 x i32> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1159 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <2 x i32> to <1 x i64>, then extract element 0 as an i64.
1160 %5 = bitcast x86_mmx %4 to <2 x i32>
1161 %6 = bitcast <2 x i32> %5 to <1 x i64>
1162 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.psign.w takes two x86_mmx operands and yields x86_mmx.
1166 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
; @test11 reinterprets both <1 x i64> arguments as <4 x i16>, then as x86_mmx, and calls the intrinsic.
1168 define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1171 %0 = bitcast <1 x i64> %b to <4 x i16>
1172 %1 = bitcast <1 x i64> %a to <4 x i16>
1173 %2 = bitcast <4 x i16> %1 to x86_mmx
1174 %3 = bitcast <4 x i16> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1175 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
1176 %5 = bitcast x86_mmx %4 to <4 x i16>
1177 %6 = bitcast <4 x i16> %5 to <1 x i64>
1178 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.psign.b takes two x86_mmx operands and yields x86_mmx.
1182 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
; @test10 reinterprets both <1 x i64> arguments as <8 x i8>, then as x86_mmx, and calls the intrinsic.
1184 define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1187 %0 = bitcast <1 x i64> %b to <8 x i8>
1188 %1 = bitcast <1 x i64> %a to <8 x i8>
1189 %2 = bitcast <8 x i8> %1 to x86_mmx
1190 %3 = bitcast <8 x i8> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1191 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <8 x i8> to <1 x i64>, then extract element 0 as an i64.
1192 %5 = bitcast x86_mmx %4 to <8 x i8>
1193 %6 = bitcast <8 x i8> %5 to <1 x i64>
1194 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.pshuf.b takes two x86_mmx operands and yields x86_mmx.
1198 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
; @test9 reinterprets both <1 x i64> arguments as <8 x i8>, then as x86_mmx, and calls the intrinsic.
1200 define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1203 %0 = bitcast <1 x i64> %b to <8 x i8>
1204 %1 = bitcast <1 x i64> %a to <8 x i8>
1205 %2 = bitcast <8 x i8> %1 to x86_mmx
1206 %3 = bitcast <8 x i8> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1207 %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <8 x i8> to <1 x i64>, then extract element 0 as an i64.
1208 %5 = bitcast x86_mmx %4 to <8 x i8>
1209 %6 = bitcast <8 x i8> %5 to <1 x i64>
1210 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.pmul.hr.sw takes two x86_mmx operands and yields x86_mmx.
1214 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
; @test8 reinterprets both <1 x i64> arguments as <4 x i16>, then as x86_mmx, and calls the intrinsic.
1216 define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1219 %0 = bitcast <1 x i64> %b to <4 x i16>
1220 %1 = bitcast <1 x i64> %a to <4 x i16>
1221 %2 = bitcast <4 x i16> %1 to x86_mmx
1222 %3 = bitcast <4 x i16> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1223 %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
1224 %5 = bitcast x86_mmx %4 to <4 x i16>
1225 %6 = bitcast <4 x i16> %5 to <1 x i64>
1226 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.pmadd.ub.sw takes two x86_mmx operands and yields x86_mmx.
1230 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
; @test7 reinterprets both <1 x i64> arguments as <8 x i8>, then as x86_mmx, and calls the intrinsic.
1232 define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1235 %0 = bitcast <1 x i64> %b to <8 x i8>
1236 %1 = bitcast <1 x i64> %a to <8 x i8>
1237 %2 = bitcast <8 x i8> %1 to x86_mmx
1238 %3 = bitcast <8 x i8> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1239 %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; The result is routed back through <8 x i8>; these are bitcasts, so the chosen lane type leaves the 64 bits unchanged.
1240 %5 = bitcast x86_mmx %4 to <8 x i8>
1241 %6 = bitcast <8 x i8> %5 to <1 x i64>
1242 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.phsub.sw takes two x86_mmx operands and yields x86_mmx.
1246 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
; @test6 reinterprets both <1 x i64> arguments as <4 x i16>, then as x86_mmx, and calls the intrinsic.
1248 define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1251 %0 = bitcast <1 x i64> %b to <4 x i16>
1252 %1 = bitcast <1 x i64> %a to <4 x i16>
1253 %2 = bitcast <4 x i16> %1 to x86_mmx
1254 %3 = bitcast <4 x i16> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1255 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
1256 %5 = bitcast x86_mmx %4 to <4 x i16>
1257 %6 = bitcast <4 x i16> %5 to <1 x i64>
1258 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.phsub.d takes two x86_mmx operands and yields x86_mmx.
1262 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
; @test5 reinterprets both <1 x i64> arguments as <2 x i32>, then as x86_mmx, and calls the intrinsic.
1264 define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1267 %0 = bitcast <1 x i64> %b to <2 x i32>
1268 %1 = bitcast <1 x i64> %a to <2 x i32>
1269 %2 = bitcast <2 x i32> %1 to x86_mmx
1270 %3 = bitcast <2 x i32> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1271 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <2 x i32> to <1 x i64>, then extract element 0 as an i64.
1272 %5 = bitcast x86_mmx %4 to <2 x i32>
1273 %6 = bitcast <2 x i32> %5 to <1 x i64>
1274 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.phsub.w takes two x86_mmx operands and yields x86_mmx.
1278 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
; @test4 reinterprets both <1 x i64> arguments as <4 x i16>, then as x86_mmx, and calls the intrinsic.
1280 define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1283 %0 = bitcast <1 x i64> %b to <4 x i16>
1284 %1 = bitcast <1 x i64> %a to <4 x i16>
1285 %2 = bitcast <4 x i16> %1 to x86_mmx
1286 %3 = bitcast <4 x i16> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1287 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
1288 %5 = bitcast x86_mmx %4 to <4 x i16>
1289 %6 = bitcast <4 x i16> %5 to <1 x i64>
1290 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.phadd.sw takes two x86_mmx operands and yields x86_mmx.
1294 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
; @test3 reinterprets both <1 x i64> arguments as <4 x i16>, then as x86_mmx, and calls the intrinsic.
1296 define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1299 %0 = bitcast <1 x i64> %b to <4 x i16>
1300 %1 = bitcast <1 x i64> %a to <4 x i16>
1301 %2 = bitcast <4 x i16> %1 to x86_mmx
1302 %3 = bitcast <4 x i16> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1303 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <4 x i16> to <1 x i64>, then extract element 0 as an i64.
1304 %5 = bitcast x86_mmx %4 to <4 x i16>
1305 %6 = bitcast <4 x i16> %5 to <1 x i64>
1306 %7 = extractelement <1 x i64> %6, i32 0
; Intrinsic under test: @llvm.x86.ssse3.phadd.d takes two x86_mmx operands and yields x86_mmx.
1310 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
; @test2 reinterprets both <1 x i64> arguments as <2 x i32>, then as x86_mmx, and calls the intrinsic.
1312 define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; %0 derives from %b and %1 from %a; after the second cast, %2 is the %a operand and %3 the %b operand.
1315 %0 = bitcast <1 x i64> %b to <2 x i32>
1316 %1 = bitcast <1 x i64> %a to <2 x i32>
1317 %2 = bitcast <2 x i32> %1 to x86_mmx
1318 %3 = bitcast <2 x i32> %0 to x86_mmx
; Operands reach the intrinsic in (%a, %b) order.
1319 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
; Convert the x86_mmx result back through <2 x i32> to <1 x i64>, then extract element 0 as an i64.
1320 %5 = bitcast x86_mmx %4 to <2 x i32>
1321 %6 = bitcast <2 x i32> %5 to <1 x i64>
1322 %7 = extractelement <1 x i64> %6, i32 0