1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s
; Checks that the spilled <2 x double> operand is reloaded by cvtpd2pi itself
; ("16-byte Folded Reload"). The inline-asm nop defines an "=x" result and
; clobbers xmm1-xmm15 (each listed once) plus flags.
4 define <1 x i64> @stack_fold_cvtpd2pi(<2 x double> %a0) {
5 ; CHECK-LABEL: stack_fold_cvtpd2pi:
7 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
11 ; CHECK-NEXT: cvtpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
12 ; CHECK-NEXT: movq %mm0, %rax
14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
15 %2 = call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
18 declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
; Checks that the spilled MMX operand is reloaded by cvtpi2pd itself
; ("8-byte Folded Reload"). The inline-asm nop defines a "=y" result and
; clobbers mm1-mm7 (each listed once).
20 define <2 x double> @stack_fold_cvtpi2pd(<1 x i64> %a0) {
21 ; CHECK-LABEL: stack_fold_cvtpi2pd:
23 ; CHECK-NEXT: movq %rdi, %mm0
24 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
28 ; CHECK-NEXT: cvtpi2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
30 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
31 %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %a0) nounwind readnone
34 declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone
; Checks that the spilled MMX operand %a1 is reloaded by cvtpi2ps itself
; ("8-byte Folded Reload"). The inline-asm nop defines a "=y" result and
; clobbers mm1-mm7 (each listed once).
36 define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, <1 x i64> %a1) {
37 ; CHECK-LABEL: stack_fold_cvtpi2ps:
39 ; CHECK-NEXT: movq %rdi, %mm0
40 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
44 ; CHECK-NEXT: cvtpi2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
46 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
47 %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, <1 x i64> %a1) nounwind readnone
50 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone
; Checks that the spilled <4 x float> operand is reloaded by cvtps2pi itself
; ("16-byte Folded Reload"). The inline-asm nop defines an "=x" result and
; clobbers xmm1-xmm15 (each listed once) plus flags.
52 define <1 x i64> @stack_fold_cvtps2pi(<4 x float> %a0) {
53 ; CHECK-LABEL: stack_fold_cvtps2pi:
55 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
59 ; CHECK-NEXT: cvtps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
60 ; CHECK-NEXT: movq %mm0, %rax
62 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
63 %2 = call <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
66 declare <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
; Checks that the spilled <2 x double> operand is reloaded by cvttpd2pi itself
; ("16-byte Folded Reload"). The inline-asm nop defines an "=x" result and
; clobbers xmm1-xmm15 (each listed once) plus flags.
68 define <1 x i64> @stack_fold_cvttpd2pi(<2 x double> %a0) {
69 ; CHECK-LABEL: stack_fold_cvttpd2pi:
71 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
75 ; CHECK-NEXT: cvttpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
76 ; CHECK-NEXT: movq %mm0, %rax
78 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
79 %2 = call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
82 declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
; Checks that the spilled <4 x float> operand is reloaded by cvttps2pi itself
; ("16-byte Folded Reload"). The inline-asm nop defines an "=x" result and
; clobbers xmm1-xmm15 (each listed once) plus flags.
84 define <1 x i64> @stack_fold_cvttps2pi(<4 x float> %a0) {
85 ; CHECK-LABEL: stack_fold_cvttps2pi:
87 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
91 ; CHECK-NEXT: cvttps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
92 ; CHECK-NEXT: movq %mm0, %rax
94 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
95 %2 = call <1 x i64> @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
98 declare <1 x i64> @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
100 ; TODO stack_fold_movd_load
102 ; The padd keeps the value in an MMX register, so the store below is issued from the MMX domain.
; Checks that element 0 of the paddb result (viewed as <2 x i32>) is stored
; straight from %mm0 to a stack slot with movd ("4-byte Spill") and reloaded
; into %eax after the inline asm. The inline-asm nop clobbers the listed
; general-purpose registers; the CHECK lines also expect six pushes on entry
; and the matching pops on exit.
103 define i32 @stack_fold_movd_store(<1 x i64> %a0) nounwind {
104 ; CHECK-LABEL: stack_fold_movd_store:
106 ; CHECK-NEXT: pushq %rbp
107 ; CHECK-NEXT: pushq %r15
108 ; CHECK-NEXT: pushq %r14
109 ; CHECK-NEXT: pushq %r13
110 ; CHECK-NEXT: pushq %r12
111 ; CHECK-NEXT: pushq %rbx
112 ; CHECK-NEXT: movq %rdi, %mm0
113 ; CHECK-NEXT: paddb %mm0, %mm0
114 ; CHECK-NEXT: movd %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
117 ; CHECK-NEXT: #NO_APP
118 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
119 ; CHECK-NEXT: popq %rbx
120 ; CHECK-NEXT: popq %r12
121 ; CHECK-NEXT: popq %r13
122 ; CHECK-NEXT: popq %r14
123 ; CHECK-NEXT: popq %r15
124 ; CHECK-NEXT: popq %rbp
126 %1 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a0, <1 x i64> %a0)
127 %2 = bitcast <1 x i64> %1 to <2 x i32>
128 %3 = extractelement <2 x i32> %2, i32 0
129 %4 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
133 ; TODO stack_fold_movq_load
135 ; The padd keeps the value in an MMX register, so the store below is issued from the MMX domain.
; Checks that the paddb result, bitcast to i64, is stored straight from %mm0
; to a stack slot with movq ("8-byte Spill") and reloaded into %rax after the
; inline asm. The inline-asm nop clobbers the listed general-purpose
; registers; the CHECK lines also expect six pushes on entry and the matching
; pops on exit.
136 define i64 @stack_fold_movq_store(<1 x i64> %a0) nounwind {
137 ; CHECK-LABEL: stack_fold_movq_store:
139 ; CHECK-NEXT: pushq %rbp
140 ; CHECK-NEXT: pushq %r15
141 ; CHECK-NEXT: pushq %r14
142 ; CHECK-NEXT: pushq %r13
143 ; CHECK-NEXT: pushq %r12
144 ; CHECK-NEXT: pushq %rbx
145 ; CHECK-NEXT: movq %rdi, %mm0
146 ; CHECK-NEXT: paddb %mm0, %mm0
147 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
150 ; CHECK-NEXT: #NO_APP
151 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
152 ; CHECK-NEXT: popq %rbx
153 ; CHECK-NEXT: popq %r12
154 ; CHECK-NEXT: popq %r13
155 ; CHECK-NEXT: popq %r14
156 ; CHECK-NEXT: popq %r15
157 ; CHECK-NEXT: popq %rbp
159 %1 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a0, <1 x i64> %a0)
160 %2 = bitcast <1 x i64> %1 to i64
161 %3 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Checks that the spilled MMX operand is reloaded by pabsb itself
; ("8-byte Folded Reload") after the inline-asm block. The inline-asm nop
; defines a "=y" result and clobbers mm1-mm7.
165 define <1 x i64> @stack_fold_pabsb(<1 x i64> %a0) {
166 ; CHECK-LABEL: stack_fold_pabsb:
168 ; CHECK-NEXT: movq %rdi, %mm0
169 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
172 ; CHECK-NEXT: #NO_APP
173 ; CHECK-NEXT: pabsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
174 ; CHECK-NEXT: movq %mm0, %rax
176 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
177 %2 = call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %a0) nounwind readnone
180 declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone
; Checks that the spilled MMX operand is reloaded by pabsd itself
; ("8-byte Folded Reload") after the inline-asm block. The inline-asm nop
; defines a "=y" result and clobbers mm1-mm7.
182 define <1 x i64> @stack_fold_pabsd(<1 x i64> %a0) {
183 ; CHECK-LABEL: stack_fold_pabsd:
185 ; CHECK-NEXT: movq %rdi, %mm0
186 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
189 ; CHECK-NEXT: #NO_APP
190 ; CHECK-NEXT: pabsd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
191 ; CHECK-NEXT: movq %mm0, %rax
193 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
194 %2 = call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %a0) nounwind readnone
197 declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone
; Checks that the spilled MMX operand is reloaded by pabsw itself
; ("8-byte Folded Reload") after the inline-asm block. The inline-asm nop
; defines a "=y" result and clobbers mm1-mm7.
199 define <1 x i64> @stack_fold_pabsw(<1 x i64> %a0) {
200 ; CHECK-LABEL: stack_fold_pabsw:
202 ; CHECK-NEXT: movq %rdi, %mm0
203 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
206 ; CHECK-NEXT: #NO_APP
207 ; CHECK-NEXT: pabsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
208 ; CHECK-NEXT: movq %mm0, %rax
210 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
211 %2 = call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %a0) nounwind readnone
214 declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone
; Two-operand MMX packssdw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register packssdw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
216 define <1 x i64> @stack_fold_packssdw(<1 x i64> %a, <1 x i64> %b) {
217 ; CHECK-LABEL: stack_fold_packssdw:
219 ; CHECK-NEXT: movq %rsi, %mm0
220 ; CHECK-NEXT: movq %rdi, %mm1
221 ; CHECK-NEXT: packssdw %mm0, %mm1
222 ; CHECK-NEXT: movq %mm1, %rax
225 ; CHECK-NEXT: #NO_APP
227 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
228 %2 = call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
231 declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX packsswb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register packsswb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
233 define <1 x i64> @stack_fold_packsswb(<1 x i64> %a, <1 x i64> %b) {
234 ; CHECK-LABEL: stack_fold_packsswb:
236 ; CHECK-NEXT: movq %rsi, %mm0
237 ; CHECK-NEXT: movq %rdi, %mm1
238 ; CHECK-NEXT: packsswb %mm0, %mm1
239 ; CHECK-NEXT: movq %mm1, %rax
242 ; CHECK-NEXT: #NO_APP
244 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
245 %2 = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %a, <1 x i64> %b) nounwind readnone
248 declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX packuswb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register packuswb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
250 define <1 x i64> @stack_fold_packuswb(<1 x i64> %a, <1 x i64> %b) {
251 ; CHECK-LABEL: stack_fold_packuswb:
253 ; CHECK-NEXT: movq %rsi, %mm0
254 ; CHECK-NEXT: movq %rdi, %mm1
255 ; CHECK-NEXT: packuswb %mm0, %mm1
256 ; CHECK-NEXT: movq %mm1, %rax
259 ; CHECK-NEXT: #NO_APP
261 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
262 %2 = call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %a, <1 x i64> %b) nounwind readnone
265 declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX paddb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register paddb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
267 define <1 x i64> @stack_fold_paddb(<1 x i64> %a, <1 x i64> %b) {
268 ; CHECK-LABEL: stack_fold_paddb:
270 ; CHECK-NEXT: movq %rsi, %mm0
271 ; CHECK-NEXT: movq %rdi, %mm1
272 ; CHECK-NEXT: paddb %mm0, %mm1
273 ; CHECK-NEXT: movq %mm1, %rax
276 ; CHECK-NEXT: #NO_APP
278 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
279 %2 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
282 declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX paddd test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register paddd ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
284 define <1 x i64> @stack_fold_paddd(<1 x i64> %a, <1 x i64> %b) {
285 ; CHECK-LABEL: stack_fold_paddd:
287 ; CHECK-NEXT: movq %rsi, %mm0
288 ; CHECK-NEXT: movq %rdi, %mm1
289 ; CHECK-NEXT: paddd %mm0, %mm1
290 ; CHECK-NEXT: movq %mm1, %rax
293 ; CHECK-NEXT: #NO_APP
295 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
296 %2 = call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
299 declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX paddq test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register paddq ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
301 define <1 x i64> @stack_fold_paddq(<1 x i64> %a, <1 x i64> %b) {
302 ; CHECK-LABEL: stack_fold_paddq:
304 ; CHECK-NEXT: movq %rsi, %mm0
305 ; CHECK-NEXT: movq %rdi, %mm1
306 ; CHECK-NEXT: paddq %mm0, %mm1
307 ; CHECK-NEXT: movq %mm1, %rax
310 ; CHECK-NEXT: #NO_APP
312 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
313 %2 = call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone
316 declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX paddsb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register paddsb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
318 define <1 x i64> @stack_fold_paddsb(<1 x i64> %a, <1 x i64> %b) {
319 ; CHECK-LABEL: stack_fold_paddsb:
321 ; CHECK-NEXT: movq %rsi, %mm0
322 ; CHECK-NEXT: movq %rdi, %mm1
323 ; CHECK-NEXT: paddsb %mm0, %mm1
324 ; CHECK-NEXT: movq %mm1, %rax
327 ; CHECK-NEXT: #NO_APP
329 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
330 %2 = call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
333 declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX paddsw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register paddsw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
335 define <1 x i64> @stack_fold_paddsw(<1 x i64> %a, <1 x i64> %b) {
336 ; CHECK-LABEL: stack_fold_paddsw:
338 ; CHECK-NEXT: movq %rsi, %mm0
339 ; CHECK-NEXT: movq %rdi, %mm1
340 ; CHECK-NEXT: paddsw %mm0, %mm1
341 ; CHECK-NEXT: movq %mm1, %rax
344 ; CHECK-NEXT: #NO_APP
346 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
347 %2 = call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
350 declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX paddusb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register paddusb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
352 define <1 x i64> @stack_fold_paddusb(<1 x i64> %a, <1 x i64> %b) {
353 ; CHECK-LABEL: stack_fold_paddusb:
355 ; CHECK-NEXT: movq %rsi, %mm0
356 ; CHECK-NEXT: movq %rdi, %mm1
357 ; CHECK-NEXT: paddusb %mm0, %mm1
358 ; CHECK-NEXT: movq %mm1, %rax
361 ; CHECK-NEXT: #NO_APP
363 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
364 %2 = call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
367 declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX paddusw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register paddusw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
369 define <1 x i64> @stack_fold_paddusw(<1 x i64> %a, <1 x i64> %b) {
370 ; CHECK-LABEL: stack_fold_paddusw:
372 ; CHECK-NEXT: movq %rsi, %mm0
373 ; CHECK-NEXT: movq %rdi, %mm1
374 ; CHECK-NEXT: paddusw %mm0, %mm1
375 ; CHECK-NEXT: movq %mm1, %rax
378 ; CHECK-NEXT: #NO_APP
380 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
381 %2 = call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
384 declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX paddw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register paddw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
386 define <1 x i64> @stack_fold_paddw(<1 x i64> %a, <1 x i64> %b) {
387 ; CHECK-LABEL: stack_fold_paddw:
389 ; CHECK-NEXT: movq %rsi, %mm0
390 ; CHECK-NEXT: movq %rdi, %mm1
391 ; CHECK-NEXT: paddw %mm0, %mm1
392 ; CHECK-NEXT: movq %mm1, %rax
395 ; CHECK-NEXT: #NO_APP
397 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
398 %2 = call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
401 declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX palignr test (immediate 1); the inline-asm nop clobbers
; mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register palignr ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
403 define <1 x i64> @stack_fold_palignr(<1 x i64> %a, <1 x i64> %b) {
404 ; CHECK-LABEL: stack_fold_palignr:
406 ; CHECK-NEXT: movq %rsi, %mm0
407 ; CHECK-NEXT: movq %rdi, %mm1
408 ; CHECK-NEXT: palignr $1, %mm0, %mm1
409 ; CHECK-NEXT: movq %mm1, %rax
412 ; CHECK-NEXT: #NO_APP
414 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
415 %2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %a, <1 x i64> %b, i8 1) nounwind readnone
418 declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone
; Two-operand MMX pand test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pand ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
420 define <1 x i64> @stack_fold_pand(<1 x i64> %a, <1 x i64> %b) {
421 ; CHECK-LABEL: stack_fold_pand:
423 ; CHECK-NEXT: movq %rsi, %mm0
424 ; CHECK-NEXT: movq %rdi, %mm1
425 ; CHECK-NEXT: pand %mm0, %mm1
426 ; CHECK-NEXT: movq %mm1, %rax
429 ; CHECK-NEXT: #NO_APP
431 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
432 %2 = call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %a, <1 x i64> %b) nounwind readnone
435 declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pandn test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pandn ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
437 define <1 x i64> @stack_fold_pandn(<1 x i64> %a, <1 x i64> %b) {
438 ; CHECK-LABEL: stack_fold_pandn:
440 ; CHECK-NEXT: movq %rsi, %mm0
441 ; CHECK-NEXT: movq %rdi, %mm1
442 ; CHECK-NEXT: pandn %mm0, %mm1
443 ; CHECK-NEXT: movq %mm1, %rax
446 ; CHECK-NEXT: #NO_APP
448 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
449 %2 = call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %a, <1 x i64> %b) nounwind readnone
452 declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pavgb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pavgb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
454 define <1 x i64> @stack_fold_pavgb(<1 x i64> %a, <1 x i64> %b) {
455 ; CHECK-LABEL: stack_fold_pavgb:
457 ; CHECK-NEXT: movq %rsi, %mm0
458 ; CHECK-NEXT: movq %rdi, %mm1
459 ; CHECK-NEXT: pavgb %mm0, %mm1
460 ; CHECK-NEXT: movq %mm1, %rax
463 ; CHECK-NEXT: #NO_APP
465 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
466 %2 = call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
469 declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pavgw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pavgw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
471 define <1 x i64> @stack_fold_pavgw(<1 x i64> %a, <1 x i64> %b) {
472 ; CHECK-LABEL: stack_fold_pavgw:
474 ; CHECK-NEXT: movq %rsi, %mm0
475 ; CHECK-NEXT: movq %rdi, %mm1
476 ; CHECK-NEXT: pavgw %mm0, %mm1
477 ; CHECK-NEXT: movq %mm1, %rax
480 ; CHECK-NEXT: #NO_APP
482 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
483 %2 = call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
486 declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pcmpeqb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pcmpeqb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
488 define <1 x i64> @stack_fold_pcmpeqb(<1 x i64> %a, <1 x i64> %b) {
489 ; CHECK-LABEL: stack_fold_pcmpeqb:
491 ; CHECK-NEXT: movq %rsi, %mm0
492 ; CHECK-NEXT: movq %rdi, %mm1
493 ; CHECK-NEXT: pcmpeqb %mm0, %mm1
494 ; CHECK-NEXT: movq %mm1, %rax
497 ; CHECK-NEXT: #NO_APP
499 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
500 %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
503 declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pcmpeqd test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pcmpeqd ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
505 define <1 x i64> @stack_fold_pcmpeqd(<1 x i64> %a, <1 x i64> %b) {
506 ; CHECK-LABEL: stack_fold_pcmpeqd:
508 ; CHECK-NEXT: movq %rsi, %mm0
509 ; CHECK-NEXT: movq %rdi, %mm1
510 ; CHECK-NEXT: pcmpeqd %mm0, %mm1
511 ; CHECK-NEXT: movq %mm1, %rax
514 ; CHECK-NEXT: #NO_APP
516 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
517 %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
520 declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pcmpeqw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pcmpeqw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
522 define <1 x i64> @stack_fold_pcmpeqw(<1 x i64> %a, <1 x i64> %b) {
523 ; CHECK-LABEL: stack_fold_pcmpeqw:
525 ; CHECK-NEXT: movq %rsi, %mm0
526 ; CHECK-NEXT: movq %rdi, %mm1
527 ; CHECK-NEXT: pcmpeqw %mm0, %mm1
528 ; CHECK-NEXT: movq %mm1, %rax
531 ; CHECK-NEXT: #NO_APP
533 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
534 %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
537 declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pcmpgtb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pcmpgtb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
539 define <1 x i64> @stack_fold_pcmpgtb(<1 x i64> %a, <1 x i64> %b) {
540 ; CHECK-LABEL: stack_fold_pcmpgtb:
542 ; CHECK-NEXT: movq %rsi, %mm0
543 ; CHECK-NEXT: movq %rdi, %mm1
544 ; CHECK-NEXT: pcmpgtb %mm0, %mm1
545 ; CHECK-NEXT: movq %mm1, %rax
548 ; CHECK-NEXT: #NO_APP
550 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
551 %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
554 declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pcmpgtd test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pcmpgtd ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
556 define <1 x i64> @stack_fold_pcmpgtd(<1 x i64> %a, <1 x i64> %b) {
557 ; CHECK-LABEL: stack_fold_pcmpgtd:
559 ; CHECK-NEXT: movq %rsi, %mm0
560 ; CHECK-NEXT: movq %rdi, %mm1
561 ; CHECK-NEXT: pcmpgtd %mm0, %mm1
562 ; CHECK-NEXT: movq %mm1, %rax
565 ; CHECK-NEXT: #NO_APP
567 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
568 %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
571 declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pcmpgtw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pcmpgtw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
573 define <1 x i64> @stack_fold_pcmpgtw(<1 x i64> %a, <1 x i64> %b) {
574 ; CHECK-LABEL: stack_fold_pcmpgtw:
576 ; CHECK-NEXT: movq %rsi, %mm0
577 ; CHECK-NEXT: movq %rdi, %mm1
578 ; CHECK-NEXT: pcmpgtw %mm0, %mm1
579 ; CHECK-NEXT: movq %mm1, %rax
582 ; CHECK-NEXT: #NO_APP
584 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
585 %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
588 declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX phaddd test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register phaddd ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
590 define <1 x i64> @stack_fold_phaddd(<1 x i64> %a, <1 x i64> %b) {
591 ; CHECK-LABEL: stack_fold_phaddd:
593 ; CHECK-NEXT: movq %rsi, %mm0
594 ; CHECK-NEXT: movq %rdi, %mm1
595 ; CHECK-NEXT: phaddd %mm0, %mm1
596 ; CHECK-NEXT: movq %mm1, %rax
599 ; CHECK-NEXT: #NO_APP
601 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
602 %2 = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
605 declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX phaddsw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register phaddsw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
607 define <1 x i64> @stack_fold_phaddsw(<1 x i64> %a, <1 x i64> %b) {
608 ; CHECK-LABEL: stack_fold_phaddsw:
610 ; CHECK-NEXT: movq %rsi, %mm0
611 ; CHECK-NEXT: movq %rdi, %mm1
612 ; CHECK-NEXT: phaddsw %mm0, %mm1
613 ; CHECK-NEXT: movq %mm1, %rax
616 ; CHECK-NEXT: #NO_APP
618 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
619 %2 = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
622 declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX phaddw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register phaddw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
624 define <1 x i64> @stack_fold_phaddw(<1 x i64> %a, <1 x i64> %b) {
625 ; CHECK-LABEL: stack_fold_phaddw:
627 ; CHECK-NEXT: movq %rsi, %mm0
628 ; CHECK-NEXT: movq %rdi, %mm1
629 ; CHECK-NEXT: phaddw %mm0, %mm1
630 ; CHECK-NEXT: movq %mm1, %rax
633 ; CHECK-NEXT: #NO_APP
635 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
636 %2 = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
639 declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX phsubd test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register phsubd ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
641 define <1 x i64> @stack_fold_phsubd(<1 x i64> %a, <1 x i64> %b) {
642 ; CHECK-LABEL: stack_fold_phsubd:
644 ; CHECK-NEXT: movq %rsi, %mm0
645 ; CHECK-NEXT: movq %rdi, %mm1
646 ; CHECK-NEXT: phsubd %mm0, %mm1
647 ; CHECK-NEXT: movq %mm1, %rax
650 ; CHECK-NEXT: #NO_APP
652 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
653 %2 = call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
656 declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX phsubsw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register phsubsw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
658 define <1 x i64> @stack_fold_phsubsw(<1 x i64> %a, <1 x i64> %b) {
659 ; CHECK-LABEL: stack_fold_phsubsw:
661 ; CHECK-NEXT: movq %rsi, %mm0
662 ; CHECK-NEXT: movq %rdi, %mm1
663 ; CHECK-NEXT: phsubsw %mm0, %mm1
664 ; CHECK-NEXT: movq %mm1, %rax
667 ; CHECK-NEXT: #NO_APP
669 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
670 %2 = call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
673 declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX phsubw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register phsubw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
675 define <1 x i64> @stack_fold_phsubw(<1 x i64> %a, <1 x i64> %b) {
676 ; CHECK-LABEL: stack_fold_phsubw:
678 ; CHECK-NEXT: movq %rsi, %mm0
679 ; CHECK-NEXT: movq %rdi, %mm1
680 ; CHECK-NEXT: phsubw %mm0, %mm1
681 ; CHECK-NEXT: movq %mm1, %rax
684 ; CHECK-NEXT: #NO_APP
686 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
687 %2 = call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
690 declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone
692 ; TODO stack_fold_pinsrw
; Two-operand MMX pmaddubsw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmaddubsw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
694 define <1 x i64> @stack_fold_pmaddubsw(<1 x i64> %a, <1 x i64> %b) {
695 ; CHECK-LABEL: stack_fold_pmaddubsw:
697 ; CHECK-NEXT: movq %rsi, %mm0
698 ; CHECK-NEXT: movq %rdi, %mm1
699 ; CHECK-NEXT: pmaddubsw %mm0, %mm1
700 ; CHECK-NEXT: movq %mm1, %rax
703 ; CHECK-NEXT: #NO_APP
705 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
706 %2 = call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
709 declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pmaddwd test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmaddwd ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
711 define <1 x i64> @stack_fold_pmaddwd(<1 x i64> %a, <1 x i64> %b) {
712 ; CHECK-LABEL: stack_fold_pmaddwd:
714 ; CHECK-NEXT: movq %rsi, %mm0
715 ; CHECK-NEXT: movq %rdi, %mm1
716 ; CHECK-NEXT: pmaddwd %mm0, %mm1
717 ; CHECK-NEXT: movq %mm1, %rax
720 ; CHECK-NEXT: #NO_APP
722 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
723 %2 = call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b) nounwind readnone
726 declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pmaxsw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmaxsw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
728 define <1 x i64> @stack_fold_pmaxsw(<1 x i64> %a, <1 x i64> %b) {
729 ; CHECK-LABEL: stack_fold_pmaxsw:
731 ; CHECK-NEXT: movq %rsi, %mm0
732 ; CHECK-NEXT: movq %rdi, %mm1
733 ; CHECK-NEXT: pmaxsw %mm0, %mm1
734 ; CHECK-NEXT: movq %mm1, %rax
737 ; CHECK-NEXT: #NO_APP
739 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
740 %2 = call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
743 declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pmaxub test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmaxub ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
745 define <1 x i64> @stack_fold_pmaxub(<1 x i64> %a, <1 x i64> %b) {
746 ; CHECK-LABEL: stack_fold_pmaxub:
748 ; CHECK-NEXT: movq %rsi, %mm0
749 ; CHECK-NEXT: movq %rdi, %mm1
750 ; CHECK-NEXT: pmaxub %mm0, %mm1
751 ; CHECK-NEXT: movq %mm1, %rax
754 ; CHECK-NEXT: #NO_APP
756 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
757 %2 = call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
760 declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pminsw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pminsw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
762 define <1 x i64> @stack_fold_pminsw(<1 x i64> %a, <1 x i64> %b) {
763 ; CHECK-LABEL: stack_fold_pminsw:
765 ; CHECK-NEXT: movq %rsi, %mm0
766 ; CHECK-NEXT: movq %rdi, %mm1
767 ; CHECK-NEXT: pminsw %mm0, %mm1
768 ; CHECK-NEXT: movq %mm1, %rax
771 ; CHECK-NEXT: #NO_APP
773 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
774 %2 = call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
777 declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pminub test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pminub ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
779 define <1 x i64> @stack_fold_pminub(<1 x i64> %a, <1 x i64> %b) {
780 ; CHECK-LABEL: stack_fold_pminub:
782 ; CHECK-NEXT: movq %rsi, %mm0
783 ; CHECK-NEXT: movq %rdi, %mm1
784 ; CHECK-NEXT: pminub %mm0, %mm1
785 ; CHECK-NEXT: movq %mm1, %rax
788 ; CHECK-NEXT: #NO_APP
790 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
791 %2 = call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
794 declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pmulhrsw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmulhrsw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
796 define <1 x i64> @stack_fold_pmulhrsw(<1 x i64> %a, <1 x i64> %b) {
797 ; CHECK-LABEL: stack_fold_pmulhrsw:
799 ; CHECK-NEXT: movq %rsi, %mm0
800 ; CHECK-NEXT: movq %rdi, %mm1
801 ; CHECK-NEXT: pmulhrsw %mm0, %mm1
802 ; CHECK-NEXT: movq %mm1, %rax
805 ; CHECK-NEXT: #NO_APP
807 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
808 %2 = call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
811 declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pmulhuw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmulhuw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
813 define <1 x i64> @stack_fold_pmulhuw(<1 x i64> %a, <1 x i64> %b) {
814 ; CHECK-LABEL: stack_fold_pmulhuw:
816 ; CHECK-NEXT: movq %rsi, %mm0
817 ; CHECK-NEXT: movq %rdi, %mm1
818 ; CHECK-NEXT: pmulhuw %mm0, %mm1
819 ; CHECK-NEXT: movq %mm1, %rax
822 ; CHECK-NEXT: #NO_APP
824 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
825 %2 = call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
828 declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pmulhw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmulhw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
830 define <1 x i64> @stack_fold_pmulhw(<1 x i64> %a, <1 x i64> %b) {
831 ; CHECK-LABEL: stack_fold_pmulhw:
833 ; CHECK-NEXT: movq %rsi, %mm0
834 ; CHECK-NEXT: movq %rdi, %mm1
835 ; CHECK-NEXT: pmulhw %mm0, %mm1
836 ; CHECK-NEXT: movq %mm1, %rax
839 ; CHECK-NEXT: #NO_APP
841 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
842 %2 = call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
845 declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pmullw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmullw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
847 define <1 x i64> @stack_fold_pmullw(<1 x i64> %a, <1 x i64> %b) {
848 ; CHECK-LABEL: stack_fold_pmullw:
850 ; CHECK-NEXT: movq %rsi, %mm0
851 ; CHECK-NEXT: movq %rdi, %mm1
852 ; CHECK-NEXT: pmullw %mm0, %mm1
853 ; CHECK-NEXT: movq %mm1, %rax
856 ; CHECK-NEXT: #NO_APP
858 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
859 %2 = call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
862 declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pmuludq test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register pmuludq ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
864 define <1 x i64> @stack_fold_pmuludq(<1 x i64> %a, <1 x i64> %b) {
865 ; CHECK-LABEL: stack_fold_pmuludq:
867 ; CHECK-NEXT: movq %rsi, %mm0
868 ; CHECK-NEXT: movq %rdi, %mm1
869 ; CHECK-NEXT: pmuludq %mm0, %mm1
870 ; CHECK-NEXT: movq %mm1, %rax
873 ; CHECK-NEXT: #NO_APP
875 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
876 %2 = call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %a, <1 x i64> %b) nounwind readnone
879 declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX por test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register por ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
881 define <1 x i64> @stack_fold_por(<1 x i64> %a, <1 x i64> %b) {
882 ; CHECK-LABEL: stack_fold_por:
884 ; CHECK-NEXT: movq %rsi, %mm0
885 ; CHECK-NEXT: movq %rdi, %mm1
886 ; CHECK-NEXT: por %mm0, %mm1
887 ; CHECK-NEXT: movq %mm1, %rax
890 ; CHECK-NEXT: #NO_APP
892 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
893 %2 = call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %a, <1 x i64> %b) nounwind readnone
896 declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX psadbw test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register psadbw ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
898 define <1 x i64> @stack_fold_psadbw(<1 x i64> %a, <1 x i64> %b) {
899 ; CHECK-LABEL: stack_fold_psadbw:
901 ; CHECK-NEXT: movq %rsi, %mm0
902 ; CHECK-NEXT: movq %rdi, %mm1
903 ; CHECK-NEXT: psadbw %mm0, %mm1
904 ; CHECK-NEXT: movq %mm1, %rax
907 ; CHECK-NEXT: #NO_APP
909 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
910 %2 = call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
913 declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone
; Two-operand MMX pshufb test; unlike its two-operand neighbours, the
; inline-asm nop here clobbers mm1-mm7.
; NOTE(review): the CHECK lines expect a register-register pshufb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
915 define <1 x i64> @stack_fold_pshufb(<1 x i64> %a, <1 x i64> %b) {
916 ; CHECK-LABEL: stack_fold_pshufb:
918 ; CHECK-NEXT: movq %rsi, %mm0
919 ; CHECK-NEXT: movq %rdi, %mm1
920 ; CHECK-NEXT: pshufb %mm0, %mm1
921 ; CHECK-NEXT: movq %mm1, %rax
924 ; CHECK-NEXT: #NO_APP
926 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
927 %2 = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
930 declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone
; Checks that the spilled MMX operand is reloaded by pshufw itself
; ("8-byte Folded Reload", immediate 1) after the inline-asm block. The
; inline-asm nop defines a "=y" result and clobbers mm1-mm7.
932 define <1 x i64> @stack_fold_pshufw(<1 x i64> %a) {
933 ; CHECK-LABEL: stack_fold_pshufw:
935 ; CHECK-NEXT: movq %rdi, %mm0
936 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
939 ; CHECK-NEXT: #NO_APP
940 ; CHECK-NEXT: pshufw $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
941 ; CHECK-NEXT: # mm0 = mem[1,0,0,0]
942 ; CHECK-NEXT: movq %mm0, %rax
944 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
945 %2 = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %a, i8 1) nounwind readnone
948 declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone
; Two-operand MMX psignb test; the inline-asm nop clobbers mm2-mm7.
; NOTE(review): the CHECK lines expect a register-register psignb ahead of
; #NO_APP with no spill or folded reload - confirm that is intended.
950 define <1 x i64> @stack_fold_psignb(<1 x i64> %a0, <1 x i64> %a1) {
951 ; CHECK-LABEL: stack_fold_psignb:
953 ; CHECK-NEXT: movq %rsi, %mm0
954 ; CHECK-NEXT: movq %rdi, %mm1
955 ; CHECK-NEXT: psignb %mm0, %mm1
956 ; CHECK-NEXT: movq %mm1, %rax
959 ; CHECK-NEXT: #NO_APP
961 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
962 %2 = call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone
965 declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psignd: calls @llvm.x86.ssse3.psign.d on %a0 and %a1 after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psignd %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
967 define <1 x i64> @stack_fold_psignd(<1 x i64> %a0, <1 x i64> %a1) {
968 ; CHECK-LABEL: stack_fold_psignd:
970 ; CHECK-NEXT: movq %rsi, %mm0
971 ; CHECK-NEXT: movq %rdi, %mm1
972 ; CHECK-NEXT: psignd %mm0, %mm1
973 ; CHECK-NEXT: movq %mm1, %rax
976 ; CHECK-NEXT: #NO_APP
978 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
979 %2 = call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone
; Declaration of the intrinsic called above.
982 declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psignw: calls @llvm.x86.ssse3.psign.w on %a0 and %a1 after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psignw %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
984 define <1 x i64> @stack_fold_psignw(<1 x i64> %a0, <1 x i64> %a1) {
985 ; CHECK-LABEL: stack_fold_psignw:
987 ; CHECK-NEXT: movq %rsi, %mm0
988 ; CHECK-NEXT: movq %rdi, %mm1
989 ; CHECK-NEXT: psignw %mm0, %mm1
990 ; CHECK-NEXT: movq %mm1, %rax
993 ; CHECK-NEXT: #NO_APP
995 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
996 %2 = call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone
; Declaration of the intrinsic called above.
999 declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_pslld: calls @llvm.x86.mmx.psll.d on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `pslld %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1001 define <1 x i64> @stack_fold_pslld(<1 x i64> %a, <1 x i64> %b) {
1002 ; CHECK-LABEL: stack_fold_pslld:
1004 ; CHECK-NEXT: movq %rsi, %mm0
1005 ; CHECK-NEXT: movq %rdi, %mm1
1006 ; CHECK-NEXT: pslld %mm0, %mm1
1007 ; CHECK-NEXT: movq %mm1, %rax
1010 ; CHECK-NEXT: #NO_APP
1012 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1013 %2 = call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1016 declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psllq: calls @llvm.x86.mmx.psll.q on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psllq %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1018 define <1 x i64> @stack_fold_psllq(<1 x i64> %a, <1 x i64> %b) {
1019 ; CHECK-LABEL: stack_fold_psllq:
1021 ; CHECK-NEXT: movq %rsi, %mm0
1022 ; CHECK-NEXT: movq %rdi, %mm1
1023 ; CHECK-NEXT: psllq %mm0, %mm1
1024 ; CHECK-NEXT: movq %mm1, %rax
1027 ; CHECK-NEXT: #NO_APP
1029 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1030 %2 = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1033 declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psllw: calls @llvm.x86.mmx.psll.w on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psllw %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1035 define <1 x i64> @stack_fold_psllw(<1 x i64> %a, <1 x i64> %b) {
1036 ; CHECK-LABEL: stack_fold_psllw:
1038 ; CHECK-NEXT: movq %rsi, %mm0
1039 ; CHECK-NEXT: movq %rdi, %mm1
1040 ; CHECK-NEXT: psllw %mm0, %mm1
1041 ; CHECK-NEXT: movq %mm1, %rax
1044 ; CHECK-NEXT: #NO_APP
1046 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1047 %2 = call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1050 declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psrad: calls @llvm.x86.mmx.psra.d on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psrad %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1052 define <1 x i64> @stack_fold_psrad(<1 x i64> %a, <1 x i64> %b) {
1053 ; CHECK-LABEL: stack_fold_psrad:
1055 ; CHECK-NEXT: movq %rsi, %mm0
1056 ; CHECK-NEXT: movq %rdi, %mm1
1057 ; CHECK-NEXT: psrad %mm0, %mm1
1058 ; CHECK-NEXT: movq %mm1, %rax
1061 ; CHECK-NEXT: #NO_APP
1063 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1064 %2 = call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1067 declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psraw: calls @llvm.x86.mmx.psra.w on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psraw %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1069 define <1 x i64> @stack_fold_psraw(<1 x i64> %a, <1 x i64> %b) {
1070 ; CHECK-LABEL: stack_fold_psraw:
1072 ; CHECK-NEXT: movq %rsi, %mm0
1073 ; CHECK-NEXT: movq %rdi, %mm1
1074 ; CHECK-NEXT: psraw %mm0, %mm1
1075 ; CHECK-NEXT: movq %mm1, %rax
1078 ; CHECK-NEXT: #NO_APP
1080 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1081 %2 = call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1084 declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psrld: calls @llvm.x86.mmx.psrl.d on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psrld %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1086 define <1 x i64> @stack_fold_psrld(<1 x i64> %a, <1 x i64> %b) {
1087 ; CHECK-LABEL: stack_fold_psrld:
1089 ; CHECK-NEXT: movq %rsi, %mm0
1090 ; CHECK-NEXT: movq %rdi, %mm1
1091 ; CHECK-NEXT: psrld %mm0, %mm1
1092 ; CHECK-NEXT: movq %mm1, %rax
1095 ; CHECK-NEXT: #NO_APP
1097 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1098 %2 = call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1101 declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psrlq: calls @llvm.x86.mmx.psrl.q on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psrlq %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1103 define <1 x i64> @stack_fold_psrlq(<1 x i64> %a, <1 x i64> %b) {
1104 ; CHECK-LABEL: stack_fold_psrlq:
1106 ; CHECK-NEXT: movq %rsi, %mm0
1107 ; CHECK-NEXT: movq %rdi, %mm1
1108 ; CHECK-NEXT: psrlq %mm0, %mm1
1109 ; CHECK-NEXT: movq %mm1, %rax
1112 ; CHECK-NEXT: #NO_APP
1114 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1115 %2 = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1118 declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psrlw: calls @llvm.x86.mmx.psrl.w on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psrlw %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1120 define <1 x i64> @stack_fold_psrlw(<1 x i64> %a, <1 x i64> %b) {
1121 ; CHECK-LABEL: stack_fold_psrlw:
1123 ; CHECK-NEXT: movq %rsi, %mm0
1124 ; CHECK-NEXT: movq %rdi, %mm1
1125 ; CHECK-NEXT: psrlw %mm0, %mm1
1126 ; CHECK-NEXT: movq %mm1, %rax
1129 ; CHECK-NEXT: #NO_APP
1131 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1132 %2 = call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1135 declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psubb: calls @llvm.x86.mmx.psub.b on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psubb %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1137 define <1 x i64> @stack_fold_psubb(<1 x i64> %a, <1 x i64> %b) {
1138 ; CHECK-LABEL: stack_fold_psubb:
1140 ; CHECK-NEXT: movq %rsi, %mm0
1141 ; CHECK-NEXT: movq %rdi, %mm1
1142 ; CHECK-NEXT: psubb %mm0, %mm1
1143 ; CHECK-NEXT: movq %mm1, %rax
1146 ; CHECK-NEXT: #NO_APP
1148 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1149 %2 = call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1152 declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psubd: calls @llvm.x86.mmx.psub.d on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psubd %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1154 define <1 x i64> @stack_fold_psubd(<1 x i64> %a, <1 x i64> %b) {
1155 ; CHECK-LABEL: stack_fold_psubd:
1157 ; CHECK-NEXT: movq %rsi, %mm0
1158 ; CHECK-NEXT: movq %rdi, %mm1
1159 ; CHECK-NEXT: psubd %mm0, %mm1
1160 ; CHECK-NEXT: movq %mm1, %rax
1163 ; CHECK-NEXT: #NO_APP
1165 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1166 %2 = call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1169 declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psubq: calls @llvm.x86.mmx.psub.q on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psubq %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1171 define <1 x i64> @stack_fold_psubq(<1 x i64> %a, <1 x i64> %b) {
1172 ; CHECK-LABEL: stack_fold_psubq:
1174 ; CHECK-NEXT: movq %rsi, %mm0
1175 ; CHECK-NEXT: movq %rdi, %mm1
1176 ; CHECK-NEXT: psubq %mm0, %mm1
1177 ; CHECK-NEXT: movq %mm1, %rax
1180 ; CHECK-NEXT: #NO_APP
1182 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1183 %2 = call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1186 declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psubsb: calls @llvm.x86.mmx.psubs.b on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psubsb %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1188 define <1 x i64> @stack_fold_psubsb(<1 x i64> %a, <1 x i64> %b) {
1189 ; CHECK-LABEL: stack_fold_psubsb:
1191 ; CHECK-NEXT: movq %rsi, %mm0
1192 ; CHECK-NEXT: movq %rdi, %mm1
1193 ; CHECK-NEXT: psubsb %mm0, %mm1
1194 ; CHECK-NEXT: movq %mm1, %rax
1197 ; CHECK-NEXT: #NO_APP
1199 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1200 %2 = call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1203 declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psubsw: calls @llvm.x86.mmx.psubs.w on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psubsw %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1205 define <1 x i64> @stack_fold_psubsw(<1 x i64> %a, <1 x i64> %b) {
1206 ; CHECK-LABEL: stack_fold_psubsw:
1208 ; CHECK-NEXT: movq %rsi, %mm0
1209 ; CHECK-NEXT: movq %rdi, %mm1
1210 ; CHECK-NEXT: psubsw %mm0, %mm1
1211 ; CHECK-NEXT: movq %mm1, %rax
1214 ; CHECK-NEXT: #NO_APP
1216 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1217 %2 = call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1220 declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psubusb: calls @llvm.x86.mmx.psubus.b on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psubusb %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1222 define <1 x i64> @stack_fold_psubusb(<1 x i64> %a, <1 x i64> %b) {
1223 ; CHECK-LABEL: stack_fold_psubusb:
1225 ; CHECK-NEXT: movq %rsi, %mm0
1226 ; CHECK-NEXT: movq %rdi, %mm1
1227 ; CHECK-NEXT: psubusb %mm0, %mm1
1228 ; CHECK-NEXT: movq %mm1, %rax
1231 ; CHECK-NEXT: #NO_APP
1233 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1234 %2 = call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1237 declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psubusw: calls @llvm.x86.mmx.psubus.w on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psubusw %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1239 define <1 x i64> @stack_fold_psubusw(<1 x i64> %a, <1 x i64> %b) {
1240 ; CHECK-LABEL: stack_fold_psubusw:
1242 ; CHECK-NEXT: movq %rsi, %mm0
1243 ; CHECK-NEXT: movq %rdi, %mm1
1244 ; CHECK-NEXT: psubusw %mm0, %mm1
1245 ; CHECK-NEXT: movq %mm1, %rax
1248 ; CHECK-NEXT: #NO_APP
1250 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1251 %2 = call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1254 declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_psubw: calls @llvm.x86.mmx.psub.w on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `psubw %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1256 define <1 x i64> @stack_fold_psubw(<1 x i64> %a, <1 x i64> %b) {
1257 ; CHECK-LABEL: stack_fold_psubw:
1259 ; CHECK-NEXT: movq %rsi, %mm0
1260 ; CHECK-NEXT: movq %rdi, %mm1
1261 ; CHECK-NEXT: psubw %mm0, %mm1
1262 ; CHECK-NEXT: movq %mm1, %rax
1265 ; CHECK-NEXT: #NO_APP
1267 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1268 %2 = call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1271 declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_punpckhbw: calls @llvm.x86.mmx.punpckhbw on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `punpckhbw %mm0, %mm1` together with its
; printed shuffle decode (elements 4-7 of mm1 and mm0 interleaved), and the
; result copied to %rax; no spill or folded reload is asserted here.
1273 define <1 x i64> @stack_fold_punpckhbw(<1 x i64> %a, <1 x i64> %b) {
1274 ; CHECK-LABEL: stack_fold_punpckhbw:
1276 ; CHECK-NEXT: movq %rsi, %mm0
1277 ; CHECK-NEXT: movq %rdi, %mm1
1278 ; CHECK-NEXT: punpckhbw %mm0, %mm1 # mm1 = mm1[4],mm0[4],mm1[5],mm0[5],mm1[6],mm0[6],mm1[7],mm0[7]
1279 ; CHECK-NEXT: movq %mm1, %rax
1282 ; CHECK-NEXT: #NO_APP
1284 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1285 %2 = call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1288 declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_punpckhdq: calls @llvm.x86.mmx.punpckhdq on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `punpckhdq %mm0, %mm1` together with its
; printed shuffle decode (mm1[1],mm0[1]), and the result copied to %rax; no
; spill or folded reload is asserted here.
1290 define <1 x i64> @stack_fold_punpckhdq(<1 x i64> %a, <1 x i64> %b) {
1291 ; CHECK-LABEL: stack_fold_punpckhdq:
1293 ; CHECK-NEXT: movq %rsi, %mm0
1294 ; CHECK-NEXT: movq %rdi, %mm1
1295 ; CHECK-NEXT: punpckhdq %mm0, %mm1 # mm1 = mm1[1],mm0[1]
1296 ; CHECK-NEXT: movq %mm1, %rax
1299 ; CHECK-NEXT: #NO_APP
1301 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1302 %2 = call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1305 declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_punpckhwd: calls @llvm.x86.mmx.punpckhwd on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `punpckhwd %mm0, %mm1` together with its
; printed shuffle decode (elements 2-3 of mm1 and mm0 interleaved), and the
; result copied to %rax; no spill or folded reload is asserted here.
1307 define <1 x i64> @stack_fold_punpckhwd(<1 x i64> %a, <1 x i64> %b) {
1308 ; CHECK-LABEL: stack_fold_punpckhwd:
1310 ; CHECK-NEXT: movq %rsi, %mm0
1311 ; CHECK-NEXT: movq %rdi, %mm1
1312 ; CHECK-NEXT: punpckhwd %mm0, %mm1 # mm1 = mm1[2],mm0[2],mm1[3],mm0[3]
1313 ; CHECK-NEXT: movq %mm1, %rax
1316 ; CHECK-NEXT: #NO_APP
1318 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1319 %2 = call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1322 declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_punpcklbw: calls @llvm.x86.mmx.punpcklbw on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `punpcklbw %mm0, %mm1` together with its
; printed shuffle decode (elements 0-3 of mm1 and mm0 interleaved), and the
; result copied to %rax; no spill or folded reload is asserted here.
1324 define <1 x i64> @stack_fold_punpcklbw(<1 x i64> %a, <1 x i64> %b) {
1325 ; CHECK-LABEL: stack_fold_punpcklbw:
1327 ; CHECK-NEXT: movq %rsi, %mm0
1328 ; CHECK-NEXT: movq %rdi, %mm1
1329 ; CHECK-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
1330 ; CHECK-NEXT: movq %mm1, %rax
1333 ; CHECK-NEXT: #NO_APP
1335 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1336 %2 = call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1339 declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_punpckldq: calls @llvm.x86.mmx.punpckldq on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `punpckldq %mm0, %mm1` together with its
; printed shuffle decode (mm1[0],mm0[0]), and the result copied to %rax; no
; spill or folded reload is asserted here.
1341 define <1 x i64> @stack_fold_punpckldq(<1 x i64> %a, <1 x i64> %b) {
1342 ; CHECK-LABEL: stack_fold_punpckldq:
1344 ; CHECK-NEXT: movq %rsi, %mm0
1345 ; CHECK-NEXT: movq %rdi, %mm1
1346 ; CHECK-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
1347 ; CHECK-NEXT: movq %mm1, %rax
1350 ; CHECK-NEXT: #NO_APP
1352 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1353 %2 = call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1356 declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_punpcklwd: calls @llvm.x86.mmx.punpcklwd on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `punpcklwd %mm0, %mm1` together with its
; printed shuffle decode (elements 0-1 of mm1 and mm0 interleaved), and the
; result copied to %rax; no spill or folded reload is asserted here.
1358 define <1 x i64> @stack_fold_punpcklwd(<1 x i64> %a, <1 x i64> %b) {
1359 ; CHECK-LABEL: stack_fold_punpcklwd:
1361 ; CHECK-NEXT: movq %rsi, %mm0
1362 ; CHECK-NEXT: movq %rdi, %mm1
1363 ; CHECK-NEXT: punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
1364 ; CHECK-NEXT: movq %mm1, %rax
1367 ; CHECK-NEXT: #NO_APP
1369 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1370 %2 = call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1373 declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone
; stack_fold_pxor: calls @llvm.x86.mmx.pxor on %a and %b after a
; side-effecting inline-asm "nop" that produces one "=y" value and clobbers
; mm2-mm7. The autogenerated FileCheck lines expect %rsi -> %mm0 and
; %rdi -> %mm1, the register form `pxor %mm0, %mm1`, and the result copied
; to %rax; no spill or folded reload is asserted for this function.
1375 define <1 x i64> @stack_fold_pxor(<1 x i64> %a, <1 x i64> %b) {
1376 ; CHECK-LABEL: stack_fold_pxor:
1378 ; CHECK-NEXT: movq %rsi, %mm0
1379 ; CHECK-NEXT: movq %rdi, %mm1
1380 ; CHECK-NEXT: pxor %mm0, %mm1
1381 ; CHECK-NEXT: movq %mm1, %rax
1384 ; CHECK-NEXT: #NO_APP
1386 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1387 %2 = call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %a, <1 x i64> %b) nounwind readnone
; Declaration of the intrinsic called above.
1390 declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone