1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s
; Stack-folding test for cvtpd2pi (llvm.x86.sse.cvtpd2pi).
; The "nop" inline asm clobbers xmm1-xmm15 and flags and produces an
; 'x'-constrained value; the expected assembly spills %xmm0 (16 bytes) and
; reloads it folded into cvtpd2pi's memory source operand.
4 define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) {
5 ; CHECK-LABEL: stack_fold_cvtpd2pi:
7 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
11 ; CHECK-NEXT: cvtpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
12 ; CHECK-NEXT: movq2dq %mm0, %xmm0
14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
15 %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
18 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
; Stack-folding test for cvtpi2pd (llvm.x86.sse.cvtpi2pd).
; The "nop" inline asm clobbers mm1-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm0 (8 bytes) and reloads it folded into
; cvtpi2pd's memory source operand, with the result in %xmm0.
20 define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) {
21 ; CHECK-LABEL: stack_fold_cvtpi2pd:
23 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
27 ; CHECK-NEXT: cvtpi2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
29 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
30 %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone
33 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
; Stack-folding test for cvtpi2ps (llvm.x86.sse.cvtpi2ps), which takes a
; <4 x float> operand and an MMX operand.
; The "nop" inline asm clobbers mm1-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm0 (8 bytes) and reloads it folded into
; cvtpi2ps's memory source operand, with the result in %xmm0.
35 define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) {
36 ; CHECK-LABEL: stack_fold_cvtpi2ps:
38 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
42 ; CHECK-NEXT: cvtpi2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
44 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
45 %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone
48 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
; Stack-folding test for cvtps2pi (llvm.x86.sse.cvtps2pi).
; The "nop" inline asm clobbers xmm1-xmm15 and flags and produces an
; 'x'-constrained value; the expected assembly spills %xmm0 (16 bytes) and
; reloads it folded into cvtps2pi's memory source operand.
50 define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) {
51 ; CHECK-LABEL: stack_fold_cvtps2pi:
53 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
57 ; CHECK-NEXT: cvtps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
58 ; CHECK-NEXT: movq2dq %mm0, %xmm0
60 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
61 %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
64 declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
; Stack-folding test for cvttpd2pi (llvm.x86.sse.cvttpd2pi).
; The "nop" inline asm clobbers xmm1-xmm15 and flags and produces an
; 'x'-constrained value; the expected assembly spills %xmm0 (16 bytes) and
; reloads it folded into cvttpd2pi's memory source operand.
66 define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) {
67 ; CHECK-LABEL: stack_fold_cvttpd2pi:
69 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
73 ; CHECK-NEXT: cvttpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
74 ; CHECK-NEXT: movq2dq %mm0, %xmm0
76 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
77 %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
80 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
; Stack-folding test for cvttps2pi (llvm.x86.sse.cvttps2pi).
; The "nop" inline asm clobbers xmm1-xmm15 and flags and produces an
; 'x'-constrained value; the expected assembly spills %xmm0 (16 bytes) and
; reloads it folded into cvttps2pi's memory source operand.
82 define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) {
83 ; CHECK-LABEL: stack_fold_cvttps2pi:
85 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
89 ; CHECK-NEXT: cvttps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
90 ; CHECK-NEXT: movq2dq %mm0, %xmm0
92 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
93 %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
96 declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
98 ; TODO stack_fold_movd_load
100 ; padd forces execution on mmx
; Stack-folding test for a 32-bit store from an MMX register (movd to memory).
; The llvm.x86.mmx.padd.b result is bitcast to <2 x i32> and element 0 is
; extracted. The "nop" inline asm clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and
; r8-r15 and produces an 'x'-constrained i64 value. The expected assembly
; pushes rbp, r15, r14, r13, r12 and rbx, stores %mm0 to the stack with movd
; (4-byte Spill), reloads it into %eax with movl after the asm, and pops the
; saved registers in reverse order.
101 define i32 @stack_fold_movd_store(x86_mmx %a0) nounwind {
102 ; CHECK-LABEL: stack_fold_movd_store:
104 ; CHECK-NEXT: pushq %rbp
105 ; CHECK-NEXT: pushq %r15
106 ; CHECK-NEXT: pushq %r14
107 ; CHECK-NEXT: pushq %r13
108 ; CHECK-NEXT: pushq %r12
109 ; CHECK-NEXT: pushq %rbx
110 ; CHECK-NEXT: paddb %mm0, %mm0
111 ; CHECK-NEXT: movd %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
114 ; CHECK-NEXT: #NO_APP
115 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
116 ; CHECK-NEXT: popq %rbx
117 ; CHECK-NEXT: popq %r12
118 ; CHECK-NEXT: popq %r13
119 ; CHECK-NEXT: popq %r14
120 ; CHECK-NEXT: popq %r15
121 ; CHECK-NEXT: popq %rbp
123 %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0)
124 %2 = bitcast x86_mmx %1 to <2 x i32>
125 %3 = extractelement <2 x i32> %2, i32 0
126 %4 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
130 ; TODO stack_fold_movq_load
132 ; padd forces execution on mmx
; Stack-folding test for a 64-bit store from an MMX register (movq to memory).
; The llvm.x86.mmx.padd.b result is bitcast to i64. The "nop" inline asm
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 and produces an
; 'x'-constrained i64 value. The expected assembly pushes rbp, r15, r14, r13,
; r12 and rbx, stores %mm0 to the stack with movq (8-byte Spill), reloads it
; into %rax with movq after the asm, and pops the saved registers in reverse
; order.
133 define i64 @stack_fold_movq_store(x86_mmx %a0) nounwind {
134 ; CHECK-LABEL: stack_fold_movq_store:
136 ; CHECK-NEXT: pushq %rbp
137 ; CHECK-NEXT: pushq %r15
138 ; CHECK-NEXT: pushq %r14
139 ; CHECK-NEXT: pushq %r13
140 ; CHECK-NEXT: pushq %r12
141 ; CHECK-NEXT: pushq %rbx
142 ; CHECK-NEXT: paddb %mm0, %mm0
143 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
146 ; CHECK-NEXT: #NO_APP
147 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
148 ; CHECK-NEXT: popq %rbx
149 ; CHECK-NEXT: popq %r12
150 ; CHECK-NEXT: popq %r13
151 ; CHECK-NEXT: popq %r14
152 ; CHECK-NEXT: popq %r15
153 ; CHECK-NEXT: popq %rbp
155 %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0)
156 %2 = bitcast x86_mmx %1 to i64
157 %3 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Stack-folding test for MMX pabsb (llvm.x86.ssse3.pabs.b).
; The "nop" inline asm clobbers mm1-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm0 (8 bytes) and, after the asm, reloads it
; folded into pabsb's memory source operand.
161 define x86_mmx @stack_fold_pabsb(x86_mmx %a0) {
162 ; CHECK-LABEL: stack_fold_pabsb:
164 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
167 ; CHECK-NEXT: #NO_APP
168 ; CHECK-NEXT: pabsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
169 ; CHECK-NEXT: movq2dq %mm0, %xmm0
171 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
172 %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %a0) nounwind readnone
175 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
; Stack-folding test for MMX pabsd (llvm.x86.ssse3.pabs.d).
; The "nop" inline asm clobbers mm1-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm0 (8 bytes) and, after the asm, reloads it
; folded into pabsd's memory source operand.
177 define x86_mmx @stack_fold_pabsd(x86_mmx %a0) {
178 ; CHECK-LABEL: stack_fold_pabsd:
180 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
183 ; CHECK-NEXT: #NO_APP
184 ; CHECK-NEXT: pabsd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
185 ; CHECK-NEXT: movq2dq %mm0, %xmm0
187 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
188 %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %a0) nounwind readnone
191 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
; Stack-folding test for MMX pabsw (llvm.x86.ssse3.pabs.w).
; The "nop" inline asm clobbers mm1-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm0 (8 bytes) and, after the asm, reloads it
; folded into pabsw's memory source operand.
193 define x86_mmx @stack_fold_pabsw(x86_mmx %a0) {
194 ; CHECK-LABEL: stack_fold_pabsw:
196 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
199 ; CHECK-NEXT: #NO_APP
200 ; CHECK-NEXT: pabsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
201 ; CHECK-NEXT: movq2dq %mm0, %xmm0
203 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
204 %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %a0) nounwind readnone
207 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
; Stack-folding test for MMX packssdw (llvm.x86.mmx.packssdw).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into packssdw's memory operand with %mm0 as the destination.
209 define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) {
210 ; CHECK-LABEL: stack_fold_packssdw:
212 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
215 ; CHECK-NEXT: #NO_APP
216 ; CHECK-NEXT: packssdw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
217 ; CHECK-NEXT: movq2dq %mm0, %xmm0
219 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
220 %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone
223 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX packsswb (llvm.x86.mmx.packsswb).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into packsswb's memory operand with %mm0 as the destination.
225 define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) {
226 ; CHECK-LABEL: stack_fold_packsswb:
228 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
231 ; CHECK-NEXT: #NO_APP
232 ; CHECK-NEXT: packsswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
233 ; CHECK-NEXT: movq2dq %mm0, %xmm0
235 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
236 %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone
239 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX packuswb (llvm.x86.mmx.packuswb).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into packuswb's memory operand with %mm0 as the destination.
241 define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) {
242 ; CHECK-LABEL: stack_fold_packuswb:
244 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
247 ; CHECK-NEXT: #NO_APP
248 ; CHECK-NEXT: packuswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
249 ; CHECK-NEXT: movq2dq %mm0, %xmm0
251 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
252 %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone
255 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX paddb (llvm.x86.mmx.padd.b).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into paddb's memory operand with %mm0 as the destination.
257 define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) {
258 ; CHECK-LABEL: stack_fold_paddb:
260 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
263 ; CHECK-NEXT: #NO_APP
264 ; CHECK-NEXT: paddb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
265 ; CHECK-NEXT: movq2dq %mm0, %xmm0
267 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
268 %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone
271 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX paddd (llvm.x86.mmx.padd.d).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into paddd's memory operand with %mm0 as the destination.
273 define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) {
274 ; CHECK-LABEL: stack_fold_paddd:
276 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
279 ; CHECK-NEXT: #NO_APP
280 ; CHECK-NEXT: paddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
281 ; CHECK-NEXT: movq2dq %mm0, %xmm0
283 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
284 %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
287 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX paddq (llvm.x86.mmx.padd.q).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into paddq's memory operand with %mm0 as the destination.
289 define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) {
290 ; CHECK-LABEL: stack_fold_paddq:
292 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
295 ; CHECK-NEXT: #NO_APP
296 ; CHECK-NEXT: paddq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
297 ; CHECK-NEXT: movq2dq %mm0, %xmm0
299 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
300 %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone
303 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX paddsb (llvm.x86.mmx.padds.b).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into paddsb's memory operand with %mm0 as the destination.
305 define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) {
306 ; CHECK-LABEL: stack_fold_paddsb:
308 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
311 ; CHECK-NEXT: #NO_APP
312 ; CHECK-NEXT: paddsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
313 ; CHECK-NEXT: movq2dq %mm0, %xmm0
315 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
316 %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone
319 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX paddsw (llvm.x86.mmx.padds.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into paddsw's memory operand with %mm0 as the destination.
321 define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) {
322 ; CHECK-LABEL: stack_fold_paddsw:
324 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
327 ; CHECK-NEXT: #NO_APP
328 ; CHECK-NEXT: paddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
329 ; CHECK-NEXT: movq2dq %mm0, %xmm0
331 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
332 %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone
335 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX paddusb (llvm.x86.mmx.paddus.b).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into paddusb's memory operand with %mm0 as the destination.
337 define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) {
338 ; CHECK-LABEL: stack_fold_paddusb:
340 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
343 ; CHECK-NEXT: #NO_APP
344 ; CHECK-NEXT: paddusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
345 ; CHECK-NEXT: movq2dq %mm0, %xmm0
347 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
348 %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
351 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX paddusw (llvm.x86.mmx.paddus.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into paddusw's memory operand with %mm0 as the destination.
353 define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) {
354 ; CHECK-LABEL: stack_fold_paddusw:
356 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
359 ; CHECK-NEXT: #NO_APP
360 ; CHECK-NEXT: paddusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
361 ; CHECK-NEXT: movq2dq %mm0, %xmm0
363 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
364 %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
367 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX paddw (llvm.x86.mmx.padd.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into paddw's memory operand with %mm0 as the destination.
369 define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) {
370 ; CHECK-LABEL: stack_fold_paddw:
372 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
375 ; CHECK-NEXT: #NO_APP
376 ; CHECK-NEXT: paddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
377 ; CHECK-NEXT: movq2dq %mm0, %xmm0
379 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
380 %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
383 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX palignr (llvm.x86.mmx.palignr.b) with an
; immediate of 1.
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into the memory operand of "palignr $1" with %mm0 as the destination.
385 define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) {
386 ; CHECK-LABEL: stack_fold_palignr:
388 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
391 ; CHECK-NEXT: #NO_APP
392 ; CHECK-NEXT: palignr $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
393 ; CHECK-NEXT: movq2dq %mm0, %xmm0
395 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
396 %2 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a, x86_mmx %b, i8 1) nounwind readnone
399 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
; Stack-folding test for MMX pand (llvm.x86.mmx.pand).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pand's memory operand with %mm0 as the destination.
401 define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) {
402 ; CHECK-LABEL: stack_fold_pand:
404 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
407 ; CHECK-NEXT: #NO_APP
408 ; CHECK-NEXT: pand {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
409 ; CHECK-NEXT: movq2dq %mm0, %xmm0
411 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
412 %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone
415 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pandn (llvm.x86.mmx.pandn).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pandn's memory operand with %mm0 as the destination.
417 define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) {
418 ; CHECK-LABEL: stack_fold_pandn:
420 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
423 ; CHECK-NEXT: #NO_APP
424 ; CHECK-NEXT: pandn {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
425 ; CHECK-NEXT: movq2dq %mm0, %xmm0
427 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
428 %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone
431 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pavgb (llvm.x86.mmx.pavg.b).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pavgb's memory operand with %mm0 as the destination.
433 define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) {
434 ; CHECK-LABEL: stack_fold_pavgb:
436 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
439 ; CHECK-NEXT: #NO_APP
440 ; CHECK-NEXT: pavgb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
441 ; CHECK-NEXT: movq2dq %mm0, %xmm0
443 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
444 %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, x86_mmx %b) nounwind readnone
447 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pavgw (llvm.x86.mmx.pavg.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pavgw's memory operand with %mm0 as the destination.
449 define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) {
450 ; CHECK-LABEL: stack_fold_pavgw:
452 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
455 ; CHECK-NEXT: #NO_APP
456 ; CHECK-NEXT: pavgw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
457 ; CHECK-NEXT: movq2dq %mm0, %xmm0
459 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
460 %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone
463 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pcmpeqb (llvm.x86.mmx.pcmpeq.b).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pcmpeqb's memory operand with %mm0 as the destination.
465 define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) {
466 ; CHECK-LABEL: stack_fold_pcmpeqb:
468 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
471 ; CHECK-NEXT: #NO_APP
472 ; CHECK-NEXT: pcmpeqb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
473 ; CHECK-NEXT: movq2dq %mm0, %xmm0
475 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
476 %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone
479 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pcmpeqd (llvm.x86.mmx.pcmpeq.d).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pcmpeqd's memory operand with %mm0 as the destination.
481 define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) {
482 ; CHECK-LABEL: stack_fold_pcmpeqd:
484 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
487 ; CHECK-NEXT: #NO_APP
488 ; CHECK-NEXT: pcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
489 ; CHECK-NEXT: movq2dq %mm0, %xmm0
491 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
492 %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone
495 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pcmpeqw (llvm.x86.mmx.pcmpeq.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pcmpeqw's memory operand with %mm0 as the destination.
497 define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) {
498 ; CHECK-LABEL: stack_fold_pcmpeqw:
500 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
503 ; CHECK-NEXT: #NO_APP
504 ; CHECK-NEXT: pcmpeqw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
505 ; CHECK-NEXT: movq2dq %mm0, %xmm0
507 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
508 %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone
511 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pcmpgtb (llvm.x86.mmx.pcmpgt.b).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pcmpgtb's memory operand with %mm0 as the destination.
513 define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) {
514 ; CHECK-LABEL: stack_fold_pcmpgtb:
516 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
519 ; CHECK-NEXT: #NO_APP
520 ; CHECK-NEXT: pcmpgtb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
521 ; CHECK-NEXT: movq2dq %mm0, %xmm0
523 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
524 %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone
527 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pcmpgtd (llvm.x86.mmx.pcmpgt.d).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pcmpgtd's memory operand with %mm0 as the destination.
529 define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) {
530 ; CHECK-LABEL: stack_fold_pcmpgtd:
532 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
535 ; CHECK-NEXT: #NO_APP
536 ; CHECK-NEXT: pcmpgtd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
537 ; CHECK-NEXT: movq2dq %mm0, %xmm0
539 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
540 %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone
543 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pcmpgtw (llvm.x86.mmx.pcmpgt.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pcmpgtw's memory operand with %mm0 as the destination.
545 define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) {
546 ; CHECK-LABEL: stack_fold_pcmpgtw:
548 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
551 ; CHECK-NEXT: #NO_APP
552 ; CHECK-NEXT: pcmpgtw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
553 ; CHECK-NEXT: movq2dq %mm0, %xmm0
555 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
556 %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone
559 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX phaddd (llvm.x86.ssse3.phadd.d).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into phaddd's memory operand with %mm0 as the destination.
561 define x86_mmx @stack_fold_phaddd(x86_mmx %a, x86_mmx %b) {
562 ; CHECK-LABEL: stack_fold_phaddd:
564 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
567 ; CHECK-NEXT: #NO_APP
568 ; CHECK-NEXT: phaddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
569 ; CHECK-NEXT: movq2dq %mm0, %xmm0
571 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
572 %2 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
575 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX phaddsw (llvm.x86.ssse3.phadd.sw).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into phaddsw's memory operand with %mm0 as the destination.
577 define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) {
578 ; CHECK-LABEL: stack_fold_phaddsw:
580 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
583 ; CHECK-NEXT: #NO_APP
584 ; CHECK-NEXT: phaddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
585 ; CHECK-NEXT: movq2dq %mm0, %xmm0
587 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
588 %2 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
591 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX phaddw (llvm.x86.ssse3.phadd.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into phaddw's memory operand with %mm0 as the destination.
593 define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) {
594 ; CHECK-LABEL: stack_fold_phaddw:
596 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
599 ; CHECK-NEXT: #NO_APP
600 ; CHECK-NEXT: phaddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
601 ; CHECK-NEXT: movq2dq %mm0, %xmm0
603 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
604 %2 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
607 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX phsubd (llvm.x86.ssse3.phsub.d).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into phsubd's memory operand with %mm0 as the destination.
609 define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) {
610 ; CHECK-LABEL: stack_fold_phsubd:
612 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
615 ; CHECK-NEXT: #NO_APP
616 ; CHECK-NEXT: phsubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
617 ; CHECK-NEXT: movq2dq %mm0, %xmm0
619 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
620 %2 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
623 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX phsubsw (llvm.x86.ssse3.phsub.sw).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into phsubsw's memory operand with %mm0 as the destination.
625 define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) {
626 ; CHECK-LABEL: stack_fold_phsubsw:
628 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
631 ; CHECK-NEXT: #NO_APP
632 ; CHECK-NEXT: phsubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
633 ; CHECK-NEXT: movq2dq %mm0, %xmm0
635 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
636 %2 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
639 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX phsubw (llvm.x86.ssse3.phsub.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into phsubw's memory operand with %mm0 as the destination.
641 define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) {
642 ; CHECK-LABEL: stack_fold_phsubw:
644 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
647 ; CHECK-NEXT: #NO_APP
648 ; CHECK-NEXT: phsubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
649 ; CHECK-NEXT: movq2dq %mm0, %xmm0
651 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
652 %2 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
655 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
657 ; TODO stack_fold_pinsrw
; Stack-folding test for MMX pmaddubsw (llvm.x86.ssse3.pmadd.ub.sw).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmaddubsw's memory operand with %mm0 as the destination.
659 define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) {
660 ; CHECK-LABEL: stack_fold_pmaddubsw:
662 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
665 ; CHECK-NEXT: #NO_APP
666 ; CHECK-NEXT: pmaddubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
667 ; CHECK-NEXT: movq2dq %mm0, %xmm0
669 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
670 %2 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
673 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pmaddwd (llvm.x86.mmx.pmadd.wd).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmaddwd's memory operand with %mm0 as the destination.
675 define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) {
676 ; CHECK-LABEL: stack_fold_pmaddwd:
678 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
681 ; CHECK-NEXT: #NO_APP
682 ; CHECK-NEXT: pmaddwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
683 ; CHECK-NEXT: movq2dq %mm0, %xmm0
685 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
686 %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone
689 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pmaxsw (llvm.x86.mmx.pmaxs.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmaxsw's memory operand with %mm0 as the destination.
691 define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) {
692 ; CHECK-LABEL: stack_fold_pmaxsw:
694 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
697 ; CHECK-NEXT: #NO_APP
698 ; CHECK-NEXT: pmaxsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
699 ; CHECK-NEXT: movq2dq %mm0, %xmm0
701 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
702 %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
705 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pmaxub (llvm.x86.mmx.pmaxu.b).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmaxub's memory operand with %mm0 as the destination.
707 define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) {
708 ; CHECK-LABEL: stack_fold_pmaxub:
710 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
713 ; CHECK-NEXT: #NO_APP
714 ; CHECK-NEXT: pmaxub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
715 ; CHECK-NEXT: movq2dq %mm0, %xmm0
717 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
718 %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
721 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pminsw (llvm.x86.mmx.pmins.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pminsw's memory operand with %mm0 as the destination.
723 define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) {
724 ; CHECK-LABEL: stack_fold_pminsw:
726 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
729 ; CHECK-NEXT: #NO_APP
730 ; CHECK-NEXT: pminsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
731 ; CHECK-NEXT: movq2dq %mm0, %xmm0
733 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
734 %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone
737 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pminub (llvm.x86.mmx.pminu.b).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pminub's memory operand with %mm0 as the destination.
739 define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) {
740 ; CHECK-LABEL: stack_fold_pminub:
742 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
745 ; CHECK-NEXT: #NO_APP
746 ; CHECK-NEXT: pminub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
747 ; CHECK-NEXT: movq2dq %mm0, %xmm0
749 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
750 %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
753 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pmulhrsw (llvm.x86.ssse3.pmul.hr.sw).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmulhrsw's memory operand with %mm0 as the destination.
755 define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) {
756 ; CHECK-LABEL: stack_fold_pmulhrsw:
758 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
761 ; CHECK-NEXT: #NO_APP
762 ; CHECK-NEXT: pmulhrsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
763 ; CHECK-NEXT: movq2dq %mm0, %xmm0
765 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
766 %2 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
769 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pmulhuw (llvm.x86.mmx.pmulhu.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmulhuw's memory operand with %mm0 as the destination.
771 define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) {
772 ; CHECK-LABEL: stack_fold_pmulhuw:
774 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
777 ; CHECK-NEXT: #NO_APP
778 ; CHECK-NEXT: pmulhuw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
779 ; CHECK-NEXT: movq2dq %mm0, %xmm0
781 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
782 %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone
785 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pmulhw (llvm.x86.mmx.pmulh.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmulhw's memory operand with %mm0 as the destination.
787 define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) {
788 ; CHECK-LABEL: stack_fold_pmulhw:
790 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
793 ; CHECK-NEXT: #NO_APP
794 ; CHECK-NEXT: pmulhw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
795 ; CHECK-NEXT: movq2dq %mm0, %xmm0
797 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
798 %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone
801 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pmullw (llvm.x86.mmx.pmull.w).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmullw's memory operand with %mm0 as the destination.
803 define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) {
804 ; CHECK-LABEL: stack_fold_pmullw:
806 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
809 ; CHECK-NEXT: #NO_APP
810 ; CHECK-NEXT: pmullw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
811 ; CHECK-NEXT: movq2dq %mm0, %xmm0
813 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
814 %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone
817 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pmuludq (llvm.x86.mmx.pmulu.dq).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into pmuludq's memory operand with %mm0 as the destination.
819 define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) {
820 ; CHECK-LABEL: stack_fold_pmuludq:
822 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
825 ; CHECK-NEXT: #NO_APP
826 ; CHECK-NEXT: pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
827 ; CHECK-NEXT: movq2dq %mm0, %xmm0
829 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
830 %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone
833 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX por (llvm.x86.mmx.por).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into por's memory operand with %mm0 as the destination.
835 define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) {
836 ; CHECK-LABEL: stack_fold_por:
838 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
841 ; CHECK-NEXT: #NO_APP
842 ; CHECK-NEXT: por {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
843 ; CHECK-NEXT: movq2dq %mm0, %xmm0
845 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
846 %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone
849 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX psadbw (llvm.x86.mmx.psad.bw).
; The "nop" inline asm clobbers mm2-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm1 (8 bytes) and, after the asm, reloads it
; folded into psadbw's memory operand with %mm0 as the destination.
851 define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) {
852 ; CHECK-LABEL: stack_fold_psadbw:
854 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
857 ; CHECK-NEXT: #NO_APP
858 ; CHECK-NEXT: psadbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
859 ; CHECK-NEXT: movq2dq %mm0, %xmm0
861 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
862 %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone
865 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pshufb (llvm.x86.ssse3.pshuf.b).
; Unlike the other two-operand tests in this file, the "nop" inline asm here
; clobbers mm1-mm7 (mm1 included), so the expected assembly spills both %mm1
; and %mm0 (8 bytes each). After the asm, one value is reloaded into %mm0 with
; a plain movq and the other is reloaded folded into pshufb's memory operand.
867 define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) {
868 ; CHECK-LABEL: stack_fold_pshufb:
870 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
871 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
874 ; CHECK-NEXT: #NO_APP
875 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Reload
876 ; CHECK-NEXT: pshufb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
877 ; CHECK-NEXT: movq2dq %mm0, %xmm0
879 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
880 %2 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a, x86_mmx %b) nounwind readnone
883 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for MMX pshufw (llvm.x86.sse.pshuf.w) with an immediate
; of 1.
; The "nop" inline asm clobbers mm1-mm7 and produces a 'y'-constrained value;
; the expected assembly spills %mm0 (8 bytes) and, after the asm, reloads it
; folded into the memory operand of "pshufw $1", whose shuffle comment reads
; mm0 = mem[1,0,0,0].
885 define x86_mmx @stack_fold_pshufw(x86_mmx %a) {
886 ; CHECK-LABEL: stack_fold_pshufw:
888 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
891 ; CHECK-NEXT: #NO_APP
892 ; CHECK-NEXT: pshufw $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
893 ; CHECK-NEXT: # mm0 = mem[1,0,0,0]
894 ; CHECK-NEXT: movq2dq %mm0, %xmm0
896 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
897 %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone
900 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
; Stack-folding test for `psignb` (reached via @llvm.x86.ssse3.psign.b).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psignb` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
902 define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) {
903 ; CHECK-LABEL: stack_fold_psignb:
905 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
908 ; CHECK-NEXT: #NO_APP
909 ; CHECK-NEXT: psignb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
910 ; CHECK-NEXT: movq2dq %mm0, %xmm0
912 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
913 %2 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
916 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psignd` (reached via @llvm.x86.ssse3.psign.d).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psignd` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
918 define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) {
919 ; CHECK-LABEL: stack_fold_psignd:
921 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
924 ; CHECK-NEXT: #NO_APP
925 ; CHECK-NEXT: psignd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
926 ; CHECK-NEXT: movq2dq %mm0, %xmm0
928 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
929 %2 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
932 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psignw` (reached via @llvm.x86.ssse3.psign.w).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psignw` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
934 define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) {
935 ; CHECK-LABEL: stack_fold_psignw:
937 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
940 ; CHECK-NEXT: #NO_APP
941 ; CHECK-NEXT: psignw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
942 ; CHECK-NEXT: movq2dq %mm0, %xmm0
944 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
945 %2 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
948 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `pslld` (reached via @llvm.x86.mmx.psll.d).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `pslld` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
950 define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) {
951 ; CHECK-LABEL: stack_fold_pslld:
953 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
956 ; CHECK-NEXT: #NO_APP
957 ; CHECK-NEXT: pslld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
958 ; CHECK-NEXT: movq2dq %mm0, %xmm0
960 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
961 %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
964 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psllq` (reached via @llvm.x86.mmx.psll.q).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psllq` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
966 define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) {
967 ; CHECK-LABEL: stack_fold_psllq:
969 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
972 ; CHECK-NEXT: #NO_APP
973 ; CHECK-NEXT: psllq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
974 ; CHECK-NEXT: movq2dq %mm0, %xmm0
976 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
977 %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
980 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psllw` (reached via @llvm.x86.mmx.psll.w).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psllw` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
982 define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) {
983 ; CHECK-LABEL: stack_fold_psllw:
985 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
988 ; CHECK-NEXT: #NO_APP
989 ; CHECK-NEXT: psllw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
990 ; CHECK-NEXT: movq2dq %mm0, %xmm0
992 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
993 %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
996 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psrad` (reached via @llvm.x86.mmx.psra.d).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psrad` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
998 define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) {
999 ; CHECK-LABEL: stack_fold_psrad:
1001 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1004 ; CHECK-NEXT: #NO_APP
1005 ; CHECK-NEXT: psrad {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1006 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1008 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1009 %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1012 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psraw` (reached via @llvm.x86.mmx.psra.w).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psraw` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1014 define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) {
1015 ; CHECK-LABEL: stack_fold_psraw:
1017 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1020 ; CHECK-NEXT: #NO_APP
1021 ; CHECK-NEXT: psraw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1022 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1024 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1025 %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1028 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psrld` (reached via @llvm.x86.mmx.psrl.d).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psrld` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1030 define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) {
1031 ; CHECK-LABEL: stack_fold_psrld:
1033 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1036 ; CHECK-NEXT: #NO_APP
1037 ; CHECK-NEXT: psrld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1038 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1040 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1041 %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1044 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psrlq` (reached via @llvm.x86.mmx.psrl.q).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psrlq` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1046 define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) {
1047 ; CHECK-LABEL: stack_fold_psrlq:
1049 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1052 ; CHECK-NEXT: #NO_APP
1053 ; CHECK-NEXT: psrlq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1054 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1056 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1057 %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1060 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psrlw` (reached via @llvm.x86.mmx.psrl.w).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psrlw` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1062 define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) {
1063 ; CHECK-LABEL: stack_fold_psrlw:
1065 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1068 ; CHECK-NEXT: #NO_APP
1069 ; CHECK-NEXT: psrlw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1070 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1072 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1073 %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1076 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psubb` (reached via @llvm.x86.mmx.psub.b).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psubb` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1078 define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
1079 ; CHECK-LABEL: stack_fold_psubb:
1081 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1084 ; CHECK-NEXT: #NO_APP
1085 ; CHECK-NEXT: psubb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1086 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1088 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1089 %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1092 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psubd` (reached via @llvm.x86.mmx.psub.d).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psubd` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1094 define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
1095 ; CHECK-LABEL: stack_fold_psubd:
1097 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1100 ; CHECK-NEXT: #NO_APP
1101 ; CHECK-NEXT: psubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1102 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1104 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1105 %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1108 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psubq` (reached via @llvm.x86.mmx.psub.q).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psubq` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1110 define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
1111 ; CHECK-LABEL: stack_fold_psubq:
1113 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1116 ; CHECK-NEXT: #NO_APP
1117 ; CHECK-NEXT: psubq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1118 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1120 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1121 %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1124 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psubsb` (reached via @llvm.x86.mmx.psubs.b).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psubsb` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1126 define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
1127 ; CHECK-LABEL: stack_fold_psubsb:
1129 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1132 ; CHECK-NEXT: #NO_APP
1133 ; CHECK-NEXT: psubsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1134 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1136 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1137 %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1140 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psubsw` (reached via @llvm.x86.mmx.psubs.w).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psubsw` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1142 define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
1143 ; CHECK-LABEL: stack_fold_psubsw:
1145 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1148 ; CHECK-NEXT: #NO_APP
1149 ; CHECK-NEXT: psubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1150 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1152 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1153 %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1156 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psubusb` (reached via @llvm.x86.mmx.psubus.b).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psubusb` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1158 define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
1159 ; CHECK-LABEL: stack_fold_psubusb:
1161 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1164 ; CHECK-NEXT: #NO_APP
1165 ; CHECK-NEXT: psubusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1166 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1168 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1169 %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1172 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psubusw` (reached via @llvm.x86.mmx.psubus.w).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psubusw` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1174 define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
1175 ; CHECK-LABEL: stack_fold_psubusw:
1177 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1180 ; CHECK-NEXT: #NO_APP
1181 ; CHECK-NEXT: psubusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1182 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1184 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1185 %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1188 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `psubw` (reached via @llvm.x86.mmx.psub.w).
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `psubw` reads its source operand from a stack slot (folded reload)
; into %mm0, which is then copied to %xmm0.
1190 define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
1191 ; CHECK-LABEL: stack_fold_psubw:
1193 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1196 ; CHECK-NEXT: #NO_APP
1197 ; CHECK-NEXT: psubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1198 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1200 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1201 %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1204 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `punpckhbw`.
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `punpckhbw` reads its source operand from a stack slot (folded reload)
; into %mm0. The expected shuffle annotation interleaves elements 4-7 of mm0
; with elements 4-7 of the memory operand.
1206 define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
1207 ; CHECK-LABEL: stack_fold_punpckhbw:
1209 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1212 ; CHECK-NEXT: #NO_APP
1213 ; CHECK-NEXT: punpckhbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1214 ; CHECK-NEXT: # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
1215 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1217 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1218 %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1221 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `punpckhdq`.
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `punpckhdq` reads its source operand from a stack slot (folded reload)
; into %mm0. The expected shuffle annotation is mm0 = mm0[1],mem[1].
1223 define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
1224 ; CHECK-LABEL: stack_fold_punpckhdq:
1226 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1229 ; CHECK-NEXT: #NO_APP
1230 ; CHECK-NEXT: punpckhdq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1231 ; CHECK-NEXT: # mm0 = mm0[1],mem[1]
1232 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1234 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1235 %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1238 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `punpckhwd`.
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `punpckhwd` reads its source operand from a stack slot (folded reload)
; into %mm0. The expected shuffle annotation interleaves elements 2-3 of mm0
; with elements 2-3 of the memory operand.
1240 define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
1241 ; CHECK-LABEL: stack_fold_punpckhwd:
1243 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1246 ; CHECK-NEXT: #NO_APP
1247 ; CHECK-NEXT: punpckhwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1248 ; CHECK-NEXT: # mm0 = mm0[2],mem[2],mm0[3],mem[3]
1249 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1251 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1252 %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1255 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `punpcklbw`.
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `punpcklbw` reads its source operand from a stack slot (folded reload)
; into %mm0. The expected shuffle annotation interleaves elements 0-3 of mm0
; with elements 0-3 of the memory operand.
1257 define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
1258 ; CHECK-LABEL: stack_fold_punpcklbw:
1260 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1263 ; CHECK-NEXT: #NO_APP
1264 ; CHECK-NEXT: punpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1265 ; CHECK-NEXT: # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
1266 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1268 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1269 %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1272 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `punpckldq`.
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `punpckldq` reads its source operand from a stack slot (folded reload)
; into %mm0. The expected shuffle annotation is mm0 = mm0[0],mem[0].
1274 define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
1275 ; CHECK-LABEL: stack_fold_punpckldq:
1277 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1280 ; CHECK-NEXT: #NO_APP
1281 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1282 ; CHECK-NEXT: # mm0 = mm0[0],mem[0]
1283 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1285 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1286 %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1289 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `punpcklwd`.
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `punpcklwd` reads its source operand from a stack slot (folded reload)
; into %mm0. The expected shuffle annotation interleaves elements 0-1 of mm0
; with elements 0-1 of the memory operand.
1291 define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
1292 ; CHECK-LABEL: stack_fold_punpcklwd:
1294 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1297 ; CHECK-NEXT: #NO_APP
1298 ; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1299 ; CHECK-NEXT: # mm0 = mm0[0],mem[0],mm0[1],mem[1]
1300 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1302 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1303 %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1306 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for `pxor`.
; The side-effecting inline asm "nop" lists mm2-mm7 as clobbered and has a "=y"
; output. The expected assembly spills %mm1 to a stack slot, and after the asm
; region `pxor` reads its source operand from a stack slot (folded reload) into
; %mm0, which is then copied to %xmm0.
1308 define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
1309 ; CHECK-LABEL: stack_fold_pxor:
1311 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1314 ; CHECK-NEXT: #NO_APP
1315 ; CHECK-NEXT: pxor {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1316 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1318 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
; Operation under test: both function arguments are passed straight through.
1319 %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
; Intrinsic used above: (x86_mmx, x86_mmx) -> x86_mmx, nounwind readnone.
1322 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone